Full Code of h2oai/h2ogpt for AI

main 5af700f5bdd1 cached

287 files

18.1 MB

4.8M tokens

1858 symbols

1 requests

Copy disabled (too large) Download .txt

Showing preview only (19,034K chars total). Download the full file to get everything.

Repository: h2oai/h2ogpt
Branch: main
Commit: 5af700f5bdd1
Files: 287
Total size: 18.1 MB

Directory structure:
gitextract_5zppyvqi/

├── .dockerignore
├── .gitattributes
├── .github/
│   └── workflows/
│       └── python-package-publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── benchmarks/
│   ├── llm_gpu_benchmark.py
│   ├── llm_gpu_benchmark_text-generation-inference.html
│   ├── llm_gpu_benchmark_transformers.html
│   ├── llm_gpu_benchmarks.json
│   ├── perf.json
│   ├── perf.md
│   └── rag_benchmark.md
├── blog/
│   └── README.md
├── ci/
│   └── jenkinsfile
├── cloud/
│   └── packer/
│       ├── Jenkinsfile
│       ├── README.md
│       ├── h2oai-h2ogpt-4096-llama2-13b-chat.sh
│       ├── h2ogpt-azure.json
│       ├── h2ogpt-gcp.json
│       ├── install_h2ogpt.sh
│       ├── setup_environment.sh
│       └── startup-scripts/
│           ├── h2ogpt.service
│           ├── h2ogpt_nginx.service
│           ├── run_h2ogpt.sh
│           ├── run_nginx.sh
│           ├── run_vllm.sh
│           ├── temp.conf
│           └── vllm.service
├── data/
│   ├── README-template.md
│   ├── censor_words.txt
│   ├── config.json
│   ├── create_data_cards.py
│   ├── dai_docs.train.json
│   ├── dai_docs.train_cleaned.json
│   ├── dai_docs.valid.json
│   ├── dai_faq.json
│   ├── example.xlsx
│   ├── h2ogpt-personality.json
│   └── merged.json
├── dev_installers/
│   └── mac/
│       ├── README.md
│       ├── build_mac_installer.sh
│       ├── h2ogpt-osx-m1-cpu.spec
│       ├── h2ogpt-osx-m1-gpu.spec
│       └── mac_run_app.py
├── docker-compose-cpu.yml
├── docker-compose-vllm.yml
├── docker-compose.yml
├── docker_build_script_ubuntu.sh
├── docs/
│   ├── Dockerfile.delta2
│   ├── Dockerfile.internvl
│   ├── FAQ.md
│   ├── FINETUNE.md
│   ├── INSTALL.md
│   ├── LINKS.md
│   ├── README_Agents.md
│   ├── README_CLI.md
│   ├── README_CLIENT.md
│   ├── README_CPU.md
│   ├── README_DOCKER.md
│   ├── README_GPU.md
│   ├── README_InferenceServers.md
│   ├── README_LINUX.md
│   ├── README_LangChain.md
│   ├── README_MACOS.md
│   ├── README_SerpAPI.md
│   ├── README_WHEEL.md
│   ├── README_WINDOWS.md
│   ├── README_offline.md
│   ├── README_quickstart.md
│   ├── README_ui.md
│   ├── TRITON.md
│   ├── autogen.patch
│   ├── autogen2.patch
│   ├── build_windows_gpu.sh
│   ├── google.patch
│   ├── h2oGPT_CPU.ipynb
│   ├── h2oGPT_GPU.ipynb
│   ├── linux_install.sh
│   ├── linux_install_full.sh
│   ├── openai.patch
│   ├── pytubefix.patch
│   ├── run_patches.sh
│   ├── setup_docker_linux.sh
│   ├── tos.md
│   ├── trans.patch
│   ├── trans2.patch
│   ├── windows_freezelist.txt
│   ├── windows_install.bat
│   └── xtt.patch
├── finetune.py
├── generate.py
├── gradio_utils/
│   ├── __init__.py
│   ├── css.py
│   ├── google_auth.py
│   ├── grclient.py
│   ├── prompt_form.py
│   └── yield_utils.py
├── h2ogpt/
│   └── __init__.py
├── helm/
│   └── h2ogpt-chart/
│       ├── .helmignore
│       ├── Chart.yaml
│       ├── templates/
│       │   ├── _helpers.tpl
│       │   ├── config-map.yaml
│       │   ├── deployment.yaml
│       │   └── service.yaml
│       └── values.yaml
├── iterators/
│   ├── __init__.py
│   ├── iterator_pipe.py
│   └── timeout_iterator.py
├── metrics/
│   ├── __init__.py
│   └── quip.py
├── models/
│   ├── README-template.md
│   ├── __init__.py
│   ├── create_model_cards.py
│   ├── gpu_mem_track.py
│   ├── makevllm.sh
│   ├── predict_aquila.py
│   └── test_scrape1.py
├── notebooks/
│   └── h2oGPT_api_examples.ipynb
├── openai_server/
│   ├── __init__.py
│   ├── agent_prompting.py
│   ├── agent_tools/
│   │   ├── aider_code_generation.py
│   │   ├── ask_question_about_documents.py
│   │   ├── ask_question_about_image.py
│   │   ├── audio_transcription.py
│   │   ├── bing_search.py
│   │   ├── common/
│   │   │   └── utils.py
│   │   ├── convert_document_to_text.py
│   │   ├── download_web_video.py
│   │   ├── driverless_ai_data_science.py
│   │   ├── google_search.py
│   │   ├── image_generation.py
│   │   ├── mermaid_renderer.py
│   │   ├── news_query.py
│   │   ├── query_to_web_image.py
│   │   ├── scholar_papers_query.py
│   │   └── wolfram_alpha_math_science_query.py
│   ├── agent_utils.py
│   ├── autogen_2agent_backend.py
│   ├── autogen_agents.py
│   ├── autogen_multi_agent_backend.py
│   ├── autogen_streaming.py
│   ├── autogen_utils.py
│   ├── backend.py
│   ├── backend_utils.py
│   ├── chat_history_render.py
│   ├── cogvlm2_server/
│   │   ├── cogvlm2.py
│   │   └── requirements.txt
│   ├── log.py
│   ├── openai_client.py
│   ├── server.py
│   ├── server_start.py
│   ├── test_autogen_utils.py
│   ├── test_backend_utils.py
│   ├── test_conversion.py
│   ├── test_openai_server.py
│   └── test_prompt_caching.py
├── papers/
│   └── technical-report/
│       ├── compile.sh
│       ├── conf.sty
│       └── h2oGPT-TR.tex
├── reqs_optional/
│   ├── reqs_constraints.txt
│   ├── requirements_optional_agents.txt
│   ├── requirements_optional_audio.txt
│   ├── requirements_optional_cpu_only.txt
│   ├── requirements_optional_doctr.txt
│   ├── requirements_optional_gpu_only.txt
│   ├── requirements_optional_image.txt
│   ├── requirements_optional_langchain.gpllike.txt
│   ├── requirements_optional_langchain.metrics.txt
│   ├── requirements_optional_langchain.txt
│   ├── requirements_optional_langchain.urls.txt
│   ├── requirements_optional_llamacpp_gpt4all.txt
│   ├── requirements_optional_training.txt
│   └── requirements_optional_wikiprocessing.txt
├── requirements.txt
├── setup.py
├── spaces/
│   ├── chatbot/
│   │   └── repo_to_spaces.sh
│   └── demo/
│       ├── app.py
│       ├── app_client_test.py
│       └── requirements.txt
├── spkemb/
│   ├── cmu_us_awb_arctic-wav-arctic_a0002.npy
│   ├── cmu_us_bdl_arctic-wav-arctic_a0009.npy
│   ├── cmu_us_clb_arctic-wav-arctic_a0144.npy
│   ├── cmu_us_ksp_arctic-wav-arctic_b0087.npy
│   ├── cmu_us_rms_arctic-wav-arctic_b0353.npy
│   └── cmu_us_slt_arctic-wav-arctic_a0508.npy
├── src/
│   ├── __init__.py
│   ├── audio_langchain.py
│   ├── basic_nltk.py
│   ├── cli.py
│   ├── client_test.py
│   ├── create_data.py
│   ├── db_utils.py
│   ├── enums.py
│   ├── eval.py
│   ├── evaluate_params.py
│   ├── export_hf_checkpoint.py
│   ├── function_client.py
│   ├── function_server.py
│   ├── gen.py
│   ├── gpt4all_llm.py
│   ├── gpt_langchain.py
│   ├── gradio_funcs.py
│   ├── gradio_runner.py
│   ├── gradio_themes.py
│   ├── h2o_serpapi.py
│   ├── h2oai_pipeline.py
│   ├── image_captions.py
│   ├── image_doctr.py
│   ├── image_pix2struct.py
│   ├── image_utils.py
│   ├── langchain_mistralai/
│   │   └── chat_models.py
│   ├── langchain_openai_local.py
│   ├── llama_flash_attn_monkey_patch.py
│   ├── llm_exllama.py
│   ├── loaders.py
│   ├── make_db.py
│   ├── model_utils.py
│   ├── output_parser.py
│   ├── pandas_agent_langchain.py
│   ├── pre-commit
│   ├── prepare_offline.py
│   ├── prompter.py
│   ├── prompter_utils.py
│   ├── read_wiki_full.py
│   ├── sagemaker.py
│   ├── stopping.py
│   ├── stt.py
│   ├── tts.py
│   ├── tts_coqui.py
│   ├── tts_sentence_parsing.py
│   ├── tts_utils.py
│   ├── utils.py
│   ├── utils_langchain.py
│   ├── utils_procs.py
│   ├── utils_sys.py
│   ├── version.py
│   └── vision/
│       ├── __init__.py
│       ├── extract_movie.py
│       ├── flux.py
│       ├── playv2.py
│       ├── sdxl_turbo.py
│       ├── stable_diffusion_xl.py
│       └── utils_vision.py
├── tests/
│   ├── 1paul_graham.txt
│   ├── __init__.py
│   ├── conftest.py
│   ├── example.xlsx
│   ├── memory_hog_script.py
│   ├── next.txt
│   ├── sample.eml
│   ├── table_as_image.docx
│   ├── test4gpus.sh
│   ├── test_async_iterator_pipe.py
│   ├── test_async_timeout_iterator.py
│   ├── test_cli.py
│   ├── test_client_calls.py
│   ├── test_client_readme.py
│   ├── test_eval.py
│   ├── test_eval_models.py
│   ├── test_fine_tune_export_tgi.sh
│   ├── test_imports.py
│   ├── test_inference_servers.py
│   ├── test_iterator_pipe.py
│   ├── test_langchain_simple.py
│   ├── test_langchain_units.py
│   ├── test_long_context.py
│   ├── test_manual_test.py
│   ├── test_metrics.py
│   ├── test_openai_server.py
│   ├── test_perf_benchmarks.py
│   ├── test_pipeline.py
│   ├── test_prompter.py
│   ├── test_requirements.py
│   ├── test_sentence_parsing.py
│   ├── test_timeout_iterator.py
│   ├── test_tokenizer.py
│   ├── test_tts.py
│   ├── test_ui.py
│   ├── test_utils.py
│   ├── test_vision.py
│   └── utils.py
├── version.txt
├── win_run_app.py
└── windows_installer.cfg

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
.git
.npm
.dockerignore
.pytest_cache
.cache
.local
.github
.nv
.benchmarks
.bash_history
.gitignore
h2ogpt.egg-info
venv
build
dist
prebuilt_deps
Dockerfile

================================================
FILE: .gitattributes
================================================


================================================
FILE: .github/workflows/python-package-publish.yml
================================================
# Manually triggered workflow: builds the h2oGPT wheel with `make clean dist`
# and uploads it with twine to either PyPI or Test-PyPI.
name: Build & Publish h2oGPT Python wheel to PYPI

on:
  workflow_dispatch:
    inputs:
      # Target index; decides which of the two publish steps below runs.
      pypi-index:
        type: choice
        description: PyPI index that needed to be published
        required: true
        default: Test-PyPI
        options:
          - PyPI
          - Test-PyPI
      # Optional; when non-empty it overwrites version.txt before the build.
      version:
        description: |
          Override the current version for the python package for dev purposes when uploading to Test-PyPI
        type: string

jobs:
  build_and_upload:
    runs-on: ubuntu-latest
    steps:
        - uses: actions/checkout@v3.5.3

        - uses: actions/setup-python@v4
          with:
            python-version: '3.10'

        - name: Install Dependencies
          run: |
            python3.10 -m pip install --upgrade pip
            python3.10 -m pip install setuptools wheel twine --upgrade

        # NOTE: this step runs whenever a version is supplied; it is not
        # restricted to the Test-PyPI choice even though the input description
        # mentions Test-PyPI.
        - name: Modify Version
          if: ${{ inputs.version != '' }}
          # The input is handed to the shell through an environment variable
          # instead of being interpolated into the script text, so its content
          # is always treated as data and can never be executed as shell code.
          env:
            VERSION_OVERRIDE: ${{ inputs.version }}
          run: |
            printf '%s\n' "$VERSION_OVERRIDE" > version.txt
            echo "h2ogpt-wheel-version  = $(cat version.txt)"

        - name: Build Wheel
          run: make clean dist

        # Exactly one of the two publish steps runs, selected by `pypi-index`.
        - name: Publish to Test-PyPI
          if: ${{ inputs.pypi-index == 'Test-PyPI' }}
          run: |
            twine upload -r testpypi dist/*
          env:
            TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
            TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

        - name: Publish to PyPI
          if: ${{ inputs.pypi-index == 'PyPI' }}
          run: |
            twine upload dist/*
          env:
            TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
            TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}


================================================
FILE: .gitignore
================================================
out/
7B/
13B/
__pycache__/
checkpoint**
minimal-llama**
upload.py
lora-**
*ckpt
wandb
evaluate.py
test_data.json
todo.txt
.neptune/
*.bin
db_dir_UserData
temp_path_do_doc1
offline_folder
flagged_data_points
.pytest_cache
user_path
user_path_test
build
h2ogpt.egg-info
dist
.idea
.cache
.local
.bash_history
.benchmarks
Dockerfile-runner.dockerfile
build_info.txt
prebuilt_deps
Dockerfile_deps

# IDEs
.idea/

# virtual envs
venv

# Mac one click installer
Tesseract-OCR/
poppler/


================================================
FILE: Dockerfile
================================================
# Image definition: copies the repository into /workspace, runs
# docker_build_script_ubuntu.sh, then switches to a non-root user and uses
# python3.10 as the entrypoint.
#
# devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04

# Keep package installation from prompting for input during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Put /h2ogpt_conda/envs/h2ogpt/bin first on PATH.
# NOTE(review): that environment is presumably created by
# docker_build_script_ubuntu.sh (not visible here) - confirm. The purpose of the
# duplicate ARG is not evident from this file.
ENV PATH="/h2ogpt_conda/envs/h2ogpt/bin:${PATH}"
ARG PATH="/h2ogpt_conda/envs/h2ogpt/bin:${PATH}"

# HOME and the cache locations all live under /workspace.
ENV HOME=/workspace
ENV CUDA_HOME=/usr/local/cuda-12.1
ENV VLLM_CACHE=/workspace/.vllm_cache
ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache
ENV HF_HUB_ENABLE_HF_TRANSFER=1

WORKDIR /workspace

# Copy the build context (minus whatever .dockerignore excludes) into the image.
COPY . /workspace/

# Explicit copy of build_info.txt; the build stops here if the file is absent
# from the context.
# NOTE(review): looks redundant with the COPY above unless it is meant as an
# existence check - confirm.
COPY build_info.txt /workspace/

RUN cd /workspace && ./docker_build_script_ubuntu.sh

# Grant read/write/execute on everything under /workspace to all users,
# presumably so the non-root user created below can work there.
RUN chmod -R a+rwx /workspace

# Identity of the runtime user; each value can be overridden with --build-arg.
ARG user=h2ogpt
ARG group=h2ogpt
ARG uid=1000
ARG gid=1000

RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/bash ${user}
# already exists in base image
# RUN groupadd -g ${gid} docker && useradd -u ${uid} -g ${group} -m ${user}

# Add the user to the docker group
RUN usermod -aG docker ${user}

# Switch to the new user
USER ${user}

# Ports the container declares.
# NOTE(review): which service uses which port is not visible in this file.
EXPOSE 8888
EXPOSE 7860
EXPOSE 5000
EXPOSE 5002
EXPOSE 5004

# Arguments given to `docker run <image> ...` are passed to python3.10.
ENTRYPOINT ["python3.10"]


================================================
FILE: LICENSE
================================================
                                Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: Makefile
================================================
# Default goal: rebuild the wheel from a clean tree.
all: clean dist

# NOTE: make does not run the backticks; the variable holds the literal command
# substitution text, which the shell expands each time a recipe uses it.
PACKAGE_VERSION              := `cat version.txt | tr -d '\n'`
# Tag derived from the current git state, evaluated once when the Makefile is read.
BUILD_TAG                    := $(shell git describe --always --dirty)
DOCKER_H2OGPT_RUNTIME_IMAGE  := gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(BUILD_TAG)
DOCKER_H2OGPT_VLLM_IMAGE     := gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(BUILD_TAG)
# Interpreter used by all Python targets; override with `make PYTHON_BINARY=...`.
PYTHON_BINARY                ?= `which python`
# pytest marker expression for the test targets. The value is deliberately NOT
# quoted here: the recipes already wrap it as -m "$(DEFAULT_MARKERS)", and a
# second pair of quotes inside the value would close the outer ones, letting the
# shell split the expression into separate words.
DEFAULT_MARKERS              ?= not need_tokens and not need_gpu

# h2ogpt base and vllm images built elsewhere and referenced here:
DOCKER_BASE_OS_IMAGE     := gcr.io/vorvan/h2oai/h2ogpt-oss-wolfi-base:9
DOCKER_VLLM_IMAGE        := gcr.io/vorvan/h2oai/h2oai/h2ogpte-vllm:0.6.3.post1-38ed4ff2


# Targets that must always run even if a file or directory of the same name
# exists (e.g. dist/, venv/, or a stray file called `clean`). build_info.txt is
# listed so it is regenerated on every request.
.PHONY: all clean venv install dist test test_imports publish docker_build docker_push build_info.txt

# Remove wheel build artifacts.
clean:
	rm -rf dist build h2ogpt.egg-info

# Create a virtualenv in ./venv using PYTHON_BINARY as its interpreter.
venv:
	$(PYTHON_BINARY) -m virtualenv -p $(PYTHON_BINARY) venv

# Install the wheel previously built by `dist` for the version in version.txt.
install:
	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl

# `make install-<extra>` installs the same wheel with the named extra, e.g. wheel[<extra>].
install-%:
	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl[$*]

# Build the wheel into dist/.
dist:
	$(PYTHON_BINARY) setup.py bdist_wheel

# Run the test suite under tests/, filtered by DEFAULT_MARKERS; writes test_report.xml.
test:
	$(PYTHON_BINARY) -m pip install requirements-parser
	$(PYTHON_BINARY) -m pytest tests --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"

# Run only tests/test_imports.py with the same marker filter and report file.
test_imports:
	$(PYTHON_BINARY) -m pytest tests/test_imports.py --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"

publish:
	echo "Publishing not implemented yet."

# Write build metadata as key="value" lines. The file is removed first and then
# appended to line by line; because the target is declared .PHONY it is
# regenerated on every request.
# NOTE(review): in the `branch` line, `git branch` does not appear to read
# stdin, so the leading `git rev-parse HEAD |` seems to have no effect - confirm.
build_info.txt:
	@rm -rf build_info.txt
	@echo "commit=\"$(shell git rev-parse HEAD)\"" >> $@
	@echo "branch=\"`git rev-parse HEAD | git branch -a --contains | grep -v detached | sed -e 's~remotes/origin/~~g' -e 's~^ *~~' | sort | uniq | tr '*\n' ' '`\"" >> $@
	@echo "describe=\"`git describe --always --dirty`\"" >> $@
	@echo "build_os=\"`uname -a`\"" >> $@
	@echo "build_machine=\"`hostname`\"" >> $@
	@echo "build_date=\"$(shell date "+%Y%m%d")\"" >> $@
	@echo "build_user=\"`id -u -n`\"" >> $@
	@echo "base_version=\"$(PACKAGE_VERSION)\"" >> $@


# Obtain the runtime and vLLM images for BUILD_TAG.
# For each image, a registry manifest lookup decides between two recipes:
#   - HTTP 200: the image for this tag already exists, so just pull it;
#   - otherwise: build the runtime image locally from Dockerfile (also tagged
#     h2ogpt:current), or pull the prebuilt vLLM image and re-tag it.
# NOTE: `ifeq` and `$(shell ...)` are evaluated while make reads the Makefile,
# so both curl requests run on every make invocation, not only for this target.
docker_build: build_info.txt
ifeq ($(shell curl --connect-timeout 4 --write-out %{http_code} -sS --output /dev/null -X GET https://gcr.io/v2/vorvan/h2oai/h2oai-h2ogpt-runtime/manifests/$(BUILD_TAG)),200)
	@echo "Image already pushed to GCR: $(DOCKER_H2OGPT_RUNTIME_IMAGE)"
	docker pull $(DOCKER_H2OGPT_RUNTIME_IMAGE)
else
	docker pull $(DOCKER_BASE_OS_IMAGE)
	DOCKER_BUILDKIT=1 docker build -t $(DOCKER_H2OGPT_RUNTIME_IMAGE) -t h2ogpt:current -f Dockerfile .
endif
ifeq ($(shell curl --connect-timeout 4 --write-out %{http_code} -sS --output /dev/null -X GET https://gcr.io/v2/vorvan/h2oai/h2oai-h2ogpt-vllm/manifests/$(BUILD_TAG)),200)
	@echo "VLLM Image already pushed to GCR: $(DOCKER_H2OGPT_VLLM_IMAGE)"
	docker pull $(DOCKER_H2OGPT_VLLM_IMAGE)
else
	docker pull $(DOCKER_VLLM_IMAGE)
	docker tag $(DOCKER_VLLM_IMAGE) $(DOCKER_H2OGPT_VLLM_IMAGE)
endif

# Tag the BUILD_TAG runtime and vLLM images with the package version and
# `latest`, then push all three tags of each image. When BUILD_ID is defined,
# additionally tag and push `<version>-<BUILD_ID>` for both images.
# Expects the BUILD_TAG images to exist locally (see docker_build).
docker_push:
	docker tag $(DOCKER_H2OGPT_RUNTIME_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(PACKAGE_VERSION)
	docker tag $(DOCKER_H2OGPT_VLLM_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(PACKAGE_VERSION)

	docker tag $(DOCKER_H2OGPT_RUNTIME_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:latest
	docker tag $(DOCKER_H2OGPT_VLLM_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:latest

	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(BUILD_TAG)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(PACKAGE_VERSION)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:latest

	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(BUILD_TAG)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(PACKAGE_VERSION)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:latest

# BUILD_ID is tested when the Makefile is read, so it must come from the
# environment or the make command line.
ifdef BUILD_ID
	docker tag $(DOCKER_H2OGPT_RUNTIME_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)

	docker tag $(DOCKER_H2OGPT_VLLM_IMAGE) gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(PACKAGE_VERSION)-$(BUILD_ID)
	docker push gcr.io/vorvan/h2oai/h2oai-h2ogpt-vllm:$(PACKAGE_VERSION)-$(BUILD_ID)
endif

# Debug helper: `make print-NAME` echoes the value of the make variable NAME.
print-%:
	@echo $($*)


================================================
FILE: README.md
================================================
# h2oGPT

Turn ★ into ⭐ (top-right corner) if you like the project!

Query and summarize your documents or just chat with local private GPT LLMs using h2oGPT, an Apache V2 open-source project.

Check out a long CoT Open-o1 open 🍓strawberry🍓 project: https://github.com/pseudotensor/open-strawberry

## Try Enterprise Version for Free
[Enterprise h2oGPTe](https://h2ogpte.genai.h2o.ai/)

## Video Demo

https://github.com/h2oai/h2ogpt/assets/2249614/2f805035-2c85-42fb-807f-fd0bca79abc6

[![img-small.png](docs/img-small.png) YouTube 4K Video](https://www.youtube.com/watch?v=_iktbj4obAI)

## Features

- **Private** offline database of any documents [(PDFs, Excel, Word, Images, Video Frames, YouTube, Audio, Code, Text, MarkDown, etc.)](docs/README_LangChain.md#supported-datatypes)
  - **Persistent** database (Chroma, Weaviate, or in-memory FAISS) using accurate embeddings (instructor-large, all-MiniLM-L6-v2, etc.)
  - **Efficient** use of context using instruct-tuned LLMs (no need for LangChain's few-shot approach)
  - **Parallel** summarization and extraction, reaching an output of 80 tokens per second with the 13B LLaMa2 model
  - **HYDE** (Hypothetical Document Embeddings) for enhanced retrieval based upon LLM responses
  - **Semantic Chunking** for better document splitting (requires GPU)
- **Variety** of models supported (LLaMa2, Mistral, Falcon, Vicuna, WizardLM, etc.), with AutoGPTQ, 4-bit/8-bit, LoRA, etc.
  - **GPU** support from HF and LLaMa.cpp GGML models, and **CPU** support using HF, LLaMa.cpp, and GPT4ALL models
  - **Attention Sinks** for [arbitrarily long](https://github.com/tomaarsen/attention_sinks) generation (LLaMa-2, Mistral, MPT, Pythia, Falcon, etc.)
- **Gradio UI** or CLI with streaming of all models
  - **Upload** and **View** documents through the UI (control multiple collaborative or personal collections)
  - **Vision Models** LLaVa, Claude-3, Gemini-Pro-Vision, GPT-4-Vision
  - **Image Generation** Stable Diffusion (sdxl-turbo, sdxl, SD3), PlaygroundAI (playv2), and Flux
  - **Voice STT** using Whisper with streaming audio conversion
  - **Voice TTS** using MIT-Licensed Microsoft Speech T5 with multiple voices and streaming audio conversion
  - **Voice TTS** using MPL2-Licensed TTS including voice cloning and streaming audio conversion
  - **AI Assistant Voice Control Mode** for hands-free control of h2oGPT chat
  - **Bake-off** UI mode against many models at the same time
  - **Easy Download** of model artifacts and control over models like LLaMa.cpp through the UI
  - **Authentication** in the UI by user/password via Native or Google OAuth
  - **State Preservation** in the UI by user/password
- **Open Web UI** with h2oGPT as backend via OpenAI Proxy
  - See [Start-up Docs](docs/FAQ.md#open-web-ui).
  - Chat completion with streaming
  - Document Q/A using h2oGPT ingestion with advanced OCR from DocTR
  - Vision models
  - Audio Transcription (STT)
  - Audio Generation (TTS)
  - Image generation
  - Authentication
  - State preservation
- **Linux, Docker, macOS, and Windows** support
- **Inference Servers** [support](docs/README_InferenceServers.md) for oLLaMa, HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, Together.ai, OpenAI, Azure OpenAI, Anthropic, MistralAI, Google, and Groq
- **OpenAI compliant**
  - Server Proxy [API](docs/README_CLIENT.md) (h2oGPT acts as drop-in-replacement to OpenAI server)
  - Chat and Text Completions (streaming and non-streaming)
  - Audio Transcription (STT)
  - Audio Generation (TTS)
  - Image Generation
  - Embedding
  - Function tool calling w/auto tool selection
  - AutoGen Code Execution Agent
- **JSON Mode**
  - Strict schema control for vLLM via its use of outlines
  - Strict schema control for OpenAI, Anthropic, Google Gemini, MistralAI models
  - JSON mode for some older OpenAI or Gemini models with schema control if model is smart enough (e.g. gemini 1.5 flash)
  - Any model via code block extraction
- **Web-Search** integration with Chat and Document Q/A
- **Agents** for Search, Document Q/A, Python Code, CSV frames
  - High quality Agents via OpenAI proxy server on separate port
  - Code-first agent that generates plots, researches, evaluates images via vision model, etc. (client code openai_server/openai_client.py).
  - No UI for this, just API
- **Evaluate** performance using reward models
- **Quality** maintained with over 1000 unit and integration tests taking over 24 GPU-hours

## Get Started

[![GitHub license](https://img.shields.io/github/license/h2oai/h2ogpt?style=flat-square)](LICENSE)
[![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_LINUX.md)
[![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_MACOS.md)
[![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_WINDOWS.md)
[![Docker](https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_DOCKER.md)

### Install h2oGPT

Docker is recommended for Linux, Windows, and macOS for full capabilities. The Linux script also has full capability, while the Windows and macOS scripts have fewer capabilities than Docker.

* [Docker Build and Run Docs (Linux, Windows, macOS)](docs/README_DOCKER.md)
* [Linux Install and Run Docs](docs/README_LINUX.md)
* [Windows 10/11 Installation Script](docs/README_WINDOWS.md)
* [macOS Install and Run Docs](docs/README_MACOS.md)
* [Quick Start on any Platform](docs/README_quickstart.md)

---

### Colab Demos
- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT CPU](https://colab.research.google.com/drive/13RiBdAFZ6xqDwDKfW6BG_-tXfXiqPNQe?usp=sharing)
- [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT GPU](https://colab.research.google.com/drive/143-KFHs2iCqXTQLI2pFCDiR69z0dR8iE?usp=sharing)

### Resources
- [FAQs](docs/FAQ.md)
- [README for LangChain](docs/README_LangChain.md)
- [Discord](https://discord.gg/WKhYMWcVbq)
- [Models (LLaMa-2, Falcon 40, etc.) at 🤗](https://huggingface.co/h2oai/)
- [YouTube: 100% Offline ChatGPT Alternative?](https://www.youtube.com/watch?v=Coj72EzmX20)
- [YouTube: Ultimate Open-Source LLM Showdown (6 Models Tested) - Surprising Results!](https://www.youtube.com/watch?v=FTm5C_vV_EY)
- [YouTube: Blazing Fast Falcon 40b 🚀 Uncensored, Open-Source, Fully Hosted, Chat With Your Docs](https://www.youtube.com/watch?v=H8Dx-iUY49s)
- [Technical Paper: https://arxiv.org/pdf/2306.08161.pdf](https://arxiv.org/pdf/2306.08161.pdf)

### Docs Guide
<!--  cat README.md | ./gh-md-toc  -  But Help is heavily processed -->
* [Get Started](#get-started)
   * [Linux (CPU or CUDA)](docs/README_LINUX.md)
   * [macOS (CPU or M1/M2)](docs/README_MACOS.md)
   * [Windows 10/11 (CPU or CUDA)](docs/README_WINDOWS.md)
   * [GPU (CUDA, AutoGPTQ, exllama) Running Details](docs/README_GPU.md)
   * [CPU Running Details](docs/README_CPU.md)
   * [CLI chat](docs/README_CLI.md)
   * [Gradio UI](docs/README_ui.md)
   * [Client API (Gradio, OpenAI-Compliant)](docs/README_CLIENT.md)
   * [Inference Servers (oLLaMa, HF TGI server, vLLM, Groq, Anthropic, Google, Mistral, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI)](docs/README_InferenceServers.md)
   * [Build Python Wheel](docs/README_WHEEL.md)
   * [Offline Installation](docs/README_offline.md)
   * [Low Memory](docs/FAQ.md#low-memory-mode)
   * [Docker](docs/README_DOCKER.md)
* [LangChain Document Support](docs/README_LangChain.md)
* [Compare to PrivateGPT et al.](docs/README_LangChain.md#what-is-h2ogpts-langchain-integration-like)
* [Roadmap](#roadmap)
* [Development](#development)
* [Help](#help)
   * [LangChain file types supported](docs/README_LangChain.md#supported-datatypes)
   * [CLI Database control](docs/README_LangChain.md#database-creation)
   * [FAQ](docs/FAQ.md)
     * [Model Usage Notes](docs/FAQ.md#model-usage-notes)
     * [Adding LLM Models (including using GGUF and Attention Sinks)](docs/FAQ.md#adding-models)
     * [Adding Embedding Models](docs/FAQ.md#add-new-embedding-model)
     * [Adding Prompts](docs/FAQ.md#adding-prompt-templates)
     * [In-Context Learning](docs/FAQ.md#in-context-learning-via-prompt-engineering)
     * [Multiple GPUs](docs/FAQ.md#multiple-gpus)
     * [Low-Memory Usage](docs/FAQ.md#low-memory-mode)
     * [Environment Variables](docs/FAQ.md#what-envs-can-i-pass-to-control-h2ogpt)
     * [HTTPS access for server and client](docs/FAQ.md#https-access-for-server-and-client)
   * [Useful Links](docs/LINKS.md)
   * [Fine-Tuning](docs/FINETUNE.md)
   * [Triton](docs/TRITON.md)
   * [Commercial viability](docs/FAQ.md#commercial-viability)
* [Acknowledgements](#acknowledgements)
* [Why H2O.ai?](#why-h2oai)
* [Disclaimer](#disclaimer)

### Development

- To create a development environment for training and generation, follow the [installation instructions](docs/INSTALL.md).
- To fine-tune any LLM models on your data, follow the [fine-tuning instructions](docs/FINETUNE.md).
- To run h2oGPT tests:
    ```bash
    pip install requirements-parser pytest-instafail pytest-random-order playsound==1.3.0
    conda install -c conda-forge gst-python -y
    sudo apt-get install gstreamer-1.0
    pip install pygame
    GPT_H2O_AI=0 CONCURRENCY_COUNT=1 pytest --instafail -s -v tests
    # for openai server test on already-running local server
    pytest -s -v -n 4 openai_server/test_openai_server.py::test_openai_client
    ```
  or tweak/run `tests/test4gpus.sh` to run tests in parallel.

### Acknowledgements

* Some training code was based upon the March 24 version of [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/).
* Used high-quality data created by [OpenAssistant](https://open-assistant.io/).
* Used base models by [EleutherAI](https://www.eleuther.ai/).
* Used OIG data created by [LAION](https://laion.ai/blog/oig-dataset/).

### Why H2O.ai?

Our [Makers](https://h2o.ai/company/team/) at [H2O.ai](https://h2o.ai) have built several world-class Machine Learning, Deep Learning and AI platforms:
- #1 open-source machine learning platform for the enterprise [H2O-3](https://github.com/h2oai/h2o-3)
- The world's best AutoML (Automatic Machine Learning) with [H2O Driverless AI](https://h2o.ai/platform/ai-cloud/make/h2o-driverless-ai/)
- No-Code Deep Learning with [H2O Hydrogen Torch](https://h2o.ai/platform/ai-cloud/make/hydrogen-torch/)
- Document Processing with Deep Learning in [Document AI](https://h2o.ai/platform/ai-cloud/make/document-ai/)

We also built platforms for deployment and monitoring, and for data wrangling and governance:
- [H2O MLOps](https://h2o.ai/platform/ai-cloud/operate/h2o-mlops/) to deploy and monitor models at scale
- [H2O Feature Store](https://h2o.ai/platform/ai-cloud/make/feature-store/) in collaboration with AT&T
- Open-source Low-Code AI App Development Frameworks [Wave](https://wave.h2o.ai/) and [Nitro](https://nitro.h2o.ai/)
- Open-source Python [datatable](https://github.com/h2oai/datatable/) (the engine for H2O Driverless AI feature engineering)

Many of our customers are creating models and deploying them enterprise-wide and at scale in the [H2O AI Cloud](https://h2o.ai/platform/ai-cloud/):
- Multi-Cloud or on Premises
- [Managed Cloud (SaaS)](https://h2o.ai/platform/ai-cloud/managed)
- [Hybrid Cloud](https://h2o.ai/platform/ai-cloud/hybrid)
- [AI Appstore](https://docs.h2o.ai/h2o-ai-cloud/)

We are proud to have over 25 (of the world's 280) [Kaggle Grandmasters](https://h2o.ai/company/team/kaggle-grandmasters/) call H2O home, including three Kaggle Grandmasters who have made it to world #1.

### Disclaimer

Please read this disclaimer carefully before using the large language model provided in this repository. Your use of the model signifies your agreement to the following terms and conditions.

- Biases and Offensiveness: The large language model is trained on a diverse range of internet text data, which may contain biased, racist, offensive, or otherwise inappropriate content. By using this model, you acknowledge and accept that the generated content may sometimes exhibit biases or produce content that is offensive or inappropriate. The developers of this repository do not endorse, support, or promote any such content or viewpoints.
- Limitations: The large language model is an AI-based tool and not a human. It may produce incorrect, nonsensical, or irrelevant responses. It is the user's responsibility to critically evaluate the generated content and use it at their discretion.
- Use at Your Own Risk: Users of this large language model must assume full responsibility for any consequences that may arise from their use of the tool. The developers and contributors of this repository shall not be held liable for any damages, losses, or harm resulting from the use or misuse of the provided model.
- Ethical Considerations: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.
- Reporting Issues: If you encounter any biased, offensive, or otherwise inappropriate content generated by the large language model, please report it to the repository maintainers through the provided channels. Your feedback will help improve the model and mitigate potential issues.
- Changes to this Disclaimer: The developers of this repository reserve the right to modify or update this disclaimer at any time without prior notice. It is the user's responsibility to periodically review the disclaimer to stay informed about any changes.

By using the large language model provided in this repository, you agree to accept and comply with the terms and conditions outlined in this disclaimer. If you do not agree with any part of this disclaimer, you should refrain from using the model and any content generated by it.

## Star History

[![Star History Chart](https://api.star-history.com/svg?repos=h2oai/h2ogpt&type=Timeline)](https://star-history.com/#h2oai/h2ogpt&Timeline)


================================================
FILE: benchmarks/llm_gpu_benchmark.py
================================================


# %%
import json

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# %%
# Load the benchmark records straight into a dataframe.
# Input: `llm_gpu_benchmarks.json`, opened relative to the current working directory.
# The file can be regenerated using the command
# curl  -sSL https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/perf.json | jq -s '.' > llm_gpu_benchmarks.json
with open('llm_gpu_benchmarks.json') as benchmark_file:
    # Parse the JSON and flatten it in one step; the intermediate parsed object is not kept around
    df = pd.json_normalize(json.load(benchmark_file))
# Only `df` is meant to stay in the (interactive) namespace
del benchmark_file

# %%
# Clean up the dataframe
# Discard the columns that the plots below do not use
df.drop(columns=['task', 'ngpus', 'reps', 'date', 'git_sha', 'transformers', 'bitsandbytes', 'cuda', 'hostname',
                 'summarize_input_len_bytes'],
        inplace=True)
# `n_gpus` is referred to as `gpu_count` from here on
df.rename(columns={'n_gpus': 'gpu_count'}, inplace=True)
# Derive the GPU name and the GPU memory from the free-text `gpus` column
df["gpu_name"] = df["gpus"].str.extract(r'[1-9] x ([\w\- ]+) .+')
# The number captured inside the parentheses is divided by 1024 and rounded; values that cannot be
# parsed become missing (`errors='coerce'`), which the nullable Int64 dtype can hold
df["gpu_memory_gb"] = (
    pd.to_numeric(df["gpus"].str.extract(r'[\w ]+ \(([\d]+) .+', expand=False), errors='coerce') / 1024
).round().astype('Int64')
# The raw description is no longer needed
del df["gpus"]
# Shorten the GPU names, then prefix each with its memory size
df["gpu_name"] = (df["gpu_name"]
                  .str.replace('NVIDIA ', '')
                  .str.replace('GeForce ', '')
                  .str.replace('A100-SXM4-80GB', 'A100 SXM4'))
df["gpu_name"] = df["gpu_memory_gb"].astype(str) + "-" + df["gpu_name"]
# Remove CPUs: rows whose GPU name could not be extracted are still null at this point
df.drop(df.loc[df["gpu_name"].isnull()].index, inplace=True)

# %%
# Keep one row (the first encountered) per backend / model / bits / GPU count / GPU name combination
df.drop_duplicates(subset=['backend', 'base_model', 'bits', 'gpu_count', 'gpu_name'], inplace=True)

# %% Add baseline comparison columns
# The CPU results for 4, 8, and 16 bit quantization are simplified to a single reference value per task
cpu_summary_out_throughput = 1353 / 1216  # bytes/second  (calculated from summarize_output_len_bytes / summarize_time)
cpu_generate_out_throughput = 849 / 180  # bytes/second   (calculated from generate_output_len_bytes / generate_time)

# GPU throughput: output bytes divided by elapsed time, for each task
df["summary_out_throughput"] = df["summarize_output_len_bytes"].div(df["summarize_time"])
df["generate_out_throughput"] = df["generate_output_len_bytes"].div(df["generate_time"])
# GPU throughput expressed as a multiple of the CPU reference value
df["summary_out_throughput_normalize"] = df["summary_out_throughput"].div(cpu_summary_out_throughput)
df["generate_out_throughput_normalize"] = df["generate_out_throughput"].div(cpu_generate_out_throughput)

# %%
# Optional export of the processed table (disabled)
# df.to_excel('tmp/scratchpad/output/llm_gpu_benchmarks.xlsx', index=False)

# %%
# Default plotly renderer
pio.renderers.default = "browser"

# %%
# Bar colour per quantization level: the first three colours of the D3 qualitative palette,
# keyed by the bit width written as a string
bits_bar_colors = dict(zip(('4', '8', '16'), px.colors.qualitative.D3))

# Distinct values, in order of first appearance, that define the figures, subplot columns and subplot rows
backends = list(df["backend"].unique())
base_models = list(df["base_model"].unique())
n_gpus = list(df["gpu_count"].unique())

# %%
# Build one figure per backend and write it to `llm_gpu_benchmark_<backend>.html`.
# Layout: one subplot row per GPU count; every base model gets two adjacent subplot columns
# (summarization first, generation second). Bars show the throughput normalized to the CPU baseline,
# one horizontal bar per GPU name, grouped and coloured by quantization bits.

# Column titles are derived from `base_models` so that they always agree with the column layout used
# below (two columns per model, in `base_models` order), whatever models the data contains.
# NOTE(review): dropping the org prefix and 'h2ogpt-4096-' is meant to reproduce the previous hard-coded
# titles ('llama2-7b-chat', ...) for names like 'h2oai/h2ogpt-4096-llama2-7b-chat' — confirm against the data.
column_titles = [f"{str(base_model).split('/')[-1].replace('h2ogpt-4096-', '')} {task}"
                 for base_model in base_models
                 for task in ('Summarization', 'Generation')]
# Loop-invariant: the quantization levels present in the data, in ascending order
bits_values = sorted(df.bits.unique())

for backend in backends:
    # for backend in ['transformers']:
    fig_bar = make_subplots(rows=len(n_gpus),
                            cols=len(base_models) * 2,
                            shared_xaxes='all',
                            shared_yaxes='columns',
                            start_cell="top-left",
                            vertical_spacing=0.1,
                            print_grid=False,
                            row_titles=[f'{gpu_count} GPUs' for gpu_count in n_gpus],
                            column_titles=column_titles)

    # for base_model in ['h2oai/h2ogpt-4096-llama2-7b-chat']:
    for model_pos, base_model in enumerate(base_models):
        # plotly rows/columns are 1-based; each model owns a (summarization, generation) column pair
        summary_col = model_pos * 2 + 1
        generate_col = model_pos * 2 + 2
        for gpu_pos, gpu_count in enumerate(n_gpus):
            row = gpu_pos + 1
            for bits in bits_values:
                sub_df = df[(df.backend == backend) &
                            (df.base_model == base_model) &
                            (df.gpu_count == gpu_count) &
                            (df.bits == bits)].sort_values(by='gpu_name')
                bar_color = bits_bar_colors[f'{bits}']
                # A trace is added even when `sub_df` is empty, so every subplot gets the same trace set
                fig_bar.add_trace(go.Bar(x=sub_df.summary_out_throughput_normalize,
                                         y=sub_df.gpu_name,
                                         name=f'sum-{bits} bits',
                                         legendgroup=f'sum-{bits} bits',
                                         marker=dict(color=bar_color),
                                         orientation='h'),
                                  row=row,
                                  col=summary_col)
                fig_bar.add_trace(go.Bar(x=sub_df.generate_out_throughput_normalize,
                                         y=sub_df.gpu_name,
                                         name=f'gen-{bits} bits',
                                         legendgroup=f'gen-{bits} bits',
                                         marker=dict(color=bar_color),
                                         orientation='h'),
                                  row=row,
                                  col=generate_col)

    fig_bar.update_layout(plot_bgcolor='rgb(250,250,250)',
                          showlegend=True,
                          barmode="group")
    # fig_bar.show()
    fig_bar.write_html(f'llm_gpu_benchmark_{backend}.html', include_plotlyjs='cdn')

================================================
FILE: benchmarks/llm_gpu_benchmark_text-generation-inference.html
================================================
<html>
<head><meta charset="utf-8" /></head>
<body>
    <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
        <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script>                <div id="8d98303e-9d8d-4a86-9ab9-85be1f565ba7" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("8d98303e-9d8d-4a86-9ab9-85be1f565ba7")) {                    Plotly.newPlot(                        "8d98303e-9d8d-4a86-9ab9-85be1f565ba7",                        [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[31.964670378460696,40.07702972093452,28.212217062134258,24.76324507950772,29.383143217889106],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[38.97113273835895,37.81293817302825,25.418311714688866,46.82453047975238,25.870047557539163],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-4 
bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.742149283479364,131.11372927692716,27.756812705358207],"xaxis":"x7","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.757641294033732,60.88036130542081,24.89894321470165],"xaxis":"x8","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.58192050074467,27.706125039541696],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[24.92264927072723,24.11901127583454],"xaxis":"x14","y":["45-RTX A6000","80-A100 
SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.56845022740626],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.63055816163121],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,38.784585018023556,18.13337657657005],"xaxis":"x3","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 
bits","orientation":"h","type":"bar","x":[null,null,28.590730184060984,16.18347618092991],"xaxis":"x4","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[20.929693801547206,12.694114023867758,85.02391911717123,17.23203722663425],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[26.649908731325855,18.11013971401145,49.03779902422664,18.7070327239283],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 
bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[12.361580993407348,16.12018834278174],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.620036315851138,17.885323649884445],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.333509386436194],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.907476788430102],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-8 
bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-16 
bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,6.337898874140187],"xaxis":"x17","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,8.157040216950774],"xaxis":"x18","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[6.239297143818297],"xaxis":"x23","y":["80-A100 SXM4"],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[8.082069511295837],"xaxis":"x24","y":["80-A100 SXM4"],"yaxis":"y24"}],                        {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat 
Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 
GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778
,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"
scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis
":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.675
5555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"an
chor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}},                        {"responsive": true}                    )                };                            </script>        </div>
</body>
</html>

================================================
FILE: benchmarks/llm_gpu_benchmark_transformers.html
================================================
<html>
<head><meta charset="utf-8" /></head>
<body>
    <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
        <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script>                <div id="4671500e-e030-484c-8d8f-02c9ef28c439" class="plotly-graph-div" style="height:100%; width:100%;"></div>            <script type="text/javascript">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById("4671500e-e030-484c-8d8f-02c9ef28c439")) {                    Plotly.newPlot(                        "4671500e-e030-484c-8d8f-02c9ef28c439",                        [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.839381644193974,19.682153353799034,14.47651674912018,26.790154000919145,16.85058557689085],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.67469844085007,27.622051912134882,19.374373797474846,27.42684895928983,20.2526752952322],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.417365201244467,11.290925144038532,6.08976919051411,9.56217317275004,5.9263976593415855],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.695887145541112,7.812688672567852,5.614002693550519,7.59461596844275,6.252509885345299],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[18.73507454097704,39.43429532784967,27.07453064626594,39.96998450085984,29.3453161508673],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.313436327725622,34.706856549443415,25.316661797353536,35.57028809081909,26.27458999671037],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.423935993931764,13.777794033942168,26.52473854898931,15.828182317775882],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.395401201017949,18.633481353508632,27.185836623669307,19.299187279602062],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.376286144153169,5.93295870509821,9.48124590639799,5.974715789431367],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.689148081304866,5.460311898298637,7.664435463393246,6.406802687346095],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.81624239176298,26.86157274268731,39.624799784757535,27.909081799152222],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 
6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.05454520400735,24.464037234597612,34.25052506253877,25.495156728837525],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[13.394795492541103,15.210707499507597],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.15606381072783,18.661753478727857],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.899421336969099,5.767145178389089],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[5.482425931352881,6.192523296540574],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[25.9430839554289,27.46244144955532],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.520372312313448,25.20924356998125],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[14.764927656045513],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.07719847124392],"xaxis":"x20","y":["80-A100 
SXM4"],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.718961706449293],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[6.177879854004683],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.054106396318144],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.138719102309768],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[8.083390907285379,12.174340676118161,11.076606608131389,16.98095523506584,12.1008725506651],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.287678373962581,13.674114390829141,13.308822531004934,17.365713991091738,12.794482361704157],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1403647823510736,4.962801741500335,3.5348819482865093,5.3562909858984185,4.1213135763128905],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.18815245154689,5.105728547922034,4.718240806380357,6.509024089959697,4.827719089783637],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX 
A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,11.435609485285738,17.906931325335666,18.878279411581737],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,15.63493452970772,22.260343102292754,21.142120495293863],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[7.931395602652238,10.626258179366356,16.319110879759947,11.241866660596408],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.215795602873966,12.833338647314658,16.943446615015436,12.165800832662722],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1362935762237645,3.499943275803895,5.402452917863267,3.9771491776646073],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.156100924190738,4.674808411970743,6.638529207897594,4.611620121814299],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[9.178059918412773,11.287265701494618,18.203631997182082,18.325614335569053],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[10.880162241524287,15.437944210820223,22.20571335065674,21.096027375985646],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.252856056970655,11.390035634842294],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.44766998737035,12.445574043628245],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.432161213004653,4.005435712274412],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.577172738204334,4.623478053690466],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[11.047490604822276,16.97583795634349],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.140929085583872,18.40904684710705],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.82198892665345],"xaxis":"x21","y":["80-A100 
SXM4"],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[11.846523539191672],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.8795801184687786],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.568029810459134],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[16.97013525520682],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[18.913362098572737],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,5.161890396610965,6.976123395155549],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.887611768925055,9.031399021823733],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 
bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,4.96092701086689,7.068376492905629],"xaxis":"x11","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.693355665703394,8.905280446876153],"xaxis":"x12","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9856691832414866],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.913951722547195],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[4.8550061015042685],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[5.58004075989967],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-8 
bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9670200139619358],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.8873606277914459],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[2.665381007576966],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[3.597816249219273],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"}],                        {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat 
Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 
GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778
,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"
scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis
":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.675
5555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"an
chor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}},                        {"responsive": true}                    )                };                            </script>        </div>
</body>
</html>

================================================
FILE: benchmarks/llm_gpu_benchmarks.json
================================================
[
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 10:46:19",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 32.29472152392069,
    "generate_output_len_bytes": 2384,
    "generate_time": 14.563165505727133
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 10:48:55",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 67.97515447934468,
    "generate_output_len_bytes": 2384,
    "generate_time": 33.00641902287801
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 10:48:58",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1440,
    "summarize_time": 114.62220064798991,
    "generate_output_len_bytes": 2619,
    "generate_time": 71.0722058614095
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 10:58:34",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 866,
    "summarize_time": 39.54404203097025,
    "generate_output_len_bytes": 2927,
    "generate_time": 22.466302394866943
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:01:59",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 32.1394579410553,
    "generate_output_len_bytes": 2384,
    "generate_time": 14.757195552190145
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 10:54:29",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 910,
    "summarize_time": 185.14580019315085,
    "generate_output_len_bytes": 2042,
    "generate_time": 117.13909141222636
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:04:37",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 94.98129558563232,
    "generate_output_len_bytes": 2512,
    "generate_time": 69.4871145884196
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:13:08",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1276,
    "summarize_time": 43.23498781522115,
    "generate_output_len_bytes": 2927,
    "generate_time": 22.826789538065594
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 11:10:08",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 991,
    "summarize_time": 90.51939169565837,
    "generate_output_len_bytes": 2927,
    "generate_time": 48.96095744768778
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 11:16:48",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 31.86189842224121,
    "generate_output_len_bytes": 2384,
    "generate_time": 14.209659894307455
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:17:39",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 71.48081835110982,
    "generate_output_len_bytes": 2384,
    "generate_time": 33.5740262667338
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 11:19:24",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 94.17744310696919,
    "generate_output_len_bytes": 2512,
    "generate_time": 70.12592967351277
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 11:27:57",
    "git_sha": "55d3b55b",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1276,
    "summarize_time": 42.8066500822703,
    "generate_output_len_bytes": 2927,
    "generate_time": 22.626200040181477
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:23:22",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 910,
    "summarize_time": 186.88371555010477,
    "generate_output_len_bytes": 2042,
    "generate_time": 117.3530724843343
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 11:39:03",
    "git_sha": "55d3b55b",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 991,
    "summarize_time": 94.50985678037007,
    "generate_output_len_bytes": 2927,
    "generate_time": 50.06416177749634
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 21:08:31",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 38.80374129613241,
    "generate_output_len_bytes": 2384,
    "generate_time": 19.23690136273702
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 21:11:49",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 178.79640992482504,
    "generate_output_len_bytes": 2772,
    "generate_time": 93.99476226170857
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 21:25:53",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 53.44271365801493,
    "generate_output_len_bytes": 2927,
    "generate_time": 30.641155401865642
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 21:30:30",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 40.80062770843506,
    "generate_output_len_bytes": 2384,
    "generate_time": 19.825008392333984
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 21:35:29",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 177.35046529769897,
    "generate_output_len_bytes": 2772,
    "generate_time": 91.73111907641093
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 21:49:20",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 56.894784371058144,
    "generate_output_len_bytes": 2927,
    "generate_time": 32.15500020980835
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/18/2023 21:54:11",
    "git_sha": "fc4826f2",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 41.46419604619344,
    "generate_output_len_bytes": 2384,
    "generate_time": 20.049855709075928
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/18/2023 21:57:39",
    "git_sha": "fc4826f2",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 183.73364853858948,
    "generate_output_len_bytes": 2772,
    "generate_time": 94.9052836894989
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/18/2023 22:11:59",
    "git_sha": "fc4826f2",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 59.204413731892906,
    "generate_output_len_bytes": 2927,
    "generate_time": 33.25332593917847
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/18/2023 22:17:00",
    "git_sha": "fc4826f2",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 42.09002653757731,
    "generate_output_len_bytes": 2384,
    "generate_time": 20.106103817621868
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 8,
    "reps": 3,
    "date": "08/18/2023 22:20:31",
    "git_sha": "fc4826f2",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 185.28164370854697,
    "generate_output_len_bytes": 2772,
    "generate_time": 95.13023789723714
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 8,
    "reps": 3,
    "date": "08/18/2023 22:34:58",
    "git_sha": "fc4826f2",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 60.9919019540151,
    "generate_output_len_bytes": 2927,
    "generate_time": 34.328625202178955
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:31:34",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 52.49842747052511,
    "generate_output_len_bytes": 2172,
    "generate_time": 20.686774571736652
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:31:55",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:35:38",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1007,
    "summarize_time": 168.9666860898336,
    "generate_output_len_bytes": 2249,
    "generate_time": 73.25518870353699
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:48:09",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 856,
    "summarize_time": 45.30513469378153,
    "generate_output_len_bytes": 1802,
    "generate_time": 22.000216643015545
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 13:51:56",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 51.64275654157003,
    "generate_output_len_bytes": 2172,
    "generate_time": 20.737667481104534
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:35:47",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 980,
    "summarize_time": 280.4669913450877,
    "generate_output_len_bytes": 2132,
    "generate_time": 141.7793349424998
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 13:57:35",
    "git_sha": "fc4826f2",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 869,
    "summarize_time": 96.61887431144714,
    "generate_output_len_bytes": 3244,
    "generate_time": 82.98751719792683
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 13:55:51",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1007,
    "summarize_time": 167.52292919158936,
    "generate_output_len_bytes": 2249,
    "generate_time": 71.82611886660258
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 14:08:08",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 856,
    "summarize_time": 47.14254776636759,
    "generate_output_len_bytes": 1802,
    "generate_time": 22.54850967725118
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 14:15:15",
    "git_sha": "d13230ee",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 14:07:15",
    "git_sha": "fc4826f2",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 89.59958203633626,
    "generate_output_len_bytes": 2172,
    "generate_time": 42.32424934705099
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 14:15:30",
    "git_sha": "d13230ee",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1024,
    "summarize_time": 185.44230167071024,
    "generate_output_len_bytes": 2122,
    "generate_time": 88.11553311347961
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 14:29:36",
    "git_sha": "d13230ee",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 922,
    "summarize_time": 68.06459252039592,
    "generate_output_len_bytes": 1802,
    "generate_time": 27.939613421758015
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 14:26:29",
    "git_sha": "d13230ee",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 980,
    "summarize_time": 280.8310640652974,
    "generate_output_len_bytes": 2132,
    "generate_time": 143.21916349728903
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 14:48:17",
    "git_sha": "d13230ee",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 869,
    "summarize_time": 98.47045453389485,
    "generate_output_len_bytes": 3244,
    "generate_time": 83.71360301971436
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 15:35:13",
    "git_sha": "0dec0f52",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 15:49:33",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 16:26:53",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 16:27:32",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 16:29:03",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 17:26:02",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 18:59:16",
    "git_sha": "5691db4a",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1075,
    "summarize_time": 39.01545596122742,
    "generate_output_len_bytes": 2242,
    "generate_time": 10.151424566904703
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 19:03:13",
    "git_sha": "5691db4a",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 940,
    "summarize_time": 21.78233750661214,
    "generate_output_len_bytes": 2130,
    "generate_time": 15.794983307520548
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 19:38:40",
    "git_sha": "6f05e8f1",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1114,
    "summarize_time": 7.636120955149333,
    "generate_output_len_bytes": 2275,
    "generate_time": 7.922623078028361
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 19:41:02",
    "git_sha": "6f05e8f1",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1024,
    "summarize_time": 10.824170271555582,
    "generate_output_len_bytes": 2130,
    "generate_time": 9.209020694096884
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 19:55:17",
    "git_sha": "2c548f21",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1088,
    "summarize_time": 24.39883820215861,
    "generate_output_len_bytes": 2275,
    "generate_time": 12.755743900934855
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 00:57:21",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 37.113919814427696,
    "generate_output_len_bytes": 2384,
    "generate_time": 18.36507821083069
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:00:31",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 49.79721482594808,
    "generate_output_len_bytes": 2172,
    "generate_time": 21.780913591384888
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:04:36",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:05:26",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 181.2461258570353,
    "generate_output_len_bytes": 2772,
    "generate_time": 92.64811905225118
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:19:33",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 800,
    "summarize_time": 174.4576851526896,
    "generate_output_len_bytes": 2713,
    "generate_time": 119.14412077267964
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:36:14",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 53.39731526374817,
    "generate_output_len_bytes": 2927,
    "generate_time": 31.369641542434692
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:40:53",
    "git_sha": "a227be4f",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 74.27096923192342,
    "generate_output_len_bytes": 1802,
    "generate_time": 29.860486666361492
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 01:48:09",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 39.926851193110146,
    "generate_output_len_bytes": 2384,
    "generate_time": 18.481745958328247
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 01:51:27",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 51.299002488454185,
    "generate_output_len_bytes": 2172,
    "generate_time": 21.828503131866455
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 01:56:20",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 178.19972308476767,
    "generate_output_len_bytes": 2772,
    "generate_time": 91.73426882425944
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 02:10:13",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 800,
    "summarize_time": 180.7814578215281,
    "generate_output_len_bytes": 2713,
    "generate_time": 124.72717420260112
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 02:26:43",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 57.08081785837809,
    "generate_output_len_bytes": 2927,
    "generate_time": 32.26534946759542
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 02:31:36",
    "git_sha": "a227be4f",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 79.9461121559143,
    "generate_output_len_bytes": 1802,
    "generate_time": 31.403561115264893
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 02:38:23",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 42.33977222442627,
    "generate_output_len_bytes": 2384,
    "generate_time": 19.723278522491455
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 02:41:52",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 55.377869288126625,
    "generate_output_len_bytes": 2172,
    "generate_time": 25.01458676656087
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 02:47:05",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 180.53432401021323,
    "generate_output_len_bytes": 2772,
    "generate_time": 91.93375285466512
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:01:07",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 800,
    "summarize_time": 179.50477250417075,
    "generate_output_len_bytes": 2713,
    "generate_time": 124.40728378295898
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:17:36",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 58.62867816289266,
    "generate_output_len_bytes": 2927,
    "generate_time": 33.394495725631714
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:22:37",
    "git_sha": "a227be4f",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 78.90612125396729,
    "generate_output_len_bytes": 1802,
    "generate_time": 30.697617371877033
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 03:29:20",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 40.498607873916626,
    "generate_output_len_bytes": 2384,
    "generate_time": 19.509677171707153
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 03:32:44",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 55.3964786529541,
    "generate_output_len_bytes": 2172,
    "generate_time": 24.347585439682007
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 03:37:55",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1179,
    "summarize_time": 186.71331850687662,
    "generate_output_len_bytes": 2772,
    "generate_time": 95.784650405248
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 03:52:28",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 800,
    "summarize_time": 185.3280005455017,
    "generate_output_len_bytes": 2713,
    "generate_time": 125.91738017400105
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 04:09:18",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1002,
    "summarize_time": 60.18280680974325,
    "generate_output_len_bytes": 2927,
    "generate_time": 33.386961142222084
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 8,
    "reps": 3,
    "date": "08/19/2023 04:14:25",
    "git_sha": "a227be4f",
    "n_gpus": 8,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 83.04790727297465,
    "generate_output_len_bytes": 1802,
    "generate_time": 32.24992283185323
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 23:26:19",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 47.03754989306132,
    "generate_output_len_bytes": 2384,
    "generate_time": 19.964784463246662
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 23:33:09",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 71.91136892636617,
    "generate_output_len_bytes": 2480,
    "generate_time": 33.6295014222463
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 23:44:08",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 00:45:42",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1007,
    "summarize_time": 148.61560583114624,
    "generate_output_len_bytes": 2357,
    "generate_time": 89.01266026496887
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 00:58:00",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 763,
    "summarize_time": 193.99270629882812,
    "generate_output_len_bytes": 2129,
    "generate_time": 95.66660761833191
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:13:01",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:13:55",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 991,
    "summarize_time": 61.52411222457886,
    "generate_output_len_bytes": 2927,
    "generate_time": 32.030215660730995
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 01:19:00",
    "git_sha": "0cdb75ef",
    "n_gpus": 1,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 81.13888708750407,
    "generate_output_len_bytes": 3486,
    "generate_time": 55.5331826210022
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 01:27:49",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 47.41046245892843,
    "generate_output_len_bytes": 2384,
    "generate_time": 20.660600344340008
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 01:34:28",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 72.85646979014079,
    "generate_output_len_bytes": 2480,
    "generate_time": 34.05861854553223
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 02:39:22",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1007,
    "summarize_time": 152.54357608159384,
    "generate_output_len_bytes": 2357,
    "generate_time": 91.51808977127075
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 02:52:58",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 763,
    "summarize_time": 195.92926557858786,
    "generate_output_len_bytes": 2129,
    "generate_time": 96.55542047818501
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 03:15:01",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 991,
    "summarize_time": 64.64422671000163,
    "generate_output_len_bytes": 2927,
    "generate_time": 33.30378039677938
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 03:20:19",
    "git_sha": "0cdb75ef",
    "n_gpus": 2,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 84.57761120796204,
    "generate_output_len_bytes": 3486,
    "generate_time": 57.59072462717692
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:28:44",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1417,
    "summarize_time": 49.08898218472799,
    "generate_output_len_bytes": 2384,
    "generate_time": 21.489527861277264
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:32:39",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 74.43774898846944,
    "generate_output_len_bytes": 2480,
    "generate_time": 34.72673638661703
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:39:21",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1007,
    "summarize_time": 153.41076453526816,
    "generate_output_len_bytes": 2357,
    "generate_time": 91.14894040425618
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 03:52:00",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 763,
    "summarize_time": 199.79869039853415,
    "generate_output_len_bytes": 2129,
    "generate_time": 98.61504419644673
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 04:08:12",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 991,
    "summarize_time": 66.49260465304057,
    "generate_output_len_bytes": 2927,
    "generate_time": 34.17951035499573
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 04:13:39",
    "git_sha": "0cdb75ef",
    "n_gpus": 4,
    "transformers": "4.30.2",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1000,
    "summarize_time": 87.65787092844646,
    "generate_output_len_bytes": 3486,
    "generate_time": 59.3750696182251
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 22:22:24",
    "git_sha": "b63768c6",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 948,
    "summarize_time": 122.13213857014973,
    "generate_output_len_bytes": 2826,
    "generate_time": 66.34098903338115
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/18/2023 22:33:33",
    "git_sha": "c1348fb3",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 948,
    "summarize_time": 120.53812781969707,
    "generate_output_len_bytes": 2826,
    "generate_time": 67.28052496910095
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 22:56:52",
    "git_sha": "fb84de76",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1036,
    "summarize_time": 29.128981749216717,
    "generate_output_len_bytes": 2242,
    "generate_time": 12.197122732798258
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/18/2023 23:00:33",
    "git_sha": "fb84de76",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 05:47:43",
    "git_sha": "22352acd",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 05:48:58",
    "git_sha": "22352acd",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 1,
    "reps": 3,
    "date": "08/19/2023 05:50:40",
    "git_sha": "22352acd",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 948,
    "summarize_time": 165.05752809842429,
    "generate_output_len_bytes": 2605,
    "generate_time": 93.80659619967143
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 06:05:51",
    "git_sha": "22352acd",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 06:10:05",
    "git_sha": "22352acd",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 906,
    "summarize_time": 410.0691332022349,
    "generate_output_len_bytes": 521,
    "generate_time": 57.71272214253744
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 06:36:58",
    "git_sha": "22352acd",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 948,
    "summarize_time": 171.74388321240744,
    "generate_output_len_bytes": 2605,
    "generate_time": 97.00725762049358
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 06:51:13",
    "git_sha": "22352acd",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 792,
    "summarize_time": 267.0555826822917,
    "generate_output_len_bytes": 2783,
    "generate_time": 163.99818523724875
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 07:13:35",
    "git_sha": "22352acd",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 906,
    "summarize_time": 413.9569679101308,
    "generate_output_len_bytes": 521,
    "generate_time": 58.52583885192871
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 4,
    "reps": 3,
    "date": "08/19/2023 07:38:02",
    "git_sha": "22352acd",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 948,
    "summarize_time": 175.4907926718394,
    "generate_output_len_bytes": 2605,
    "generate_time": 98.97720170021057
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/19/2023 12:35:08",
    "git_sha": "29a002e5",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "timemachine",
    "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 983,
    "summarize_time": 42.21107586224874,
    "generate_output_len_bytes": 2130,
    "generate_time": 16.94527777036031
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/21/2023 20:03:36",
    "git_sha": "51318f44",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 41.0461368560791,
    "generate_output_len_bytes": 2383,
    "generate_time": 19.614749511082966
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 20:07:35",
    "git_sha": "51318f44",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 42.8376894791921,
    "generate_output_len_bytes": 2383,
    "generate_time": 20.2719091574351
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/21/2023 20:42:46",
    "git_sha": "2f4bb620",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 20:50:19",
    "git_sha": "2f4bb620",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 66.52468911806743,
    "generate_output_len_bytes": 2479,
    "generate_time": 29.828714847564697
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 20:56:04",
    "git_sha": "2f4bb620",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
    "exception": "OOM"
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/21/2023 19:55:35",
    "git_sha": "51318f44",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 38.753786404927574,
    "generate_output_len_bytes": 2383,
    "generate_time": 19.529522736867268
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/21/2023 20:36:13",
    "git_sha": "51318f44",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 41.024452924728394,
    "generate_output_len_bytes": 2383,
    "generate_time": 20.29120985666911
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/21/2023 20:40:08",
    "git_sha": "51318f44",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 54.554532527923584,
    "generate_output_len_bytes": 2171,
    "generate_time": 24.604793945948284
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 20:50:05",
    "git_sha": "51318f44",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 41.09950613975525,
    "generate_output_len_bytes": 2383,
    "generate_time": 20.947362899780273
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 20:54:08",
    "git_sha": "51318f44",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 58.3172922929128,
    "generate_output_len_bytes": 2171,
    "generate_time": 25.735217014948528
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/21/2023 21:01:04",
    "git_sha": "51318f44",
    "n_gpus": 8,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 42.85940829912821,
    "generate_output_len_bytes": 2383,
    "generate_time": 21.380353291829426
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/21/2023 21:05:24",
    "git_sha": "51318f44",
    "n_gpus": 8,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 54.235164642333984,
    "generate_output_len_bytes": 2171,
    "generate_time": 25.70338026682536
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/21/2023 21:10:37",
    "git_sha": "51318f44",
    "n_gpus": 8,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 927,
    "summarize_time": 133.53030570348105,
    "generate_output_len_bytes": 2782,
    "generate_time": 72.97924383481343
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 22:18:17",
    "git_sha": "51318f44",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 927,
    "summarize_time": 131.45291074117026,
    "generate_output_len_bytes": 2782,
    "generate_time": 72.30849742889404
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/21/2023 22:51:09",
    "git_sha": "383b6bbc",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 39.269713958104454,
    "generate_output_len_bytes": 2383,
    "generate_time": 19.65731406211853
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/21/2023 22:54:54",
    "git_sha": "383b6bbc",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 51.84283971786499,
    "generate_output_len_bytes": 2171,
    "generate_time": 28.441521485646565
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/21/2023 23:13:10",
    "git_sha": "383b6bbc",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 53.383726040522255,
    "generate_output_len_bytes": 2171,
    "generate_time": 24.422890504201252
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 4,
    "reps": 3,
    "date": "08/21/2023 23:18:04",
    "git_sha": "383b6bbc",
    "n_gpus": 4,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 52.791220347086586,
    "generate_output_len_bytes": 2171,
    "generate_time": 25.378511508305866
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 8,
    "reps": 3,
    "date": "08/21/2023 23:23:11",
    "git_sha": "383b6bbc",
    "n_gpus": 8,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.8",
    "hostname": "cloudvm",
    "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1046,
    "summarize_time": 56.3846542040507,
    "generate_output_len_bytes": 2171,
    "generate_time": 26.636192480723064
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 1,
    "reps": 3,
    "date": "08/21/2023 23:52:44",
    "git_sha": "da69b822",
    "n_gpus": 1,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1267,
    "summarize_time": 40.36223220825195,
    "generate_output_len_bytes": 2383,
    "generate_time": 19.87660264968872
  },
  {
    "backend": "text-generation-inference",
    "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 2,
    "reps": 3,
    "date": "08/22/2023 00:15:05",
    "git_sha": "e843e8c3",
    "n_gpus": 2,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "recypabaszmhhmuae",
    "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 915,
    "summarize_time": 64.78201874097188,
    "generate_output_len_bytes": 2479,
    "generate_time": 29.02147897084554
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 16,
    "ngpus": 0,
    "reps": 3,
    "date": "08/22/2023 19:01:15",
    "git_sha": "855b7d15",
    "n_gpus": 0,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "CPU",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1351,
    "summarize_time": 1215.5185990333557,
    "generate_output_len_bytes": 849,
    "generate_time": 180.56836318969727
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 8,
    "ngpus": 0,
    "reps": 3,
    "date": "08/22/2023 20:11:16",
    "git_sha": "855b7d15",
    "n_gpus": 0,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "CPU",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1353,
    "summarize_time": 1216.9783231417339,
    "generate_output_len_bytes": 849,
    "generate_time": 180.42225472132364
  },
  {
    "backend": "transformers",
    "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
    "task": "summary_and_generate",
    "bits": 4,
    "ngpus": 0,
    "reps": 3,
    "date": "08/22/2023 21:21:20",
    "git_sha": "855b7d15",
    "n_gpus": 0,
    "transformers": "4.31.0",
    "bitsandbytes": "0.41.1",
    "cuda": "11.7",
    "hostname": "rippa",
    "gpus": "CPU",
    "summarize_input_len_bytes": 857252,
    "summarize_output_len_bytes": 1354,
    "summarize_time": 1217.1687794526417,
    "generate_output_len_bytes": 843,
    "generate_time": 180.78463260332742
  }
]


================================================
FILE: benchmarks/perf.json
================================================
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:46:19", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.29472152392069, "generate_output_len_bytes": 2384, "generate_time": 14.563165505727133}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:55", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 67.97515447934468, "generate_output_len_bytes": 2384, "generate_time": 33.00641902287801}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:58", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1440, "summarize_time": 114.62220064798991, "generate_output_len_bytes": 2619, "generate_time": 71.0722058614095}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:58:34", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 866, "summarize_time": 39.54404203097025, "generate_output_len_bytes": 2927, "generate_time": 22.466302394866943}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:01:59", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.1394579410553, "generate_output_len_bytes": 2384, "generate_time": 14.757195552190145}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:54:29", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 185.14580019315085, "generate_output_len_bytes": 2042, "generate_time": 117.13909141222636}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:04:37", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.98129558563232, "generate_output_len_bytes": 2512, "generate_time": 69.4871145884196}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:13:08", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 43.23498781522115, "generate_output_len_bytes": 2927, "generate_time": 22.826789538065594}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:10:08", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 90.51939169565837, "generate_output_len_bytes": 2927, "generate_time": 48.96095744768778}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:16:48", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 31.86189842224121, "generate_output_len_bytes": 2384, "generate_time": 14.209659894307455}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:17:39", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 71.48081835110982, "generate_output_len_bytes": 2384, "generate_time": 33.5740262667338}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:19:24", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.17744310696919, "generate_output_len_bytes": 2512, "generate_time": 70.12592967351277}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:27:57", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 42.8066500822703, "generate_output_len_bytes": 2927, "generate_time": 22.626200040181477}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:23:22", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 186.88371555010477, "generate_output_len_bytes": 2042, "generate_time": 117.3530724843343}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:39:03", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 94.50985678037007, "generate_output_len_bytes": 2927, "generate_time": 50.06416177749634}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:08:31", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.80374129613241, "generate_output_len_bytes": 2384, "generate_time": 19.23690136273702}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:11:49", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.79640992482504, "generate_output_len_bytes": 2772, "generate_time": 93.99476226170857}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:25:53", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.44271365801493, "generate_output_len_bytes": 2927, "generate_time": 30.641155401865642}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:30:30", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.80062770843506, "generate_output_len_bytes": 2384, "generate_time": 19.825008392333984}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:35:29", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 177.35046529769897, "generate_output_len_bytes": 2772, "generate_time": 91.73111907641093}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:49:20", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 56.894784371058144, "generate_output_len_bytes": 2927, "generate_time": 32.15500020980835}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:54:11", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.46419604619344, "generate_output_len_bytes": 2384, "generate_time": 20.049855709075928}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:57:39", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 183.73364853858948, "generate_output_len_bytes": 2772, "generate_time": 94.9052836894989}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/18/2023 22:11:59", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 59.204413731892906, "generate_output_len_bytes": 2927, "generate_time": 33.25332593917847}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:17:00", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.09002653757731, "generate_output_len_bytes": 2384, "generate_time": 20.106103817621868}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:20:31", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 185.28164370854697, "generate_output_len_bytes": 2772, "generate_time": 95.13023789723714}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:34:58", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.9919019540151, "generate_output_len_bytes": 2927, "generate_time": 34.328625202178955}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:34", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.49842747052511, "generate_output_len_bytes": 2172, "generate_time": 20.686774571736652}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:55", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:38", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 168.9666860898336, "generate_output_len_bytes": 2249, "generate_time": 73.25518870353699}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:48:09", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 45.30513469378153, "generate_output_len_bytes": 1802, "generate_time": 22.000216643015545}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:51:56", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.64275654157003, "generate_output_len_bytes": 2172, "generate_time": 20.737667481104534}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:47", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.4669913450877, "generate_output_len_bytes": 2132, "generate_time": 141.7793349424998}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:57:35", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 96.61887431144714, "generate_output_len_bytes": 3244, "generate_time": 82.98751719792683}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:55:51", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 167.52292919158936, "generate_output_len_bytes": 2249, "generate_time": 71.82611886660258}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:08:08", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 47.14254776636759, "generate_output_len_bytes": 1802, "generate_time": 22.54850967725118}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:15", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "exception": "OOM"}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:07:15", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 89.59958203633626, "generate_output_len_bytes": 2172, "generate_time": 42.32424934705099}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:30", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 185.44230167071024, "generate_output_len_bytes": 2122, "generate_time": 88.11553311347961}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:29:36", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 922, "summarize_time": 68.06459252039592, "generate_output_len_bytes": 1802, "generate_time": 27.939613421758015}
{"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:26:29", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.8310640652974, "generate_output_len_bytes":

Download .txt

gitextract_5zppyvqi/

├── .dockerignore
├── .gitattributes
├── .github/
│   └── workflows/
│       └── python-package-publish.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── benchmarks/
│   ├── llm_gpu_benchmark.py
│   ├── llm_gpu_benchmark_text-generation-inference.html
│   ├── llm_gpu_benchmark_transformers.html
│   ├── llm_gpu_benchmarks.json
│   ├── perf.json
│   ├── perf.md
│   └── rag_benchmark.md
├── blog/
│   └── README.md
├── ci/
│   └── jenkinsfile
├── cloud/
│   └── packer/
│       ├── Jenkinsfile
│       ├── README.md
│       ├── h2oai-h2ogpt-4096-llama2-13b-chat.sh
│       ├── h2ogpt-azure.json
│       ├── h2ogpt-gcp.json
│       ├── install_h2ogpt.sh
│       ├── setup_environment.sh
│       └── startup-scripts/
│           ├── h2ogpt.service
│           ├── h2ogpt_nginx.service
│           ├── run_h2ogpt.sh
│           ├── run_nginx.sh
│           ├── run_vllm.sh
│           ├── temp.conf
│           └── vllm.service
├── data/
│   ├── README-template.md
│   ├── censor_words.txt
│   ├── config.json
│   ├── create_data_cards.py
│   ├── dai_docs.train.json
│   ├── dai_docs.train_cleaned.json
│   ├── dai_docs.valid.json
│   ├── dai_faq.json
│   ├── example.xlsx
│   ├── h2ogpt-personality.json
│   └── merged.json
├── dev_installers/
│   └── mac/
│       ├── README.md
│       ├── build_mac_installer.sh
│       ├── h2ogpt-osx-m1-cpu.spec
│       ├── h2ogpt-osx-m1-gpu.spec
│       └── mac_run_app.py
├── docker-compose-cpu.yml
├── docker-compose-vllm.yml
├── docker-compose.yml
├── docker_build_script_ubuntu.sh
├── docs/
│   ├── Dockerfile.delta2
│   ├── Dockerfile.internvl
│   ├── FAQ.md
│   ├── FINETUNE.md
│   ├── INSTALL.md
│   ├── LINKS.md
│   ├── README_Agents.md
│   ├── README_CLI.md
│   ├── README_CLIENT.md
│   ├── README_CPU.md
│   ├── README_DOCKER.md
│   ├── README_GPU.md
│   ├── README_InferenceServers.md
│   ├── README_LINUX.md
│   ├── README_LangChain.md
│   ├── README_MACOS.md
│   ├── README_SerpAPI.md
│   ├── README_WHEEL.md
│   ├── README_WINDOWS.md
│   ├── README_offline.md
│   ├── README_quickstart.md
│   ├── README_ui.md
│   ├── TRITON.md
│   ├── autogen.patch
│   ├── autogen2.patch
│   ├── build_windows_gpu.sh
│   ├── google.patch
│   ├── h2oGPT_CPU.ipynb
│   ├── h2oGPT_GPU.ipynb
│   ├── linux_install.sh
│   ├── linux_install_full.sh
│   ├── openai.patch
│   ├── pytubefix.patch
│   ├── run_patches.sh
│   ├── setup_docker_linux.sh
│   ├── tos.md
│   ├── trans.patch
│   ├── trans2.patch
│   ├── windows_freezelist.txt
│   ├── windows_install.bat
│   └── xtt.patch
├── finetune.py
├── generate.py
├── gradio_utils/
│   ├── __init__.py
│   ├── css.py
│   ├── google_auth.py
│   ├── grclient.py
│   ├── prompt_form.py
│   └── yield_utils.py
├── h2ogpt/
│   └── __init__.py
├── helm/
│   └── h2ogpt-chart/
│       ├── .helmignore
│       ├── Chart.yaml
│       ├── templates/
│       │   ├── _helpers.tpl
│       │   ├── config-map.yaml
│       │   ├── deployment.yaml
│       │   └── service.yaml
│       └── values.yaml
├── iterators/
│   ├── __init__.py
│   ├── iterator_pipe.py
│   └── timeout_iterator.py
├── metrics/
│   ├── __init__.py
│   └── quip.py
├── models/
│   ├── README-template.md
│   ├── __init__.py
│   ├── create_model_cards.py
│   ├── gpu_mem_track.py
│   ├── makevllm.sh
│   ├── predict_aquila.py
│   └── test_scrape1.py
├── notebooks/
│   └── h2oGPT_api_examples.ipynb
├── openai_server/
│   ├── __init__.py
│   ├── agent_prompting.py
│   ├── agent_tools/
│   │   ├── aider_code_generation.py
│   │   ├── ask_question_about_documents.py
│   │   ├── ask_question_about_image.py
│   │   ├── audio_transcription.py
│   │   ├── bing_search.py
│   │   ├── common/
│   │   │   └── utils.py
│   │   ├── convert_document_to_text.py
│   │   ├── download_web_video.py
│   │   ├── driverless_ai_data_science.py
│   │   ├── google_search.py
│   │   ├── image_generation.py
│   │   ├── mermaid_renderer.py
│   │   ├── news_query.py
│   │   ├── query_to_web_image.py
│   │   ├── scholar_papers_query.py
│   │   └── wolfram_alpha_math_science_query.py
│   ├── agent_utils.py
│   ├── autogen_2agent_backend.py
│   ├── autogen_agents.py
│   ├── autogen_multi_agent_backend.py
│   ├── autogen_streaming.py
│   ├── autogen_utils.py
│   ├── backend.py
│   ├── backend_utils.py
│   ├── chat_history_render.py
│   ├── cogvlm2_server/
│   │   ├── cogvlm2.py
│   │   └── requirements.txt
│   ├── log.py
│   ├── openai_client.py
│   ├── server.py
│   ├── server_start.py
│   ├── test_autogen_utils.py
│   ├── test_backend_utils.py
│   ├── test_conversion.py
│   ├── test_openai_server.py
│   └── test_prompt_caching.py
├── papers/
│   └── technical-report/
│       ├── compile.sh
│       ├── conf.sty
│       └── h2oGPT-TR.tex
├── reqs_optional/
│   ├── reqs_constraints.txt
│   ├── requirements_optional_agents.txt
│   ├── requirements_optional_audio.txt
│   ├── requirements_optional_cpu_only.txt
│   ├── requirements_optional_doctr.txt
│   ├── requirements_optional_gpu_only.txt
│   ├── requirements_optional_image.txt
│   ├── requirements_optional_langchain.gpllike.txt
│   ├── requirements_optional_langchain.metrics.txt
│   ├── requirements_optional_langchain.txt
│   ├── requirements_optional_langchain.urls.txt
│   ├── requirements_optional_llamacpp_gpt4all.txt
│   ├── requirements_optional_training.txt
│   └── requirements_optional_wikiprocessing.txt
├── requirements.txt
├── setup.py
├── spaces/
│   ├── chatbot/
│   │   └── repo_to_spaces.sh
│   └── demo/
│       ├── app.py
│       ├── app_client_test.py
│       └── requirements.txt
├── spkemb/
│   ├── cmu_us_awb_arctic-wav-arctic_a0002.npy
│   ├── cmu_us_bdl_arctic-wav-arctic_a0009.npy
│   ├── cmu_us_clb_arctic-wav-arctic_a0144.npy
│   ├── cmu_us_ksp_arctic-wav-arctic_b0087.npy
│   ├── cmu_us_rms_arctic-wav-arctic_b0353.npy
│   └── cmu_us_slt_arctic-wav-arctic_a0508.npy
├── src/
│   ├── __init__.py
│   ├── audio_langchain.py
│   ├── basic_nltk.py
│   ├── cli.py
│   ├── client_test.py
│   ├── create_data.py
│   ├── db_utils.py
│   ├── enums.py
│   ├── eval.py
│   ├── evaluate_params.py
│   ├── export_hf_checkpoint.py
│   ├── function_client.py
│   ├── function_server.py
│   ├── gen.py
│   ├── gpt4all_llm.py
│   ├── gpt_langchain.py
│   ├── gradio_funcs.py
│   ├── gradio_runner.py
│   ├── gradio_themes.py
│   ├── h2o_serpapi.py
│   ├── h2oai_pipeline.py
│   ├── image_captions.py
│   ├── image_doctr.py
│   ├── image_pix2struct.py
│   ├── image_utils.py
│   ├── langchain_mistralai/
│   │   └── chat_models.py
│   ├── langchain_openai_local.py
│   ├── llama_flash_attn_monkey_patch.py
│   ├── llm_exllama.py
│   ├── loaders.py
│   ├── make_db.py
│   ├── model_utils.py
│   ├── output_parser.py
│   ├── pandas_agent_langchain.py
│   ├── pre-commit
│   ├── prepare_offline.py
│   ├── prompter.py
│   ├── prompter_utils.py
│   ├── read_wiki_full.py
│   ├── sagemaker.py
│   ├── stopping.py
│   ├── stt.py
│   ├── tts.py
│   ├── tts_coqui.py
│   ├── tts_sentence_parsing.py
│   ├── tts_utils.py
│   ├── utils.py
│   ├── utils_langchain.py
│   ├── utils_procs.py
│   ├── utils_sys.py
│   ├── version.py
│   └── vision/
│       ├── __init__.py
│       ├── extract_movie.py
│       ├── flux.py
│       ├── playv2.py
│       ├── sdxl_turbo.py
│       ├── stable_diffusion_xl.py
│       └── utils_vision.py
├── tests/
│   ├── 1paul_graham.txt
│   ├── __init__.py
│   ├── conftest.py
│   ├── example.xlsx
│   ├── memory_hog_script.py
│   ├── next.txt
│   ├── sample.eml
│   ├── table_as_image.docx
│   ├── test4gpus.sh
│   ├── test_async_iterator_pipe.py
│   ├── test_async_timeout_iterator.py
│   ├── test_cli.py
│   ├── test_client_calls.py
│   ├── test_client_readme.py
│   ├── test_eval.py
│   ├── test_eval_models.py
│   ├── test_fine_tune_export_tgi.sh
│   ├── test_imports.py
│   ├── test_inference_servers.py
│   ├── test_iterator_pipe.py
│   ├── test_langchain_simple.py
│   ├── test_langchain_units.py
│   ├── test_long_context.py
│   ├── test_manual_test.py
│   ├── test_metrics.py
│   ├── test_openai_server.py
│   ├── test_perf_benchmarks.py
│   ├── test_pipeline.py
│   ├── test_prompter.py
│   ├── test_requirements.py
│   ├── test_sentence_parsing.py
│   ├── test_timeout_iterator.py
│   ├── test_tokenizer.py
│   ├── test_tts.py
│   ├── test_ui.py
│   ├── test_utils.py
│   ├── test_vision.py
│   └── utils.py
├── version.txt
├── win_run_app.py
└── windows_installer.cfg

Download .txt

SYMBOL INDEX (1858 symbols across 136 files)

FILE: data/create_data_cards.py
  function test_create_data_cards (line 101) | def test_create_data_cards(dataset_name, link_to_source):

FILE: dev_installers/mac/mac_run_app.py
  function main (line 30) | def main():

FILE: finetune.py
  function log (line 19) | def log(*args, **kwargs):
  function train (line 30) | def train(
  function tokenize (line 580) | def tokenize(prompt, tokenizer, cutoff_len, add_eos_token=False):
  function prune_long_sequences (line 603) | def prune_long_sequences(data_point, cutoff_len=None):
  function generate_and_tokenize_prompt (line 614) | def generate_and_tokenize_prompt(data_point, prompt_type=None, train_on_...
  function test_debug (line 640) | def test_debug():
  function entrypoint_main (line 644) | def entrypoint_main():

FILE: generate.py
  function entrypoint_main (line 15) | def entrypoint_main():

FILE: gradio_utils/css.py
  function get_css (line 1) | def get_css(kwargs, select_string) -> str:
  function make_css_base (line 14) | def make_css_base(select_string) -> str:

FILE: gradio_utils/google_auth.py
  function setup_app (line 5) | def setup_app(name_login='google_login', name_app='h2ogpt', verbose=False):
  function login_gradio (line 121) | def login_gradio(**kwargs):
  function get_app (line 144) | def get_app(demo, app_kwargs={}, **login_kwargs):

FILE: gradio_utils/grclient.py
  function check_job (line 66) | def check_job(job, timeout=0.0, raise_exception=True, verbose=False):
  class LangChainAction (line 83) | class LangChainAction(Enum):
  class CommonClient (line 110) | class CommonClient:
    method question (line 111) | def question(self, instruction, *args, **kwargs) -> str:
    method question_stream (line 123) | def question_stream(
    method query (line 135) | def query(self, query, *args, **kwargs) -> str:
    method query_stream (line 146) | def query_stream(self, query, *args, **kwargs) -> Generator[ReturnType...
    method summarize (line 155) | def summarize(self, *args, query=None, focus=None, **kwargs) -> str:
    method summarize_stream (line 170) | def summarize_stream(self, *args, query=None, focus=None, **kwargs) ->...
    method extract (line 183) | def extract(self, *args, query=None, focus=None, **kwargs) -> list[str]:
    method extract_stream (line 198) | def extract_stream(self, *args, query=None, focus=None, **kwargs) -> l...
    method get_client_kwargs (line 211) | def get_client_kwargs(self, **kwargs):
    method get_query_kwargs (line 259) | def get_query_kwargs(self, **kwargs):
    method check_error (line 268) | def check_error(res_dict):
    method query_or_summarize_or_extract (line 283) | def query_or_summarize_or_extract(
    method check_model (line 1007) | def check_model(self, model):
    method _get_ttl_hash (line 1027) | def _get_ttl_hash(seconds=60):
    method _get_models_full (line 1032) | def _get_models_full(self, ttl_hash=None, do_lock=False) -> List[Dict[...
    method get_models_full (line 1049) | def get_models_full(self, do_lock=False) -> List[Dict[str, Any]]:
    method list_models (line 1055) | def list_models(self) -> List[str]:
    method simple_stream (line 1061) | def simple_stream(
    method stream (line 1188) | def stream(
    method _stream (line 1236) | def _stream(
    method yield_res (line 1300) | def yield_res(
  class H2OGradioClient (line 1366) | class H2OGradioClient(CommonClient, Client):
    method reset_session (line 1372) | def reset_session(self) -> None:
    method __init__ (line 1377) | def __init__(
    method __repr__ (line 1478) | def __repr__(self):
    method __str__ (line 1484) | def __str__(self):
    method setup (line 1490) | def setup(self):
    method get_endpoints (line 1584) | def get_endpoints(client, verbose=False):
    method is_full_git_hash (line 1622) | def is_full_git_hash(s):
    method get_server_hash (line 1626) | def get_server_hash(self) -> str:
    method _get_server_hash (line 1629) | def _get_server_hash(self, ttl_hash=None) -> str:
    method refresh_client_if_should (line 1653) | def refresh_client_if_should(self):
    method refresh_client (line 1675) | def refresh_client(self):
    method clone (line 1717) | def clone(self, do_lock=False):
    method _clone (line 1724) | def _clone(self):
    method submit (line 1739) | def submit(
  class CloneableGradioClient (line 1787) | class CloneableGradioClient(CommonClient, Client):
    method __init__ (line 1788) | def __init__(self, *args, **kwargs):
    method _initialize_session_specific (line 1801) | def _initialize_session_specific(self):
    method _initialize_shared_info (line 1811) | def _initialize_shared_info(self):
    method config (line 1825) | def config(self):
    method config (line 1829) | def config(self, value):
    method _info (line 1833) | def _info(self):
    method _info (line 1837) | def _info(self, value):
    method endpoints (line 1841) | def endpoints(self):
    method endpoints (line 1845) | def endpoints(self, value):
    method executor (line 1849) | def executor(self):
    method executor (line 1853) | def executor(self, value):
    method heartbeat (line 1857) | def heartbeat(self):
    method heartbeat (line 1861) | def heartbeat(self, value):
    method setup (line 1864) | def setup(self):
    method _get_ttl_hash (line 1869) | def _get_ttl_hash(seconds=60):
    method get_server_hash (line 1873) | def get_server_hash(self) -> str:
    method _get_server_hash (line 1876) | def _get_server_hash(self, ttl_hash=None):
    method clone (line 1880) | def clone(self):
    method __repr__ (line 1888) | def __repr__(self):
    method __str__ (line 1893) | def __str__(self):
    method cleanup (line 1898) | def cleanup(self):

FILE: gradio_utils/prompt_form.py
  function get_chatbot_name (line 13) | def get_chatbot_name(base_model, display_name, model_path_llama, inferen...
  function get_avatars (line 40) | def get_avatars(base_model, model_path_llama, inference_server=''):
  function ratingfn1 (line 97) | def ratingfn1():
  function ratingfn2 (line 101) | def ratingfn2():
  function ratingfn3 (line 105) | def ratingfn3():
  function ratingfn4 (line 109) | def ratingfn4():
  function ratingfn5 (line 113) | def ratingfn5():
  function submit_review (line 117) | def submit_review(review_text, text_output, text_output2, *text_outputs1...
  function make_chatbots (line 137) | def make_chatbots(output_label0, output_label0_model2, **kwargs):

FILE: gradio_utils/yield_utils.py
  class ReturnType (line 4) | class ReturnType(BaseModel):

FILE: iterators/iterator_pipe.py
  class IteratorPipe (line 5) | class IteratorPipe:
    method __init__ (line 10) | def __init__(self, sentinel=object()):
    method __iter__ (line 16) | def __iter__(self):
    method __next__ (line 19) | def __next__(self):
    method put (line 30) | def put(self, data) -> bool:
    method close (line 41) | def close(self):
  class AsyncIteratorPipe (line 52) | class AsyncIteratorPipe:
    method __init__ (line 54) | def __init__(self, sentinel=object()):
    method __aiter__ (line 60) | def __aiter__(self):
    method __anext__ (line 63) | async def __anext__(self):
    method put (line 74) | async def put(self, data) -> bool:
    method close (line 85) | async def close(self):

FILE: iterators/timeout_iterator.py
  class TimeoutIterator (line 7) | class TimeoutIterator:
    method __init__ (line 23) | def __init__(self, iterator, timeout=0.0, sentinel=object(),
    method get_sentinel (line 39) | def get_sentinel(self):
    method set_reset_on_next (line 42) | def set_reset_on_next(self, reset_on_next):
    method set_timeout (line 45) | def set_timeout(self, timeout: float):
    method interrupt (line 51) | def interrupt(self):
    method __iter__ (line 59) | def __iter__(self):
    method __next__ (line 62) | def __next__(self):
    method __lookahead (line 99) | def __lookahead(self):
  class AsyncTimeoutIterator (line 111) | class AsyncTimeoutIterator:
    method __init__ (line 117) | def __init__(self, iterator, timeout=0.0, sentinel=object(), reset_on_...
    method get_sentinel (line 128) | def get_sentinel(self):
    method set_reset_on_next (line 131) | def set_reset_on_next(self, reset_on_next):
    method set_timeout (line 134) | def set_timeout(self, timeout: float):
    method interrupt (line 137) | def interrupt(self):
    method __aiter__ (line 140) | def __aiter__(self):
    method __anext__ (line 143) | async def __anext__(self):
    method __lookahead (line 167) | async def __lookahead(self):

FILE: metrics/quip.py
  class Quip (line 105) | class Quip(evaluate.Metric):
    method __init__ (line 106) | def __init__(self, **kwargs):
    method _info (line 130) | def _info(self):
    method _compute (line 161) | def _compute(
    method get_reduced_size (line 213) | def get_reduced_size(self, reduced_query, verbose=True):

FILE: models/create_model_cards.py
  function test_create_model_cards (line 224) | def test_create_model_cards(model_name, base_model, dataset, training_lo...

FILE: models/gpu_mem_track.py
  function get_mem_space (line 32) | def get_mem_space(x):
  function file_writer (line 43) | def file_writer(file_name = None):
  class MemTracker (line 52) | class MemTracker(object):
    method __init__ (line 62) | def __init__(self, detail=True, path='', verbose=False, device=0, log_...
    method get_tensors (line 71) | def get_tensors(self):
    method get_tensor_usage (line 84) | def get_tensor_usage(self):
    method get_allocate_usage (line 88) | def get_allocate_usage(self):
    method clear_cache (line 91) | def clear_cache(self):
    method print_all_gpu_tensor (line 95) | def print_all_gpu_tensor(self, file=None):
    method track (line 99) | def track(self):

FILE: models/predict_aquila.py
  class SeparatorStyle (line 35) | class SeparatorStyle(IntEnum):
  class Conversation (line 47) | class Conversation:
    method get_prompt (line 71) | def get_prompt(self) -> str:
    method set_system_message (line 125) | def set_system_message(self, system_message: str):
    method append_message (line 129) | def append_message(self, role: str, message: str):
    method update_last_message (line 133) | def update_last_message(self, message: str):
    method copy (line 141) | def copy(self):
    method dict (line 156) | def dict(self):
  function register_conv_template (line 170) | def register_conv_template(template: Conversation, override: bool = False):
  function get_conv_template (line 180) | def get_conv_template(name: str) -> Conversation:
  function get_conversation_template (line 184) | def get_conversation_template(model_path: str) -> Conversation:
  function set_random_seed (line 297) | def set_random_seed(seed):
  function covert_prompt_to_input_ids_with_history (line 304) | def covert_prompt_to_input_ids_with_history(text, history, tokenizer, ma...
  function predict (line 329) | def predict(model, text, tokenizer=None,

FILE: models/test_scrape1.py
  function test_get_models (line 15) | def test_get_models(model_name):

FILE: openai_server/agent_prompting.py
  function agent_system_prompt (line 13) | def agent_system_prompt(agent_code_writer_system_message, agent_system_s...
  function get_chat_doc_context (line 209) | def get_chat_doc_context(text_context_list, image_file, agent_work_dir, ...
  function get_ask_question_about_image_helper (line 376) | def get_ask_question_about_image_helper(base_url, api_key, model):
  function get_mermaid_renderer_helper (line 417) | def get_mermaid_renderer_helper():
  function get_image_generation_helper (line 437) | def get_image_generation_helper():
  function get_audio_transcription_helper (line 502) | def get_audio_transcription_helper():
  function get_query_to_web_image_helper (line 524) | def get_query_to_web_image_helper():
  function get_aider_coder_helper (line 544) | def get_aider_coder_helper(base_url, api_key, model, autogen_timeout, de...
  function get_rag_helper (line 570) | def get_rag_helper(base_url, api_key, model, autogen_timeout, text_conte...
  function get_convert_to_text_helper (line 603) | def get_convert_to_text_helper():
  function get_download_web_video_helper (line 628) | def get_download_web_video_helper():
  function get_serp_helper (line 649) | def get_serp_helper():
  function get_semantic_scholar_helper (line 680) | def get_semantic_scholar_helper():
  function get_wolfram_alpha_helper (line 702) | def get_wolfram_alpha_helper():
  function get_dai_helper (line 724) | def get_dai_helper():
  function get_news_api_helper (line 750) | def get_news_api_helper():
  function get_bing_search_helper (line 773) | def get_bing_search_helper():
  function get_api_helper (line 809) | def get_api_helper():
  function get_agent_tools (line 835) | def get_agent_tools():
  function get_full_system_prompt (line 844) | def get_full_system_prompt(agent_code_writer_system_message, agent_syste...
  function planning_prompt (line 920) | def planning_prompt(query):
  function planning_final_prompt (line 941) | def planning_final_prompt(query):

FILE: openai_server/agent_tools/aider_code_generation.py
  function install_aider (line 14) | def install_aider():
  function main (line 20) | def main():

FILE: openai_server/agent_tools/ask_question_about_documents.py
  function has_gpu (line 13) | def has_gpu():
  function get_rag_answer (line 22) | def get_rag_answer(prompt,
  function ask_question_about_documents (line 99) | def ask_question_about_documents():

FILE: openai_server/agent_tools/ask_question_about_image.py
  function convert_svg_to_png (line 17) | def convert_svg_to_png(svg_path):
  function convert_pdf_to_images (line 24) | def convert_pdf_to_images(pdf_path):
  function process_file (line 35) | def process_file(file_path):
  function main (line 48) | def main():

FILE: openai_server/agent_tools/audio_transcription.py
  function check_valid_extension (line 6) | def check_valid_extension(file):
  function main (line 25) | def main():

FILE: openai_server/agent_tools/bing_search.py
  function setup_argparse (line 23) | def setup_argparse():
  function search_web (line 39) | def search_web(client, args):
  function search_images (line 50) | def search_images(client, args):
  function search_news (line 61) | def search_news(client, args):
  function search_videos (line 72) | def search_videos(client, args):
  function print_web_result (line 83) | def print_web_result(result, args):
  function print_image_result (line 93) | def print_image_result(result, args):
  function print_news_result (line 103) | def print_news_result(result, args):
  function print_video_result (line 115) | def print_video_result(result, args):
  function print_info (line 126) | def print_info(info, args):
  function bing_search (line 141) | def bing_search():

FILE: openai_server/agent_tools/common/utils.py
  function is_url_valid_and_alive (line 9) | def is_url_valid_and_alive(url, timeout=5):
  function filename_is_url (line 24) | def filename_is_url(filename):
  function download_simple (line 31) | def download_simple(url, dest=None, overwrite=False, verbose=False):

FILE: openai_server/agent_tools/convert_document_to_text.py
  function has_gpu (line 11) | def has_gpu():
  function pdf_has_images (line 20) | def pdf_has_images(pdf_path):
  function get_num_pages (line 33) | def get_num_pages(file):
  function convert_to_csv (line 42) | def convert_to_csv(file):
  function sources_to_text (line 56) | def sources_to_text(sources1):
  function process_files (line 76) | def process_files(files, urls):
  function get_text (line 194) | def get_text(files, urls):
  function main (line 206) | def main():

FILE: openai_server/agent_tools/download_web_video.py
  function selenium (line 6) | def selenium(base_url, video_url):
  function download_web_video (line 71) | def download_web_video(video_url, base_url="https://www.youtube.com", ou...
  function main (line 96) | def main():

FILE: openai_server/agent_tools/driverless_ai_data_science.py
  function connect_to_h2o_engine (line 10) | def connect_to_h2o_engine(token: str, client_id, token_endpoint_url, env...
  function connect_to_driverless_ai (line 38) | def connect_to_driverless_ai(engine_manager, dai_engine: str = None):
  function create_dataset (line 60) | def create_dataset(dai, data_url: str, dataset_name: str, data_source: s...
  function split_dataset (line 72) | def split_dataset(dataset, train_size: float, train_name: str, test_name...
  function create_experiment (line 90) | def create_experiment(dai, dataset_split, target_column: str, scorer: st...
  function get_experiment_from_key (line 131) | def get_experiment_from_key(experiment_key, token, client_id, token_endp...
  function visualize_importance (line 142) | def visualize_importance(experiment):
  function print_experiment_details (line 168) | def print_experiment_details(experiment):
  function plot_roc_curve (line 188) | def plot_roc_curve(roc_data, save_dir='plots'):
  function plot_precision_recall (line 206) | def plot_precision_recall(pr_data, save_dir='plots'):
  function plot_gains_chart (line 222) | def plot_gains_chart(gains_data, save_dir='plots'):
  function plot_lift_chart (line 239) | def plot_lift_chart(lift_data, save_dir='plots'):
  function plot_ks_chart (line 257) | def plot_ks_chart(ks_data, save_dir='plots'):
  function plot_all_charts (line 273) | def plot_all_charts(roc_curve, prec_recall_curve, gains_chart, lift_char...
  function key_to_experiment (line 344) | def key_to_experiment(experiment_key, client_id, dai_engine, token_endpo...
  function get_artifacts (line 353) | def get_artifacts(experiment=None, experiment_key=None, client_id=None, ...
  function main (line 417) | def main():

FILE: openai_server/agent_tools/google_search.py
  function setup_argparse (line 85) | def setup_argparse():
  function validate_language (line 123) | def validate_language(hl: str) -> str:
  function validate_country (line 129) | def validate_country(gl: str) -> str:
  function perform_search (line 135) | def perform_search(args) -> Dict[str, Any]:
  function save_results_to_file (line 191) | def save_results_to_file(results: Dict[str, Any], filename: str) -> None:
  function print_results (line 203) | def print_results(results: Dict[str, Any], args):
  function google_search (line 264) | def google_search():

FILE: openai_server/agent_tools/image_generation.py
  function main (line 9) | def main():
  function get_image_format (line 157) | def get_image_format(image_data):

FILE: openai_server/agent_tools/mermaid_renderer.py
  function generate_unique_filename (line 12) | def generate_unique_filename(format):
  function find_chrome_path (line 18) | def find_chrome_path():
  function render_mermaid (line 37) | def render_mermaid(mermaid_code, output_file, format='svg'):
  function main (line 108) | def main():

FILE: openai_server/agent_tools/news_query.py
  function fetch_everything (line 7) | def fetch_everything(api_key, query, sources, from_date, to_date, sort_b...
  function fetch_top_headlines (line 27) | def fetch_top_headlines(api_key, sources, country, category, page_size):
  function display_articles (line 46) | def display_articles(articles):
  function main (line 57) | def main():

FILE: openai_server/agent_tools/query_to_web_image.py
  function download_image (line 18) | def download_image(text, file, save_dir='.'):
  function main (line 59) | def main():

FILE: openai_server/agent_tools/scholar_papers_query.py
  function setup_argparse (line 9) | def setup_argparse():
  function search_papers_semanticscholar (line 34) | def search_papers_semanticscholar(sch, args):
  function search_papers_arxiv (line 48) | def search_papers_arxiv(args):
  function print_paper_info_semanticscholar (line 58) | def print_paper_info_semanticscholar(paper, index, args):
  function print_paper_info_arxiv (line 85) | def print_paper_info_arxiv(paper, index, args):
  function print_info (line 99) | def print_info(info, args):
  function download_pdf_semanticscholar (line 118) | def download_pdf_semanticscholar(paper, output_dir):
  function download_pdf_arxiv (line 127) | def download_pdf_arxiv(paper, output_dir):
  function download_pdf (line 133) | def download_pdf(pdf_url, filename):
  function main (line 144) | def main():

FILE: openai_server/agent_tools/wolfram_alpha_math_science_query.py
  function sanitize_filename (line 7) | def sanitize_filename(name):
  function extract_and_save_images (line 14) | def extract_and_save_images(query, app_id, output_dir):
  function main (line 79) | def main():

FILE: openai_server/agent_utils.py
  function get_have_internet (line 15) | def get_have_internet():
  function is_image_file (line 27) | def is_image_file(filename):
  function identify_image_files (line 36) | def identify_image_files(file_list):
  function in_pycharm (line 52) | def in_pycharm():
  function get_inner_function_signature (line 56) | def get_inner_function_signature(func):
  function filter_kwargs (line 67) | def filter_kwargs(func, kwargs):
  function set_python_path (line 74) | def set_python_path():
  function current_datetime (line 91) | def current_datetime():
  function run_agent (line 108) | def run_agent(run_agent_func=None,
  function set_dummy_term (line 126) | def set_dummy_term():
  function fix_markdown_image_paths (line 143) | def fix_markdown_image_paths(text):
  function get_ret_dict_and_handle_files (line 190) | def get_ret_dict_and_handle_files(chat_result, chat_result_planning,
  function guardrail_files (line 344) | def guardrail_files(file_list, hard_fail=False):
  function is_binary_file (line 379) | def is_binary_file(file_path, sample_size=1024):
  function extract_agent_tool (line 390) | def extract_agent_tool(input_string):
  function get_openai_client (line 410) | def get_openai_client(max_time=120):

FILE: openai_server/autogen_2agent_backend.py
  function run_autogen_2agent (line 13) | def run_autogen_2agent(query=None,

FILE: openai_server/autogen_agents.py
  function get_code_execution_agent (line 5) | def get_code_execution_agent(
  function get_code_writer_agent (line 23) | def get_code_writer_agent(
  function get_chat_agent (line 42) | def get_chat_agent(
  function get_human_proxy_agent (line 86) | def get_human_proxy_agent(
  function get_code_group_chat_manager (line 103) | def get_code_group_chat_manager(
  function get_main_group_chat_manager (line 166) | def get_main_group_chat_manager(

FILE: openai_server/autogen_multi_agent_backend.py
  function run_autogen_multi_agent (line 14) | def run_autogen_multi_agent(query=None,

FILE: openai_server/autogen_streaming.py
  class CustomOutputStream (line 14) | class CustomOutputStream(OutputStream):
    method print (line 15) | def print(self, *objects, sep="", end="", flush=False):
    method dump (line 19) | def dump(self, *objects, sep="", end="", flush=False):
  class CustomIOStream (line 24) | class CustomIOStream(IOStream, CustomOutputStream):
  class CaptureIOStream (line 28) | class CaptureIOStream(IOStream):
    method __init__ (line 29) | def __init__(self, output_queue: queue.Queue):
    method print (line 32) | def print(self, *objects: typing.Any, sep: str = "", end: str = "", fl...
  function capture_iostream (line 39) | def capture_iostream(output_queue: queue.Queue) -> typing.Generator[Capt...
  function run_autogen_in_proc (line 45) | def run_autogen_in_proc(func, output_queue, result_queue, exception_queu...
  function iostream_generator (line 62) | async def iostream_generator(func, use_process=False, **kwargs) -> typin...

FILE: openai_server/autogen_utils.py
  class H2OCodeBlock (line 53) | class H2OCodeBlock(CodeBlock):
  class H2OLocalCommandLineCodeExecutor (line 59) | class H2OLocalCommandLineCodeExecutor(LocalCommandLineCodeExecutor):
    method __init__ (line 60) | def __init__(
    method remove_comments_strings (line 94) | def remove_comments_strings(code: str, lang: str) -> str:
    method sanitize_command (line 120) | def sanitize_command(lang: str, code: str) -> None:
    method _get_file_name_from_content (line 242) | def _get_file_name_from_content(self, code: str, workspace_path: Path)...
    method __execute_code_dont_check_setup (line 272) | def __execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]...
    method is_in_container (line 388) | def is_in_container() -> bool:
    method _execute_code_dont_check_setup (line 399) | def _execute_code_dont_check_setup(self, code_blocks: List[CodeBlock])...
    method update_agent_tool_usages (line 479) | def update_agent_tool_usages(self, code_blocks: List[CodeBlock]) -> None:
    method executed_code_note (line 495) | def executed_code_note(ret: CommandLineCodeResult,
    method agent_tool_usage_note (line 513) | def agent_tool_usage_note(self, ret) -> CommandLineCodeResult:
    method output_guardrail (line 529) | def output_guardrail(ret: CommandLineCodeResult) -> CommandLineCodeRes...
    method text_guardrail (line 534) | def text_guardrail(text, any_fail=False, max_bad_lines=3, just_filter_...
    method truncate_output (line 645) | def truncate_output(ret: CommandLineCodeResult) -> CommandLineCodeResult:
  function backoff_handler (line 684) | def backoff_handler(details):
  class H2OConversableAgent (line 689) | class H2OConversableAgent(ConversableAgent):
    method __init__ (line 696) | def __init__(
    method _generate_oai_reply_from_client (line 838) | def _generate_oai_reply_from_client(self, llm_client, messages, cache)...
    method generate_oai_reply (line 849) | def generate_oai_reply(
    method _generate_code_execution_reply_using_executor (line 864) | def _generate_code_execution_reply_using_executor(
    method __generate_code_execution_reply_using_executor (line 880) | def __generate_code_execution_reply_using_executor(
    method confidence_level_guidelines (line 965) | def confidence_level_guidelines() -> str:
    method final_answer_guidelines (line 984) | def final_answer_guidelines() -> str:
  class H2OGroupChatManager (line 1005) | class H2OGroupChatManager(GroupChatManager):
    method _generate_oai_reply_from_client (line 1011) | def _generate_oai_reply_from_client(self, llm_client, messages, cache)...
  function terminate_message_func (line 1023) | def terminate_message_func(msg):
  function get_autogen_response (line 1037) | async def get_autogen_response(func=None, use_process=False, **kwargs):
  function get_code_executor (line 1055) | def get_code_executor(
  function merge_group_chat_messages (line 1110) | def merge_group_chat_messages(a, b):
  function get_all_conversable_agents (line 1146) | def get_all_conversable_agents(group_chat_manager: GroupChatManager) -> ...
  function get_autogen_use_planning_prompt (line 1159) | def get_autogen_use_planning_prompt(model: str) -> bool:

FILE: openai_server/backend.py
  function start_faulthandler (line 24) | def start_faulthandler():
  function decode (line 40) | def decode(x, encoding_name="cl100k_base"):
  function encode (line 49) | def encode(x, encoding_name="cl100k_base"):
  function count_tokens (line 58) | def count_tokens(x, encoding_name="cl100k_base"):
  function get_gradio_auth (line 67) | def get_gradio_auth(user=None, verbose=False):
  function get_gradio_client (line 119) | def get_gradio_client(user=None, verbose=False):
  function sanitize (line 152) | def sanitize(name):
  function get_client (line 159) | def get_client(user=None):
  function get_chunk (line 242) | def get_chunk(outputs_list, job_outputs_num, last_response, num, verbose...
  function get_response (line 260) | async def get_response(chunk_response=True, **kwargs):
  function split_concatenated_dicts (line 330) | def split_concatenated_dicts(concatenated_dicts: str):
  function get_generator (line 354) | def get_generator(instruction, gen_kwargs, use_agent=False, stream_outpu...
  function achat_completion_action (line 387) | async def achat_completion_action(body: dict, stream_output=False):
  function acompletions_action (line 530) | async def acompletions_action(body: dict, stream_output=False):
  function astream_chat_completions (line 654) | async def astream_chat_completions(body: dict, stream_output=True):
  function astream_completions (line 659) | async def astream_completions(body: dict, stream_output=True):
  function get_model_info (line 664) | def get_model_info():
  function get_model_list (line 671) | def get_model_list():
  function split_audio_on_silence (line 679) | def split_audio_on_silence(audio_bytes):
  function split_audio_fixed_intervals (line 695) | def split_audio_fixed_intervals(audio_bytes, interval_ms=10000):
  function audio_to_text (line 710) | async def audio_to_text(model, audio_file, stream, response_format, chun...
  function _audio_to_text (line 726) | async def _audio_to_text(model, audio_file, stream, response_format, chu...
  function text_to_audio (line 766) | async def text_to_audio(model, voice, input, stream, response_format, **...
  function audio_str_to_bytes (line 819) | def audio_str_to_bytes(audio_str1, response_format='wav'):
  function list_to_bytes (line 853) | def list_to_bytes(lst: list) -> str:
  function text_to_embedding (line 861) | def text_to_embedding(model, text, encoding_format, **kwargs):

FILE: openai_server/backend_utils.py
  function concatenate_messages (line 8) | def concatenate_messages(messages, role="assistant", sep="\n"):
  function concat_tool_messages (line 29) | def concat_tool_messages(messages):
  function convert_messages_to_structure (line 93) | def convert_messages_to_structure(
  function handle_content (line 189) | def handle_content(content, structure):
  function structure_to_messages (line 218) | def structure_to_messages(instruction, system_message, history, image_fi...
  function convert_gen_kwargs (line 276) | def convert_gen_kwargs(gen_kwargs):
  function get_user_dir (line 333) | def get_user_dir(authorization):
  function run_upload_api (line 342) | def run_upload_api(content, filename, purpose, authorization, created_at...
  function run_download_api (line 370) | def run_download_api(file_id, authorization):
  function run_download_api_all (line 388) | def run_download_api_all(agent_files, authorization, agent_work_dir):
  function extract_xml_tags (line 397) | def extract_xml_tags(full_text, tags=['name', 'page']):
  function generate_unique_filename (line 407) | def generate_unique_filename(name_page_dict):
  function deduplicate_filenames (line 423) | def deduplicate_filenames(filenames):

FILE: openai_server/chat_history_render.py
  function chat_to_pretty_markdown (line 8) | def chat_to_pretty_markdown(
  function wrap_long_lines (line 91) | def wrap_long_lines(line: str, max_width: int = 80) -> str:
  function chat_to_pretty_markdown_simple (line 113) | def chat_to_pretty_markdown_simple(

FILE: openai_server/cogvlm2_server/cogvlm2.py
  function lifespan (line 35) | async def lifespan(app: FastAPI):
  class ModelCard (line 58) | class ModelCard(BaseModel):
  class ModelList (line 72) | class ModelList(BaseModel):
  class ImageUrl (line 77) | class ImageUrl(BaseModel):
  class TextContent (line 81) | class TextContent(BaseModel):
  class ImageUrlContent (line 86) | class ImageUrlContent(BaseModel):
  class ChatMessageInput (line 94) | class ChatMessageInput(BaseModel):
  class ChatMessageResponse (line 100) | class ChatMessageResponse(BaseModel):
  class DeltaMessage (line 106) | class DeltaMessage(BaseModel):
  class ChatCompletionRequest (line 111) | class ChatCompletionRequest(BaseModel):
  class ChatCompletionResponseChoice (line 122) | class ChatCompletionResponseChoice(BaseModel):
  class ChatCompletionResponseStreamChoice (line 127) | class ChatCompletionResponseStreamChoice(BaseModel):
  class UsageInfo (line 132) | class UsageInfo(BaseModel):
  class ChatCompletionResponse (line 138) | class ChatCompletionResponse(BaseModel):
  function health (line 147) | async def health() -> Response:
  function list_models (line 153) | async def list_models():
  function create_chat_completion (line 163) | async def create_chat_completion(request: ChatCompletionRequest):
  function predict (line 206) | def predict(model_id: str, params: dict):
  function generate_cogvlm (line 232) | def generate_cogvlm(model: AutoModelForCausalLM, tokenizer: AutoTokenize...
  function process_history_and_images (line 245) | def process_history_and_images(messages: List[ChatMessageInput]) -> Tuple[
  function generate_stream_cogvlm (line 308) | def generate_stream_cogvlm(model: AutoModelForCausalLM, tokenizer: AutoT...

FILE: openai_server/openai_client.py
  class MyReturnType (line 27) | class MyReturnType(BaseModel):
    class Config (line 28) | class Config:
  class LangChainAction (line 33) | class LangChainAction(Enum):
  function get_files_from_ids (line 41) | def get_files_from_ids(usage=None, client=None, file_ids=None, work_dir=...
  function file_to_base64 (line 82) | def file_to_base64(file_path, file_path_to_use=None):
  function clean_text_string (line 99) | def clean_text_string(input_string):
  function local_convert_to_pdf (line 107) | def local_convert_to_pdf(convert_to_pdf, x, files_already_pdf, *args, **...
  function group_files_by_base_name (line 118) | def group_files_by_base_name(file_names):
  function group_and_prioritize_files (line 126) | def group_and_prioritize_files(file_names):
  function select_preferred_file (line 139) | def select_preferred_file(files):
  function get_pdf_files (line 149) | def get_pdf_files(file_names, convert_to_pdf):
  function completion_with_backoff (line 306) | def completion_with_backoff(
  function run_openai_client (line 451) | def run_openai_client(
  function is_binary (line 820) | def is_binary(filename):
  function update_file_names (line 848) | def update_file_names(file_list):
  function shutil_rmtree (line 861) | def shutil_rmtree(*args, **kwargs):
  function remove (line 872) | def remove(path: str):

FILE: openai_server/server.py
  class Generation (line 41) | class Generation(BaseModel):
  class ResponseFormat (line 48) | class ResponseFormat(BaseModel):
  class H2oGPTParams (line 56) | class H2oGPTParams(BaseModel):
  class AgentParams (line 182) | class AgentParams(BaseModel):
  class Params (line 203) | class Params(H2oGPTParams, AgentParams):
  class CompletionParams (line 220) | class CompletionParams(Params):
  class TextRequest (line 226) | class TextRequest(Generation, CompletionParams):
  class TextResponse (line 230) | class TextResponse(BaseModel):
  class ChatParams (line 239) | class ChatParams(Params):
  class ChatRequest (line 245) | class ChatRequest(Generation, ChatParams):
  class ChatResponse (line 250) | class ChatResponse(BaseModel):
  class Model (line 259) | class Model(BaseModel):
  class ModelInfoResponse (line 266) | class ModelInfoResponse(BaseModel):
  class ModelListResponse (line 270) | class ModelListResponse(BaseModel):
  function verify_api_key (line 274) | def verify_api_key(authorization: str = Header(None)) -> None:
  function extract_model_from_request (line 310) | async def extract_model_from_request(request: Request, request_data: Cha...
  function model_rate_limit_key (line 319) | def model_rate_limit_key(request: Request):
  function api_key_rate_limit_key (line 332) | def api_key_rate_limit_key(request: Request):
  class InvalidRequestError (line 362) | class InvalidRequestError(Exception):
  function health (line 389) | async def health(request: Request) -> Response:
  function show_version (line 397) | async def show_version(request: Request):
  function validation_exception_handler (line 408) | async def validation_exception_handler(request, exc):
  function options_route (line 415) | async def options_route():
  function openai_completions (line 423) | async def openai_completions(request: Request, request_data: TextRequest...
  function random_uuid (line 478) | def random_uuid() -> str:
  class FunctionCall (line 482) | class FunctionCall(BaseModel):
  class ToolCall (line 487) | class ToolCall(BaseModel):
  function get_tool (line 493) | async def get_tool(request: Request, request_data: ChatRequest, authoriz...
  function _get_tool (line 508) | async def _get_tool(request: Request, request_data: ChatRequest, authori...
  function tool_to_guided_json (line 593) | def tool_to_guided_json(tool):
  function openai_chat_completions (line 605) | async def openai_chat_completions(request: Request,
  function handle_models (line 721) | async def handle_models(request: Request):
  function handle_model_info (line 751) | async def handle_model_info(request: Request):
  function handle_list_models (line 759) | async def handle_list_models(request: Request):
  class AudiotoTextRequest (line 765) | class AudiotoTextRequest(BaseModel):
  function handle_audio_transcription (line 777) | async def handle_audio_transcription(request: Request):
  class AudioTextRequest (line 835) | class AudioTextRequest(BaseModel):
  function modify_wav_header (line 846) | def modify_wav_header(wav_bytes):
  function handle_audio_to_speech (line 880) | async def handle_audio_to_speech(request: Request):
  class ImageGenerationRequest (line 935) | class ImageGenerationRequest(BaseModel):
  function handle_image_generation (line 949) | async def handle_image_generation(request: Request):
  class EmbeddingsResponse (line 1000) | class EmbeddingsResponse(BaseModel):
  class EmbeddingsRequest (line 1006) | class EmbeddingsRequest(BaseModel):
  function handle_embeddings (line 1016) | async def handle_embeddings(request: Request, request_data: EmbeddingsRe...
  class UploadFileResponse (line 1044) | class UploadFileResponse(BaseModel):
  function upload_file (line 1056) | async def upload_file(
  class FileData (line 1070) | class FileData(BaseModel):
  class ListFilesResponse (line 1079) | class ListFilesResponse(BaseModel):
  function list_files (line 1086) | async def list_files(request: Request, authorization: str = Header(None)):
  class RetrieveFileResponse (line 1126) | class RetrieveFileResponse(BaseModel):
  function retrieve_file (line 1138) | async def retrieve_file(request: Request, file_id: str, authorization: s...
  class DeleteFileResponse (line 1165) | class DeleteFileResponse(BaseModel):
  function delete_file (line 1174) | async def delete_file(request: Request, file_id: str, authorization: str...
  function retrieve_file_content (line 1199) | async def retrieve_file_content(request: Request, file_id: str, stream: ...

FILE: openai_server/server_start.py
  function run_server (line 23) | def run_server(host: str = '0.0.0.0',
  function run (line 149) | def run(wait=True, **kwargs):
  function argv_to_kwargs (line 213) | def argv_to_kwargs(argv=None):

FILE: openai_server/test_autogen_utils.py
  function test_shell_safe_commands (line 10) | def test_shell_safe_commands():
  function test_shell_dangerous_commands (line 17) | def test_shell_dangerous_commands():
  function test_shell_comments_and_strings (line 32) | def test_shell_comments_and_strings():
  function test_shell_background_and_scheduling (line 37) | def test_shell_background_and_scheduling():
  function test_shell_file_operations (line 44) | def test_shell_file_operations():
  function test_shell_network_operations (line 51) | def test_shell_network_operations():
  function test_shell_command_substitution (line 58) | def test_shell_command_substitution():
  function test_python_safe_operations (line 72) | def test_python_safe_operations():
  function test_python_dangerous_operations (line 79) | def test_python_dangerous_operations():
  function test_python_subprocess_and_system (line 88) | def test_python_subprocess_and_system():
  function test_python_comments_and_strings (line 96) | def test_python_comments_and_strings():
  function test_python_network_operations (line 103) | def test_python_network_operations():
  function test_python_system_operations (line 116) | def test_python_system_operations():
  function test_remove_comments_strings_shell (line 124) | def test_remove_comments_strings_shell():
  function test_remove_comments_strings_python (line 131) | def test_remove_comments_strings_python():
  function test_edge_cases (line 138) | def test_edge_cases():
  function test_complex_commands (line 144) | def test_complex_commands():
  function test_shell_path_traversal (line 151) | def test_shell_path_traversal():
  function test_python_eval_variations (line 158) | def test_python_eval_variations():
  function test_complex_imports (line 163) | def test_complex_imports():
  function test_nested_function_calls (line 176) | def test_nested_function_calls():
  function test_multi_line_commands (line 184) | def test_multi_line_commands():
  function test_ctypes_import (line 190) | def test_ctypes_import():
  function setup_env_vars (line 208) | def setup_env_vars():
  function test_output_guardrail_safe_output (line 220) | def test_output_guardrail_safe_output(setup_env_vars):
  function test_output_guardrail_key_name_in_output (line 225) | def test_output_guardrail_key_name_in_output(setup_env_vars):
  function test_output_guardrail_dummy_value_in_output (line 230) | def test_output_guardrail_dummy_value_in_output(setup_env_vars):
  function test_output_guardrail_real_key_in_output (line 235) | def test_output_guardrail_real_key_in_output(setup_env_vars):
  function test_output_guardrail_multiple_keys_in_output (line 241) | def test_output_guardrail_multiple_keys_in_output(setup_env_vars):
  function test_output_guardrail_partial_key_in_output (line 250) | def test_output_guardrail_partial_key_in_output(setup_env_vars):
  function test_output_guardrail_empty_output (line 255) | def test_output_guardrail_empty_output():
  function test_output_guardrail_non_string_output (line 260) | def test_output_guardrail_non_string_output():
  function test_output_guardrail_allowed_values (line 276) | def test_output_guardrail_allowed_values(allowed_value):
  function test_output_guardrail1 (line 281) | def test_output_guardrail1():
  function workspace_path (line 390) | def workspace_path():
  function test_basic_filename_extraction (line 394) | def test_basic_filename_extraction(workspace_path):
  function test_filename_with_path (line 399) | def test_filename_with_path(workspace_path):
  function test_filename_with_different_comment_styles (line 404) | def test_filename_with_different_comment_styles(workspace_path):
  function test_filename_not_on_first_line (line 413) | def test_filename_not_on_first_line(workspace_path):
  function test_no_filename_specified (line 418) | def test_no_filename_specified(workspace_path):
  function test_invalid_filename (line 423) | def test_invalid_filename(workspace_path):
  function test_filename_outside_workspace (line 428) | def test_filename_outside_workspace(workspace_path):
  function test_filename_with_colon (line 433) | def test_filename_with_colon(workspace_path):
  function test_filename_without_colon (line 438) | def test_filename_without_colon(workspace_path):
  function test_multiple_filenames (line 443) | def test_multiple_filenames(workspace_path):
  function test_commented_out_filename (line 448) | def test_commented_out_filename(workspace_path):
  function test_filename_with_spaces_around (line 453) | def test_filename_with_spaces_around(workspace_path):
  function test_filename_with_extension_containing_dot (line 458) | def test_filename_with_extension_containing_dot(workspace_path):

FILE: openai_server/test_backend_utils.py
  function test_extract_xml_tags (line 4) | def test_extract_xml_tags():
  function test_deduplicate_filenames (line 24) | def test_deduplicate_filenames():
  function test_generate_unique_filename_multiple_returns (line 46) | def test_generate_unique_filename_multiple_returns():
  function test_exif (line 82) | def test_exif():

FILE: openai_server/test_conversion.py
  function test_conversion (line 13) | def test_conversion():
  function test_conversion2 (line 89) | def test_conversion2():
  function test_structure_to_messages (line 169) | def test_structure_to_messages():
  function test_structure_to_messages_with_system_message (line 207) | def test_structure_to_messages_with_system_message():
  function test_convert_messages_to_structure (line 226) | def test_convert_messages_to_structure():
  function test_image_download (line 325) | def test_image_download():
  function test_concat (line 335) | def test_concat():
  function test_concat_tool (line 371) | def test_concat_tool():
  function test_concat_tool_messages (line 569) | def test_concat_tool_messages(messages: List[Dict[str, str]], expected: ...
  function test_split_single_dict (line 574) | def test_split_single_dict():
  function test_split_multiple_simple_dicts (line 580) | def test_split_multiple_simple_dicts():
  function test_split_multiple_complex_dicts (line 586) | def test_split_multiple_complex_dicts():
  function test_split_dicts_with_nested_braces (line 592) | def test_split_dicts_with_nested_braces():
  function test_split_empty_dicts (line 598) | def test_split_empty_dicts():
  function test_split_mixed_empty_and_non_empty_dicts (line 604) | def test_split_mixed_empty_and_non_empty_dicts():
  function test_split_whitespace_between_dicts (line 610) | def test_split_whitespace_between_dicts():
  function test_split_invalid_input (line 616) | def test_split_invalid_input():
  function test_split_empty_input (line 621) | def test_split_empty_input():
  function test_split_single_dict_with_whitespace (line 625) | def test_split_single_dict_with_whitespace():
  function test_split_dicts_with_escaped_quotes (line 631) | def test_split_dicts_with_escaped_quotes():

FILE: openai_server/test_openai_server.py
  function launch_openai_server (line 16) | def launch_openai_server():
  function test_openai_server (line 22) | def test_openai_server():
  function test_openai_client_test2 (line 41) | def test_openai_client_test2(stream_output, chat, local_server):
  function test_openai_client (line 61) | def test_openai_client(stream_output, chat, local_server, openai_workers...
  function run_openai_client (line 67) | def run_openai_client(stream_output, chat, local_server, openai_workers,...
  function run_test_chat (line 135) | def run_test_chat(chat, openai_client, async_client, system_prompt, chat...
  function show_plot_from_ids (line 196) | def show_plot_from_ids(usage, client):
  function test_autogen (line 233) | def test_autogen():
  function text_file (line 339) | def text_file():
  function pdf_file (line 354) | def pdf_file():
  function image_file (line 367) | def image_file():
  function python_file (line 380) | def python_file():
  function video_file (line 392) | def video_file():
  function test_file_operations (line 405) | def test_file_operations(request, test_file):
  function check_content (line 469) | def check_content(content, test_file_type, test_file):
  function test_return_generator (line 527) | def test_return_generator():
  function test_tool_use (line 552) | def test_tool_use():
  function test_tool_use2 (line 648) | def test_tool_use2():

FILE: openai_server/test_prompt_caching.py
  function assert_cache_control_count (line 16) | def assert_cache_control_count(messages: List[Dict], expected_count: int):
  function test_simple_string_messages (line 25) | def test_simple_string_messages():
  function test_mixed_content_types (line 41) | def test_mixed_content_types():
  function test_max_cache_control_limit (line 57) | def test_max_cache_control_limit():
  function test_empty_list_content (line 71) | def test_empty_list_content():
  function test_preserve_message_order (line 82) | def test_preserve_message_order():

FILE: setup.py
  function parse_requirements (line 10) | def parse_requirements(file_name: str) -> List[str]:

FILE: spaces/demo/app.py
  function generate (line 25) | def generate(query):
  function process_example (line 40) | def process_example(args):

FILE: spaces/demo/app_client_test.py
  function test_app_client_basic (line 28) | def test_app_client_basic():
  function md_to_text (line 40) | def md_to_text(md):

FILE: src/audio_langchain.py
  class OpenAIWhisperParser (line 16) | class OpenAIWhisperParser(BaseBlobParser):
    method __init__ (line 20) | def __init__(self, api_key: Optional[str] = None):
    method lazy_parse (line 23) | def lazy_parse(self, blob: Blob) -> Iterator[Document]:
  class OpenAIWhisperParserLocal (line 85) | class OpenAIWhisperParserLocal(BaseBlobParser):
    method __init__ (line 108) | def __init__(
    method lazy_parse (line 231) | def lazy_parse(self, blob: Blob) -> Iterator[Document]:
  class H2OAudioCaptionLoader (line 308) | class H2OAudioCaptionLoader(ImageCaptionLoader):
    method __init__ (line 311) | def __init__(self, path_audios: Union[str, List[str]] = None,
    method set_context (line 332) | def set_context(self):
    method load_model (line 353) | def load_model(self):
    method set_audio_paths (line 379) | def set_audio_paths(self, path_audios: Union[str, List[str]]):
    method load (line 388) | def load(self, from_youtube=False) -> List[Document]:
    method unload_model (line 411) | def unload_model(self):
  class YoutubeAudioLoader (line 423) | class YoutubeAudioLoader(BlobLoader):
    method __init__ (line 427) | def __init__(self, urls: List[str], save_dir: str):
    method yield_blobs (line 435) | def yield_blobs(self) -> Iterable[Blob]:

FILE: src/cli.py
  function run_cli (line 10) | def run_cli(  # for local function:

FILE: src/client_test.py
  function get_client (line 65) | def get_client(serialize=not is_gradio_version4):
  function get_args (line 74) | def get_args(prompt, prompt_type=None, chat=False,
  function test_client_basic (line 236) | def test_client_basic(prompt_type='human_bot', version=None, visible_mod...
  function test_client_basic_benchmark (line 250) | def test_client_basic_benchmark(id, prompt_type='human_bot', version=None):
  function run_client_nochat (line 296) | def run_client_nochat(prompt, prompt_type, max_new_tokens, version=None,...
  function test_client_basic_api (line 314) | def test_client_basic_api(prompt_type='human_bot', version=None, h2ogpt_...
  function run_client_nochat_api (line 319) | def run_client_nochat_api(prompt, prompt_type, max_new_tokens, version=N...
  function test_client_basic_api_lean (line 338) | def test_client_basic_api_lean(prompt='Who are you?', prompt_type='human...
  function run_client_nochat_api_lean (line 346) | def run_client_nochat_api_lean(prompt, prompt_type, max_new_tokens, vers...
  function test_client_basic_api_lean_morestuff (line 367) | def test_client_basic_api_lean_morestuff(prompt_type='human_bot', versio...
  function run_client_nochat_api_lean_morestuff (line 372) | def run_client_nochat_api_lean_morestuff(prompt, prompt_type='human_bot'...
  function test_client_chat (line 427) | def test_client_chat(prompt_type='human_bot', version=None, h2ogpt_key=N...
  function test_client_chat_stream (line 437) | def test_client_chat_stream(prompt_type='human_bot', version=None, h2ogp...
  function run_client_chat (line 447) | def run_client_chat(prompt='',
  function run_client (line 496) | def run_client(client, prompt, args, kwargs, do_md_to_text=True, verbose...
  function test_client_nochat_stream (line 543) | def test_client_nochat_stream(prompt_type='human_bot', version=None, h2o...
  function run_client_nochat_gen (line 553) | def run_client_nochat_gen(prompt, prompt_type, stream_output, max_new_to...
  function run_client_gen (line 565) | def run_client_gen(client, kwargs, do_md_to_text=True):
  function md_to_text (line 592) | def md_to_text(md, do_md_to_text=True):
  function run_client_many (line 601) | def run_client_many(prompt_type='human_bot', version=None, h2ogpt_key=No...

FILE: src/create_data.py
  function parse_rst_file (line 29) | def parse_rst_file(filepath):
  function test_scrape_dai_docs (line 60) | def test_scrape_dai_docs():
  function test_scrape_dai_docs_all (line 73) | def test_scrape_dai_docs_all():
  function get_sentences (line 111) | def get_sentences(blob, length):
  function setup_dai_docs (line 136) | def setup_dai_docs(path=None, dst="working_dir_docs", from_hf=False):
  function rst_to_outputs (line 187) | def rst_to_outputs(files, min_len=30, max_len=2048 // 2 - 30):
  function test_scrape_dai_docs_all_pandoc (line 255) | def test_scrape_dai_docs_all_pandoc():
  function test_config_to_json (line 277) | def test_config_to_json():
  function copy_tree (line 353) | def copy_tree(src, dst, follow_symlink=False):
  function atomic_move (line 368) | def atomic_move(src, dst):
  function atomic_copy (line 376) | def atomic_copy(src=None, dst=None, with_permissions=True):
  function makedirs (line 391) | def makedirs(path, exist_ok=True):
  function test_prep_instruct_vicuna (line 406) | def test_prep_instruct_vicuna():
  function test_get_small_sample_oig_data (line 500) | def test_get_small_sample_oig_data(filename):
  function test_download_useful_data_as_parquet (line 514) | def test_download_useful_data_as_parquet(filename):
  function test_merge_shuffle_small_sample_oig_data (line 525) | def test_merge_shuffle_small_sample_oig_data():
  function test_join_jsons (line 536) | def test_join_jsons():
  function test_make_rlhf_good_data (line 548) | def test_make_rlhf_good_data(filename):
  function test_show_prompts (line 562) | def test_show_prompts():
  function test_get_open_datasets (line 573) | def test_get_open_datasets():
  function do_one (line 762) | def do_one(data_id, num_downloads):
  function test_otherlic (line 805) | def test_otherlic():
  function test_assemble_and_detox (line 933) | def test_assemble_and_detox():
  function test_basic_cleaning (line 983) | def test_basic_cleaning():
  function parallel_apply (line 1029) | def parallel_apply(df, func, n_jobs=-1, **kwargs):
  function add_better_profanity_flag (line 1053) | def add_better_profanity_flag(df):
  function add_textstat_grade (line 1063) | def add_textstat_grade(df):
  function add_deberta_grade (line 1081) | def add_deberta_grade(df):
  function test_chop_by_lengths (line 1156) | def test_chop_by_lengths():
  function count_human_bot_lengths (line 1180) | def count_human_bot_lengths(df, human=None, bot=None):
  function test_grade (line 1230) | def test_grade():
  function test_add_open_assistant (line 1296) | def test_add_open_assistant(fixup_personality, only_personality, deberta...
  function test_finalize_to_json (line 1493) | def test_finalize_to_json():
  function create_personality_data (line 1539) | def create_personality_data(prompt_type="llama2"):
  function test_check_stats_data (line 1589) | def test_check_stats_data():
  function get_unhelpful_list (line 1644) | def get_unhelpful_list():
  function test_check_unhelpful (line 1745) | def test_check_unhelpful():
  function test_fortune2000_personalized (line 1825) | def test_fortune2000_personalized():

FILE: src/db_utils.py
  function set_userid (line 9) | def set_userid(db1s, requests_state1, get_userid_auth, guest_name=''):
  function set_userid_direct (line 25) | def set_userid_direct(db1s, userid, username):
  function get_userid_direct (line 31) | def get_userid_direct(db1s):
  function get_username_direct (line 35) | def get_username_direct(db1s):
  function get_dbid (line 39) | def get_dbid(db1):
  function set_dbid (line 43) | def set_dbid(db1):
  function length_db1 (line 51) | def length_db1():
  function create_table (line 64) | def create_table(auth_filename):
  function fetch_user (line 79) | def fetch_user(auth_filename, username, verbose=False):
  function upsert_user (line 133) | def upsert_user(db_filename, username, user_details, verbose=False):
  function upsert_auth_dict (line 162) | def upsert_auth_dict(db_filename, auth_dict, verbose=False):
  function get_all_usernames (line 192) | def get_all_usernames(auth_filename):
  function merge_dicts (line 211) | def merge_dicts(original, updates):
  function append_to_users_data (line 229) | def append_to_users_data(auth_filename, updates, verbose=False):
  function append_to_user_data (line 271) | def append_to_user_data(auth_filename, username, updates, verbose=False):

FILE: src/enums.py
  class PromptType (line 4) | class PromptType(Enum):
  class DocumentSubset (line 83) | class DocumentSubset(Enum):
  class DocumentChoice (line 95) | class DocumentChoice(Enum):
  class LangChainMode (line 99) | class LangChainMode(Enum):
  class LangChainTypes (line 112) | class LangChainTypes(Enum):
  class LangChainAction (line 127) | class LangChainAction(Enum):
  class LangChainAgent (line 156) | class LangChainAgent(Enum):
  function is_gradio_vision_model (line 431) | def is_gradio_vision_model(base_model):
  function is_vision_model (line 440) | def is_vision_model(base_model, all_visible_models=[], visible_vision_mo...
  function extra_stop_token_ids (line 470) | def extra_stop_token_ids(base_model, tokenizer=None, as_ids=False):
  function tokens_per_image (line 490) | def tokens_per_image(base_model):
  function is_video_model (line 526) | def is_video_model(base_model):
  function is_json_model (line 532) | def is_json_model(base_model, inference_server, json_vllm=False):
  function does_support_functiontools (line 567) | def does_support_functiontools(inference_server, model_name):
  function does_support_json_mode (line 585) | def does_support_json_mode(inference_server, model_name, json_vllm=False):
  function t5_type (line 607) | def t5_type(model_name):
  function get_langchain_prompts (line 615) | def get_langchain_prompts(pre_prompt_query, prompt_query, pre_prompt_sum...
  function gr_to_lg (line 644) | def gr_to_lg(image_audio_loaders,

FILE: src/eval.py
  function run_eval (line 13) | def run_eval(  # for local function:

FILE: src/export_hf_checkpoint.py
  function do_export (line 13) | def do_export():
  function do_copy (line 220) | def do_copy(OUTPUT_NAME):
  function test_copy (line 236) | def test_copy():
  function inner_test_copy (line 248) | def inner_test_copy():

FILE: src/function_client.py
  function execute_function_on_server (line 8) | def execute_function_on_server(host: str, port: int, function_name: str,...
  function read_result_from_disk (line 28) | def read_result_from_disk(file_path: str, use_pickle: bool, verbose=False):
  function call_function_server (line 48) | def call_function_server(host, port, function_name, args, kwargs, use_di...
  function get_data_h2ogpt (line 63) | def get_data_h2ogpt(file_path, verbose=False, is_url=False, **kwargs):

FILE: src/function_server.py
  function verify_api_key (line 31) | def verify_api_key(authorization: str = Header(None)) -> None:
  class InvalidRequestError (line 52) | class InvalidRequestError(Exception):
  class FunctionRequest (line 56) | class FunctionRequest(BaseModel):
  function health (line 65) | async def health() -> Response:
  function validation_exception_handler (line 71) | async def validation_exception_handler(request, exc):
  function options_route (line 78) | async def options_route():
  function initialize_gen_kwargs (line 86) | def initialize_gen_kwargs():
  function execute_function (line 127) | def execute_function(request: FunctionRequest):
  function do_check (line 173) | def do_check(in_finally=False):
  function startup_event (line 184) | async def startup_event(verbose=True):
  function periodic_health_check (line 188) | async def periodic_health_check(verbose=False):
  function check_some_conditions (line 196) | def check_some_conditions():

FILE: src/gen.py
  function main (line 113) | def main(
  function evaluate_fake (line 2443) | def evaluate_fake(*args, **kwargs):
  function evaluate (line 2456) | def evaluate(
  function get_cutoffs (line 4393) | def get_cutoffs(memory_restriction_level, for_context=False, model_max_l...
  class H2OTextIteratorStreamer (line 4414) | class H2OTextIteratorStreamer(TextIteratorStreamer):
    method __init__ (line 4420) | def __init__(self, tokenizer, skip_prompt: bool = False, timeout: typi...
    method on_finalized_text (line 4429) | def on_finalized_text(self, text: str, stream_end: bool = False):
    method __iter__ (line 4435) | def __iter__(self):
    method __next__ (line 4438) | def __next__(self):
    method clear_queue (line 4460) | def clear_queue(self):
    method put (line 4465) | def put(self, value):
  function generate_with_exceptions (line 4504) | def generate_with_exceptions(func, *args, raise_generate_gpu_exceptions=...
  function get_generate_params (line 4537) | def get_generate_params(model_lower,
  function languages_covered (line 4895) | def languages_covered():
  function score_qa (line 4903) | def score_qa(smodel, stokenizer, question, answer, memory_restriction_le...
  function check_locals (line 4947) | def check_locals(**kwargs):
  function get_model_max_length (line 4970) | def get_model_max_length(model_state):
  function get_model_max_length_from_tokenizer (line 4977) | def get_model_max_length_from_tokenizer(tokenizer):
  function get_max_max_new_tokens (line 4984) | def get_max_max_new_tokens(model_state, **kwargs):
  function get_minmax_top_k_docs (line 5015) | def get_minmax_top_k_docs(is_public, from_ui):
  function remove_refs (line 5030) | def remove_refs(text, keep_sources_in_context, langchain_mode, hyde_leve...
  function history_to_context (line 5058) | def history_to_context(history, langchain_mode=None,
  function get_relaxed_max_new_tokens (line 5179) | def get_relaxed_max_new_tokens(prompt, tokenizer=None, max_new_tokens=No...
  function get_limited_prompt (line 5192) | def get_limited_prompt(instruction,
  function count_overhead_tokens (line 5574) | def count_overhead_tokens(tokenizer, doing_grounding=False):
  function entrypoint_main (line 5596) | def entrypoint_main():
  function append_certificates (line 5629) | def append_certificates(certs_dir):

FILE: src/gpt4all_llm.py
  function get_model_tokenizer_gpt4all (line 16) | def get_model_tokenizer_gpt4all(base_model, n_jobs=None, gpu_id=None, n_...
  class H2OStreamingStdOutCallbackHandler (line 52) | class H2OStreamingStdOutCallbackHandler(StreamingStdOutCallbackHandler):
    method on_llm_new_token (line 54) | def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
  function get_model_kwargs (line 62) | def get_model_kwargs(llamacpp_dict, default_kwargs, cls, exclude_list=[]):
  function get_gpt4all_default_kwargs (line 84) | def get_gpt4all_default_kwargs(max_new_tokens=256,
  function get_llm_gpt4all (line 121) | def get_llm_gpt4all(model_name=None,
  class H2OGPT4All (line 279) | class H2OGPT4All(gpt4all.GPT4All):
    method validate_environment (line 290) | def validate_environment(cls, values: Dict) -> Dict:
    method _call (line 327) | def _call(
  class H2OLlamaCpp (line 359) | class H2OLlamaCpp(LlamaCpp):
    method validate_environment (line 375) | def validate_environment(cls, values: Dict) -> Dict:
    method _call (line 425) | def _call(
    method remove_stop_text (line 482) | def remove_stop_text(self, text, stop=None):
    method _stream (line 491) | def _stream(
    method get_token_ids (line 511) | def get_token_ids(self, text: str) -> List[int]:

FILE: src/gpt_langchain.py
  function get_context_cast (line 131) | def get_context_cast():
  function get_db (line 137) | def get_db(sources, use_openai_embedding=False, db_type='faiss',
  function _get_unique_sources_in_weaviate (line 261) | def _get_unique_sources_in_weaviate(db):
  function del_from_db (line 275) | def del_from_db(db, sources, db_type=None):
  function add_to_db (line 312) | def add_to_db(db, sources, db_type='faiss',
  function create_or_update_db (line 422) | def create_or_update_db(db_type, persist_directory, collection_name,
  class H2OFakeEmbeddings (line 493) | class H2OFakeEmbeddings(FakeEmbeddings):
    method _get_embedding (line 499) | def _get_embedding(self) -> typing.List[float]:
    method embed_documents (line 502) | def embed_documents(self, texts: typing.List[str]) -> typing.List[typi...
    method embed_query (line 505) | def embed_query(self, text: str) -> typing.List[float]:
  function get_embedding (line 509) | def get_embedding(use_openai_embedding, hf_embedding_model=None, preload...
  function get_answer_from_sources (line 578) | def get_answer_from_sources(chain, sources, question):
  class H2Oagenerate (line 598) | class H2Oagenerate:
    method _agenerate (line 599) | async def _agenerate(
    method _agenerate_one (line 628) | async def _agenerate_one(
  class AGenerateStreamFirst (line 643) | class AGenerateStreamFirst:
    method agenerate (line 646) | async def agenerate(
  class ChatAGenerateStreamFirst (line 803) | class ChatAGenerateStreamFirst:
    method agenerate (line 806) | async def agenerate(
  class GradioInference (line 911) | class GradioInference(AGenerateStreamFirst, H2Oagenerate, LLM):
    method validate_environment (line 997) | def validate_environment(cls, values: Dict) -> Dict:
    method _llm_type (line 1014) | def _llm_type(self) -> str:
    method setup_call (line 1018) | def setup_call(self, prompt):
    method _call (line 1159) | def _call(
    method use_gradio_return (line 1277) | def use_gradio_return(self, res_dict, prompt_raw):
    method _acall (line 1284) | async def _acall(
    method get_token_ids (line 1373) | def get_token_ids(self, text: str) -> List[int]:
  class GradioLLaVaInference (line 1379) | class GradioLLaVaInference(GradioInference):
    method validate_environment (line 1386) | def validate_environment(cls, values: Dict) -> Dict:
    method _llm_type (line 1403) | def _llm_type(self) -> str:
    method setup_call (line 1407) | def setup_call(self, prompt):
    method _call (line 1456) | def _call(
    method _acall (line 1511) | async def _acall(
  class SGlangInference (line 1558) | class SGlangInference(AGenerateStreamFirst, H2Oagenerate, LLM):
    method validate_environment (line 1618) | def validate_environment(cls, values: Dict) -> Dict:
    method _llm_type (line 1631) | def _llm_type(self) -> str:
    method get_token_ids (line 1635) | def get_token_ids(self, text: str) -> List[int]:
    method get_conv_template (line 1641) | def get_conv_template(conv_template_name):
    method send_request (line 1647) | async def send_request(self, url, data, delay=0, timeout=None):
    method setup_call (line 1667) | def setup_call(self, prompt):
    method do_many (line 1731) | def do_many(self):
    method a_do_many (line 1736) | async def a_do_many(self):
    method many_to_prompt (line 1739) | def many_to_prompt(self, prompt, responses):
    method do_final (line 1756) | def do_final(self):
    method get_many (line 1763) | async def get_many(self, url, pload):
    method _call (line 1776) | def _call(
    method _acall (line 1838) | async def _acall(
  class H2OHuggingFaceTextGenInference (line 1900) | class H2OHuggingFaceTextGenInference(AGenerateStreamFirst, H2Oagenerate,...
    method prep_prompt (line 1942) | def prep_prompt(self, prompt, stop, kwargs):
    method _call (line 1992) | def _call(
    method _acall (line 2044) | async def _acall(
    method get_token_ids (line 2067) | def get_token_ids(self, text: str) -> List[int]:
  class H2OTextGenOpenAI (line 2079) | class H2OTextGenOpenAI:
    method update_prompts_and_stops (line 2080) | def update_prompts_and_stops(self, prompts, stop, **kwargs):
    method update_kwargs (line 2105) | def update_kwargs(self, prompts, kwargs):
    method max_tokens_for_prompt (line 2115) | def max_tokens_for_prompt(self, prompt: str) -> int:
    method count_out_tokens (line 2123) | def count_out_tokens(self, rets):
    method collect_llm_results (line 2132) | def collect_llm_results(self, rets):
    method _generate (line 2149) | def _generate(
    method _stream (line 2188) | def _stream(
    method _astream (line 2198) | async def _astream(
    method _agenerate (line 2209) | async def _agenerate(
    method _agenerate_one (line 2242) | async def _agenerate_one(
    method get_token_ids (line 2256) | def get_token_ids(self, text: str) -> List[int]:
  class H2OOpenAI (line 2264) | class H2OOpenAI(H2OTextGenOpenAI, OpenAI):
  class H2OReplicate (line 2289) | class H2OReplicate(Replicate):
    method _call (line 2300) | def _call(
  class ExtraChat (line 2329) | class ExtraChat:
    method get_token_ids (line 2330) | def get_token_ids(self, text: str) -> List[int]:
    method get_messages (line 2342) | def get_messages(self, prompts):
    method get_num_tokens (line 2455) | def get_num_tokens(self, text: str) -> int:
  class GenerateStream (line 2465) | class GenerateStream:
    method get_count_output_tokens (line 2466) | def get_count_output_tokens(self, ret):
    method generate_prompt (line 2477) | def generate_prompt(
    method agenerate_prompt (line 2501) | async def agenerate_prompt(
    method _generate (line 2519) | def _generate(
    method tool_string_return (line 2553) | def tool_string_return(self, ret, have_tool):
    method _agenerate (line 2587) | async def _agenerate(
  class GenerateNormal (line 2612) | class GenerateNormal:
    method get_count_output_tokens (line 2613) | def get_count_output_tokens(self, ret):
    method generate_prompt (line 2624) | def generate_prompt(
    method agenerate_prompt (line 2638) | async def agenerate_prompt(
  class GenerateStream2 (line 2655) | class GenerateStream2:
    method count_out_tokens (line 2656) | def count_out_tokens(self, rets):
    method pre_generate (line 2667) | def pre_generate(self, prompts):
    method generate_prompt (line 2675) | def generate_prompt(
    method agenerate_prompt (line 2704) | async def agenerate_prompt(
  class H2OChatOpenAI (line 2738) | class H2OChatOpenAI(ChatAGenerateStreamFirst, GenerateStream, ExtraChat,...
    method get_token_ids (line 2751) | def get_token_ids(self, text: str) -> List[int]:
  class H2OAzureChatOpenAI (line 2759) | class H2OAzureChatOpenAI(ChatAGenerateStreamFirst, GenerateNormal, Extra...
    method get_token_ids (line 2770) | def get_token_ids(self, text: str) -> List[int]:
  class H2OChatAnthropic2 (line 2781) | class H2OChatAnthropic2(ChatAGenerateStreamFirst, GenerateNormal, ExtraC...
  class H2OChatAnthropic2Sys (line 2797) | class H2OChatAnthropic2Sys(H2OChatAnthropic2):
  class H2OChatAnthropic3 (line 2801) | class H2OChatAnthropic3(ChatAGenerateStreamFirst, GenerateStream, ExtraC...
    method process_messages (line 2817) | def process_messages(messages, max_cache_controls=3):
    method _get_request_payload (line 2855) | def _get_request_payload(
    method _stream (line 2884) | def _stream(
  class H2OChatAnthropic3Sys (line 2926) | class H2OChatAnthropic3Sys(H2OChatAnthropic3):
  class H2OChatGoogle (line 2938) | class H2OChatGoogle(ChatAGenerateStreamFirst, GenerateStream, ExtraChat,...
  class H2OChatMistralAI (line 2952) | class H2OChatMistralAI(ChatAGenerateStreamFirst, GenerateStream2, ExtraC...
  class H2OChatGroq (line 2967) | class H2OChatGroq(ChatAGenerateStreamFirst, GenerateStream2, ExtraChat, ...
  class H2OAzureOpenAI (line 2980) | class H2OAzureOpenAI(H2OTextGenOpenAI, AzureOpenAI):
  class H2OHuggingFacePipeline (line 2995) | class H2OHuggingFacePipeline(HuggingFacePipeline):
    method _generate (line 3000) | def _generate(
    method _call (line 3018) | def _call(
    method get_token_ids (line 3047) | def get_token_ids(self, text: str) -> List[int]:
  function get_llm (line 3055) | def get_llm(use_openai_model=False,
  function get_device_dtype (line 4193) | def get_device_dtype():
  function get_wiki_data (line 4205) | def get_wiki_data(title, first_paragraph_only, text_limit=None, take_hea...
  function get_wiki_sources (line 4234) | def get_wiki_sources(first_para=True, text_limit=None):
  function get_github_docs (line 4246) | def get_github_docs(repo_owner, repo_name):
  function get_dai_pickle (line 4275) | def get_dai_pickle(dest="."):
  function get_dai_docs (line 4283) | def get_dai_docs(from_hf=False, get_pickle=True):
  function get_supported_types (line 4328) | def get_supported_types():
  function try_as_html (line 4371) | def try_as_html(file):
  function json_metadata_func (line 4388) | def json_metadata_func(record: dict, metadata: dict) -> dict:
  function get_num_pages (line 4403) | def get_num_pages(file):
  function get_each_page (line 4412) | def get_each_page(file):
  class Crawler (line 4431) | class Crawler:
    method __init__ (line 4437) | def __init__(self, urls=[], deeper_only=True, depth=int(os.getenv('CRA...
    method download_url (line 4446) | def download_url(self, url):
    method get_linked_urls (line 4449) | def get_linked_urls(self, url, html):
    method add_url_to_visit (line 4460) | def add_url_to_visit(self, url):
    method crawl (line 4472) | def crawl(self, url):
    method run (line 4477) | def run(self):
  function file_to_doc (line 4499) | def file_to_doc(file,
  function path_to_doc1 (line 5566) | def path_to_doc1(file,
  function path_to_docs (line 5711) | def path_to_docs(path_or_paths,
  function prep_langchain (line 5988) | def prep_langchain(persist_directory,
  class FakeConsumer (line 6048) | class FakeConsumer(object):
    method __init__ (line 6049) | def __init__(self, *args, **kwargs):
    method run (line 6052) | def run(self):
    method pause (line 6055) | def pause(self):
    method upload (line 6058) | def upload(self):
    method next (line 6061) | def next(self):
    method request (line 6064) | def request(self, batch):
  function get_hf_embedding_model_name (line 6071) | def get_hf_embedding_model_name(hf_embedding_model):
  function check_update_chroma_embedding (line 6078) | def check_update_chroma_embedding(db,
  function migrate_meta_func (line 6121) | def migrate_meta_func(db, langchain_mode):
  function get_existing_db (line 6157) | def get_existing_db(db, persist_directory,
  function clear_embedding (line 6273) | def clear_embedding(db):
  function make_db (line 6287) | def make_db(**langchain_kwargs):
  function get_embed_lock_file (line 6305) | def get_embed_lock_file(db, persist_directory=None):
  function save_embed (line 6318) | def save_embed(db, use_openai_embedding, hf_embedding_model):
  function load_embed (line 6340) | def load_embed(db=None, persist_directory=None, use_openai_embedding=Fal...
  function sanitize_path_segment (line 6379) | def sanitize_path_segment(segment):
  function get_persist_directory (line 6384) | def get_persist_directory(langchain_mode, langchain_type=None, db1s=None...
  function check_persist_directory (line 6460) | def check_persist_directory(persist_directory):
  function _make_db (line 6467) | def _make_db(use_openai_embedding=False,
  function is_chroma_db (line 6682) | def is_chroma_db(db):
  function is_new_chroma_db (line 6686) | def is_new_chroma_db(db):
  function sim_search (line 6693) | def sim_search(db, query='', k=1000, with_score=False, filter_kwargs=Non...
  function _sim_search (line 6721) | def _sim_search(db, query='', k=1000, with_score=False, filter_kwargs=None,
  function large_chroma_db (line 6763) | def large_chroma_db(db):
  function get_metadatas (line 6767) | def get_metadatas(db, full_required=True, k_max=10000):
  function get_db_lock_file (line 6807) | def get_db_lock_file(db, lock_type='getdb'):
  function get_documents (line 6819) | def get_documents(db):
  function get_docs_and_meta (line 6857) | def get_docs_and_meta(db, top_k_docs, filter_kwargs={}, text_context_lis...
  function get_existing_files (line 6899) | def get_existing_files(db):
  function get_existing_hash_ids (line 6906) | def get_existing_hash_ids(db):
  function run_qa_db (line 6913) | def run_qa_db(**kwargs):
  function _run_qa_db (line 6963) | def _run_qa_db(query=None,
  function run_target (line 7619) | def run_target(query='',
  function get_docs_with_score (line 7788) | def get_docs_with_score(query, k_db,
  function _get_docs_with_score (line 7813) | def _get_docs_with_score(query, k_db,
  function get_single_document (line 7861) | def get_single_document(document_choice, db, extension=None):
  function run_hyde (line 7885) | def run_hyde(*args, **kwargs):
  function get_chain (line 8021) | def get_chain(query=None,
  function get_model_max_length (line 9284) | def get_model_max_length(llm=None, tokenizer=None, inference_server=None...
  function get_max_input_tokens (line 9298) | def get_max_input_tokens(llm=None, tokenizer=None, inference_server=None...
  function get_tokenizer (line 9317) | def get_tokenizer(db=None, llm=None, tokenizer=None, inference_server=No...
  function escape_braces (line 9346) | def escape_braces(text):
  function get_template (line 9353) | def get_template(query, iinput,
  function get_hyde_acc (line 9469) | def get_hyde_acc(answer, llm_answers, hyde_show_intermediate_in_accordio...
  function get_sources_answer (line 9511) | def get_sources_answer(query, docs, answer,
  function get_any_db (line 9638) | def get_any_db(db1s, langchain_mode, langchain_mode_paths, langchain_mod...
  function get_sources (line 9686) | def get_sources(db1s, selection_docs_state1, requests_state1, langchain_...
  function update_user_db (line 9753) | def update_user_db(file, db1s, selection_docs_state1, requests_state1,
  function get_lock_file (line 9793) | def get_lock_file(db1, langchain_mode):
  function _update_user_db (line 9807) | def _update_user_db(file,
  function get_all_sources_last_dict (line 10151) | def get_all_sources_last_dict(sources, gradio_upload_to_chatbot_num_max):
  function get_source_files_given_langchain_mode (line 10165) | def get_source_files_given_langchain_mode(db1s, selection_docs_state1, r...
  function get_source_files (line 10202) | def get_source_files(db=None, exceptions=None, metadatas=None):
  function update_and_get_source_files_given_langchain_mode (line 10303) | def update_and_get_source_files_given_langchain_mode(db1s,
  function get_db1 (line 10441) | def get_db1(db1s, langchain_mode1):
  function clean_doc (line 10450) | def clean_doc(docs1):
  function clone_documents (line 10458) | def clone_documents(documents: Iterable[Document]) -> List[Document]:
  function get_db_from_hf (line 10467) | def get_db_from_hf(dest=".", db_dir='db_dir_DriverlessAI_docs.zip'):
  function get_some_dbs_from_hf (line 10493) | def get_some_dbs_from_hf(dest='.', db_zips=None):
  function _create_local_weaviate_client (line 10505) | def _create_local_weaviate_client():
  function _get_qdrant_options (line 10530) | def _get_qdrant_options():
  function _get_unique_sources_in_qdrant (line 10554) | def _get_unique_sources_in_qdrant(db):

FILE: src/gradio_funcs.py
  function evaluate_nochat (line 25) | def evaluate_nochat(*args1, default_kwargs1=None, str_api=False, plain_a...
  function visible_models_to_model_choice (line 406) | def visible_models_to_model_choice(visible_models1, model_states1, api=F...
  function clear_embeddings (line 433) | def clear_embeddings(langchain_mode1, db_type, db1s, dbs=None):
  function fix_text_for_gradio (line 447) | def fix_text_for_gradio(text, fix_new_lines=False, fix_latex_dollars=Tru...
  function get_images_num_max (line 492) | def get_images_num_max(model_choice, fun_args, visible_vision_models, do...
  function get_response (line 544) | def get_response(fun1, history, chatbot_role1, speaker1, tts_language1, ...
  function _get_response (line 847) | def _get_response(fun1, history, chatbot_role1, speaker1, tts_language1,...
  function prepare_audio (line 935) | def prepare_audio(chatbot_role1, speaker1, tts_language1, roles_state1, ...
  function prep_bot (line 987) | def prep_bot(*args, retry=False, which_model=0, kwargs_eval={}, plain_ap...
  function choose_exc (line 1187) | def choose_exc(x, is_public=True):
  function bot (line 1195) | def bot(*args, retry=False, kwargs_evaluate={}, kwargs={}, db_type=None,...
  function is_from_ui (line 1308) | def is_from_ui(requests_state1):
  function is_valid_key (line 1312) | def is_valid_key(enforce_h2ogpt_api_key, enforce_h2ogpt_ui_key, h2ogpt_a...
  function get_one_key (line 1335) | def get_one_key(h2ogpt_api_keys, enforce_h2ogpt_api_key):
  function get_model_max_length (line 1349) | def get_model_max_length(model_state1, model_state0):
  function get_llm_history (line 1362) | def get_llm_history(history):
  function gen1_fake (line 1374) | def gen1_fake(fun1, history):
  function merge_chat_conversation_history (line 1386) | def merge_chat_conversation_history(chat_conversation1, history):
  function update_langchain_mode_paths (line 1406) | def update_langchain_mode_paths(selection_docs_state1):
  function my_db_state_done (line 1419) | def my_db_state_done(state):
  function process_audio (line 1433) | def process_audio(file1, t1=0, t2=30):
  function allow_empty_instruction (line 1445) | def allow_empty_instruction(langchain_mode1, document_subset1, langchain...
  function update_prompt (line 1459) | def update_prompt(prompt_type1, prompt_dict1, model_state1, which_model=...
  function get_fun_with_dict_str_plain (line 1483) | def get_fun_with_dict_str_plain(default_kwargs, kwargs, **kwargs_evaluat...

FILE: src/gradio_runner.py
  function my_get (line 43) | def my_get(url, **kwargs):
  function fix_pydantic_duplicate_validators_error (line 56) | def fix_pydantic_duplicate_validators_error():
  function get_prompt_type1 (line 93) | def get_prompt_type1(is_public, **kwargs):
  function get_prompt_type2 (line 110) | def get_prompt_type2(is_public, **kwargs):
  function ask_block (line 126) | def ask_block(kwargs, instruction_label, visible_upload, file_types, mic...
  function go_gradio (line 273) | def go_gradio(**kwargs):
  function show_doc (line 6495) | def show_doc(db1s, selection_docs_state1, requests_state1,
  function get_inputs_list (line 6733) | def get_inputs_list(inputs_dict, model_lower, model_id=1):
  function update_user_db_gr (line 6766) | def update_user_db_gr(file, db1s, selection_docs_state1, requests_state1,
  function get_sources_gr (line 6837) | def get_sources_gr(db1s, selection_docs_state1, requests_state1, langcha...
  function get_source_files_given_langchain_mode_gr (line 6890) | def get_source_files_given_langchain_mode_gr(db1s, selection_docs_state1...
  function del_source_files_given_langchain_mode_gr (line 6935) | def del_source_files_given_langchain_mode_gr(db1s, selection_docs_state1...
  function update_and_get_source_files_given_langchain_mode_gr (line 6976) | def update_and_get_source_files_given_langchain_mode_gr(db1s,
  function set_userid_gr (line 7072) | def set_userid_gr(db1s, requests_state1, get_userid_auth):
  function set_dbid_gr (line 7077) | def set_dbid_gr(db1):
  function set_userid_direct_gr (line 7082) | def set_userid_direct_gr(db1s, userid, username):

FILE: src/gradio_themes.py
  class H2oTheme (line 72) | class H2oTheme(Soft):
    method __init__ (line 73) | def __init__(
  class SoftTheme (line 151) | class SoftTheme(Soft):
    method __init__ (line 152) | def __init__(
  function get_h2o_title (line 212) | def get_h2o_title(title, description, visible_h2ogpt_qrcode):
  function get_simple_title (line 231) | def get_simple_title(title, description):
  function get_dark_js (line 235) | def get_dark_js() -> str:
  function get_heap_js (line 245) | def get_heap_js(heapAppId: str) -> str:
  function wrap_js_to_lambda (line 251) | def wrap_js_to_lambda(num_params: int, *args: str) -> str:

FILE: src/h2o_serpapi.py
  class H2OSerpAPIWrapper (line 12) | class H2OSerpAPIWrapper(SerpAPIWrapper):
    method get_search_documents (line 13) | def get_search_documents(self, query,
    method arun (line 41) | async def arun(self, query: str, headsize: int, **kwargs: typing.Any) ...
    method run (line 45) | def run(self, query: str, headsize: int, **kwargs: typing.Any) -> list:
    method _process_response (line 50) | def _process_response(res: dict, query: str, headsize: int) -> list:
    method __process_response (line 58) | def __process_response(res: dict, query: str, headsize: int) -> list:
    method results (line 169) | def results(self, query: str) -> dict:

FILE: src/h2oai_pipeline.py
  class H2OTextGenerationPipeline (line 11) | class H2OTextGenerationPipeline(TextGenerationPipeline):
    method __init__ (line 12) | def __init__(self, *args, debug=False, chat=False, stream_output=False,
    method get_token_count (line 104) | def get_token_count(x, tokenizer):
    method limit_prompt (line 124) | def limit_prompt(prompt_text, tokenizer, max_prompt_length=None, buffe...
    method preprocess (line 182) | def preprocess(self, prompt_text, prefix="", handle_long_generation=No...
    method _preprocess (line 200) | def _preprocess(
    method _postprocess (line 271) | def _postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT...
    method postprocess (line 313) | def postprocess(self, model_outputs, return_type=ReturnType.FULL_TEXT,...
    method _forward (line 357) | def _forward(self, model_inputs, **generate_kwargs):
    method __forward (line 381) | def __forward(self, model_inputs, **generate_kwargs):

FILE: src/image_captions.py
  class ImageCaptionLoader (line 37) | class ImageCaptionLoader(BaseLoader):
    method __init__ (line 45) | def __init__(
    method load (line 67) | def load(self) -> List[Document]:
    method _get_captions_and_metadata (line 90) | def _get_captions_and_metadata(
  class H2OImageCaptionLoader (line 131) | class H2OImageCaptionLoader(ImageCaptionLoader):
    method __init__ (line 134) | def __init__(self, path_images: Union[str, List[str]] = None,
    method set_context (line 177) | def set_context(self):
    method load_model (line 201) | def load_model(self):
    method set_image_paths (line 249) | def set_image_paths(self, path_images: Union[str, List[str]]):
    method load (line 258) | def load(self, prompt=None) -> List[Document]:
    method unload_model (line 272) | def unload_model(self):
    method _get_captions_and_metadata (line 277) | def _get_captions_and_metadata(

FILE: src/image_doctr.py
  class H2OOCRLoader (line 19) | class H2OOCRLoader(ImageCaptionLoader):
    method __init__ (line 22) | def __init__(self, path_images: Union[str, List[str]] = None, layout_a...
    method set_context (line 32) | def set_context(self):
    method load_model (line 47) | def load_model(self):
    method unload_model (line 64) | def unload_model(self):
    method set_document_paths (line 75) | def set_document_paths(self, document_paths: Union[str, List[str]]):
    method load (line 84) | def load(self, prompt=None) -> List[Document]:
    method _get_captions_and_metadata (line 99) | def _get_captions_and_metadata(
  function boxes_sort (line 151) | def boxes_sort(boxes):
  function is_same_line (line 163) | def is_same_line(box1, box2):
  function union_box (line 179) | def union_box(box1, box2):
  function space_layout (line 193) | def space_layout(texts, boxes, threshold_show_spaces=8, threshold_char_w...
  function read_pdf (line 250) | def read_pdf(

FILE: src/image_pix2struct.py
  class H2OPix2StructLoader (line 16) | class H2OPix2StructLoader(ImageCaptionLoader):
    method __init__ (line 19) | def __init__(self, path_images: Union[str, List[str]] = None, model_ty...
    method set_context (line 26) | def set_context(self):
    method load_model (line 38) | def load_model(self):
    method unload_model (line 54) | def unload_model(self):
    method set_image_paths (line 59) | def set_image_paths(self, path_images: Union[str, List[str]]):
    method load (line 68) | def load(self, prompt=None) -> List[Document]:
    method _get_captions_and_metadata (line 81) | def _get_captions_and_metadata(

FILE: src/image_utils.py
  function largest_contour (line 10) | def largest_contour(contours):
  function is_contour_acceptable (line 23) | def is_contour_acceptable(contour, image, size_threshold=0.1, aspect_rat...
  function file_to_cv2 (line 46) | def file_to_cv2(img_file):
  function align_image (line 68) | def align_image(img_file):
  function correct_rotation (line 110) | def correct_rotation(img_file, border_size=50):
  function pad_resize_image_file (line 166) | def pad_resize_image_file(img_file, relaxed_resize=False):
  function resize_image (line 185) | def resize_image(image, return_none_if_no_change=True, max_dimension=2048):
  function pad_resize_image (line 209) | def pad_resize_image(image, return_none_if_no_change=False, max_dimensio...
  function fix_image_file (line 269) | def fix_image_file(file, do_align=False, do_rotate=False, do_pad=False, ...
  function get_image_types (line 285) | def get_image_types():
  function get_image_file (line 297) | def get_image_file(image_file, image_control, document_choice, base_mode...

FILE: src/langchain_mistralai/chat_models.py
  function _create_retry_decorator (line 74) | def _create_retry_decorator(
  function _convert_mistral_chat_message_to_message (line 88) | def _convert_mistral_chat_message_to_message(
  function _raise_on_error (line 127) | def _raise_on_error(response: httpx.Response) -> None:
  function _araise_on_error (line 139) | async def _araise_on_error(response: httpx.Response) -> None:
  function _aiter_sse (line 151) | async def _aiter_sse(
  function acompletion_with_retry (line 163) | async def acompletion_with_retry(
  function _convert_delta_to_message_chunk (line 189) | def _convert_delta_to_message_chunk(
  function _format_tool_call_for_mistral (line 237) | def _format_tool_call_for_mistral(tool_call: ToolCall) -> dict:
  function _format_invalid_tool_call_for_mistral (line 251) | def _format_invalid_tool_call_for_mistral(invalid_tool_call: InvalidTool...
  function _convert_message_to_mistral_chat_message (line 265) | def _convert_message_to_mistral_chat_message(
  class ChatMistralAI (line 317) | class ChatMistralAI(BaseChatModel):
    class Config (line 339) | class Config:
    method _default_params (line 346) | def _default_params(self) -> Dict[str, Any]:
    method _client_params (line 379) | def _client_params(self) -> Dict[str, Any]:
    method completion_with_retry (line 383) | def completion_with_retry(
    method _combine_llm_outputs (line 415) | def _combine_llm_outputs(self, llm_outputs: List[Optional[dict]]) -> d...
    method validate_environment (line 432) | def validate_environment(cls, values: Dict) -> Dict:
    method _generate (line 472) | def _generate(
    method _create_chat_result (line 494) | def _create_chat_result(self, response: Dict) -> ChatResult:
    method _create_message_dicts (line 513) | def _create_message_dicts(
    method _stream (line 526) | def _stream(
    method _astream (line 553) | async def _astream(
    method _agenerate (line 580) | async def _agenerate(
    method bind_tools (line 602) | def bind_tools(
    method with_structured_output (line 628) | def with_structured_output(
    method _identifying_params (line 826) | def _identifying_params(self) -> Dict[str, Any]:
    method _llm_type (line 831) | def _llm_type(self) -> str:
    method lc_secrets (line 836) | def lc_secrets(self) -> Dict[str, str]:
    method is_lc_serializable (line 840) | def is_lc_serializable(cls) -> bool:
    method get_lc_namespace (line 845) | def get_lc_namespace(cls) -> List[str]:

FILE: src/langchain_openai_local.py
  class H2OBaseChatOpenAI (line 36) | class H2OBaseChatOpenAI:
    method _stream (line 37) | def _stream(
    method mod_cg_chunk (line 72) | def mod_cg_chunk(self, cg_chunk: ChatGenerationChunk) -> ChatGeneratio...
    method _generate (line 80) | def _generate(
    method _create_chat_result (line 105) | def _create_chat_result(self, response: Union[dict, BaseModel]) -> Cha...
    method _astream (line 132) | async def _astream(
    method _agenerate (line 167) | async def _agenerate(
  class H2OBaseAzureChatOpenAI (line 194) | class H2OBaseAzureChatOpenAI(H2OBaseChatOpenAI, AzureChatOpenAI):

FILE: src/llama_flash_attn_monkey_patch.py
  function forward (line 14) | def forward(
  function _prepare_decoder_attention_mask (line 101) | def _prepare_decoder_attention_mask(
  function replace_llama_attn_with_flash_attn (line 108) | def replace_llama_attn_with_flash_attn():

FILE: src/llm_exllama.py
  class H2OExLlamaTokenizer (line 17) | class H2OExLlamaTokenizer(ExLlamaTokenizer):
    method __call__ (line 18) | def __call__(self, text, *args, **kwargs):
  class H2OExLlamaGenerator (line 22) | class H2OExLlamaGenerator(ExLlamaGenerator):
    method is_exlama (line 23) | def is_exlama(self):
  class Exllama (line 27) | class Exllama(LLM):
    method get_model_path_at (line 92) | def get_model_path_at(path):
    method configure_object (line 106) | def configure_object(params, values, logfunc):
    method validate_environment (line 121) | def validate_environment(cls, values: Dict) -> Dict:
    method _llm_type (line 223) | def _llm_type(self) -> str:
    method get_num_tokens (line 227) | def get_num_tokens(self, text: str) -> int:
    method get_token_ids (line 231) | def get_token_ids(self, text: str) -> List[int]:
    method _call (line 236) | def _call(
    class MatchStatus (line 261) | class MatchStatus(Enum):
    method match_status (line 266) | def match_status(self, sequence: str, banned_sequences: List[str]):
    method stream (line 275) | def stream(

FILE: src/loaders.py
  function get_loaders (line 7) | def get_loaders(model_name, reward_type, llama_type=None,
  function get_tokenizer (line 155) | def get_tokenizer(tokenizer_loader, tokenizer_base_model, local_files_on...

FILE: src/make_db.py
  function glob_to_db (line 14) | def glob_to_db(user_path, chunk=True, chunk_size=512, verbose=False,
  function make_db_main (line 119) | def make_db_main(use_openai_embedding: bool = False,

FILE: src/model_utils.py
  function switch_a_roo_llama (line 37) | def switch_a_roo_llama(base_model, model_path_llama, load_gptq, load_awq...
  function get_config (line 106) | def get_config(base_model,
  function get_non_lora_model (line 254) | def get_non_lora_model(base_model, model_loader, load_half,
  function get_client_from_inference_server (line 359) | def get_client_from_inference_server(inference_server, base_model=None,
  function get_model_retry (line 420) | def get_model_retry(**kwargs):
  function get_root_url (line 446) | def get_root_url(url):
  function get_inf_models (line 472) | def get_inf_models(inference_server, verbose=False):
  function get_model (line 534) | def get_model(
  function get_hf_model (line 1246) | def get_hf_model(load_8bit: bool = False,
  function set_model_max_len (line 1545) | def set_model_max_len(max_seq_len, tokenizer, verbose=False, reward_type...
  function pop_unused_model_kwargs (line 1559) | def pop_unused_model_kwargs(model_kwargs):
  function get_score_model (line 1572) | def get_score_model(score_model: str = None,
  function prep_model_state_none (line 1654) | def prep_model_state_none():
  function model_lock_to_state (line 1664) | def model_lock_to_state(model_dict1, cache_model_state=False, **kwargs):
  function _model_lock_to_state (line 1693) | def _model_lock_to_state(model_dict_json, kwargs_json):
  function __model_lock_to_state (line 1700) | def __model_lock_to_state(model_dict1, **kwargs):
  function get_on_disk_models (line 1866) | def get_on_disk_models(llamacpp_path, use_auth_token, trust_remote_code):

FILE: src/output_parser.py
  class H2OMRKLOutputParser (line 71) | class H2OMRKLOutputParser(MRKLOutputParser):
    method get_format_instructions (line 74) | def get_format_instructions(self) -> str:
    method parse (line 77) | def parse(self, text: str) -> Union[AgentAction, AgentFinish]:
    method _type (line 117) | def _type(self) -> str:
  class H2OPythonMRKLOutputParser (line 121) | class H2OPythonMRKLOutputParser(H2OMRKLOutputParser):
    method get_format_instructions (line 122) | def get_format_instructions(self) -> str:

FILE: src/pandas_agent_langchain.py
  function _get_multi_prompt (line 31) | def _get_multi_prompt(
  function _get_single_prompt (line 75) | def _get_single_prompt(
  function _get_prompt_and_tools (line 117) | def _get_prompt_and_tools(
  function _get_functions_single_prompt (line 164) | def _get_functions_single_prompt(
  function _get_functions_multi_prompt (line 193) | def _get_functions_multi_prompt(
  function _get_functions_prompt_and_tools (line 230) | def _get_functions_prompt_and_tools(
  function create_pandas_dataframe_agent (line 277) | def create_pandas_dataframe_agent(
  function create_csv_agent (line 364) | def create_csv_agent(

FILE: src/prepare_offline.py
  function noop_load (line 1) | def noop_load(*args, **kwargs):
  function go_prepare_offline (line 5) | def go_prepare_offline(*args, **kwargs):

FILE: src/prompter.py
  function get_prompt (line 287) | def get_prompt(prompt_type, prompt_dict, context, reduced, making_contex...
  function generate_prompt (line 1570) | def generate_prompt(data_point, prompt_type, prompt_dict, reduced, makin...
  function inject_chatsep (line 1643) | def inject_chatsep(prompt_type, prompt, chat_sep=None):
  class Prompter (line 1650) | class Prompter(object):
    method __init__ (line 1651) | def __init__(self, prompt_type, prompt_dict, debug=False, stream_outpu...
    method stop_sequences (line 1696) | def stop_sequences(self):
    method generate_prompt (line 1702) | def generate_prompt(self, data_point, reduced=False, context_from_hist...
    method get_response (line 1744) | def get_response(self, outputs, prompt=None, sanitize_bot_response=Fal...
    method fix_text (line 1848) | def fix_text(prompt_type1, text1):
  function step_forward_prompts (line 1883) | def step_forward_prompts(which):
  function step_back_prompts (line 1898) | def step_back_prompts(which):
  function get_vllm_extra_dict (line 1922) | def get_vllm_extra_dict(tokenizer, stop_sequences=[], repetition_penalty...
  function get_system_prompts (line 2066) | def get_system_prompts():
  function get_llava_prompts (line 2086) | def get_llava_prompts():
  function get_response_verification_prompt (line 2095) | def get_response_verification_prompt(instruction,
  function get_correctness_eval_verification_prompt (line 2164) | def get_correctness_eval_verification_prompt(query,
  function get_faithfulness_eval_verification_prompt (line 2191) | def get_faithfulness_eval_verification_prompt(information,
  function get_faithfulness_refine_verification_prompt (line 2214) | def get_faithfulness_refine_verification_prompt(information,
  function get_relevancy_eval_prompt (line 2240) | def get_relevancy_eval_prompt(query_and_response, context):
  function get_relevancy_refine_prompt (line 2261) | def get_relevancy_refine_prompt(query_str, context_str):
  function gradio_to_llm (line 2282) | def gradio_to_llm(x, bot=False):
  function history_for_llm (line 2310) | def history_for_llm(history):
  function get_llm_history (line 2328) | def get_llm_history(history, only_text=False):
  function apply_chat_template (line 2357) | def apply_chat_template(instruction, system_prompt, history,
  function template_supports_system_prompt (line 2414) | def template_supports_system_prompt(tokenizer):
  function convert_messages_and_extract_images (line 2431) | def convert_messages_and_extract_images(tuple_list):
  function model_name_to_prompt_type (line 2464) | def model_name_to_prompt_type(model_name, inference_server,

FILE: src/prompter_utils.py
  function get_use_chat_template (line 6) | def get_use_chat_template(tokenizer, prompt_type=None):
  function has_chat_template (line 14) | def has_chat_template(tokenizer):
  function get_chat_template (line 22) | def get_chat_template(tokenizer):
  function base64_encode_jinja_template (line 32) | def base64_encode_jinja_template(template_str):
  function base64_decode_jinja_template (line 38) | def base64_decode_jinja_template(encoded_str):
  function is_base64 (line 48) | def is_base64(s):

FILE: src/read_wiki_full.py
  function unescape (line 22) | def unescape(x):
  function get_views (line 33) | def get_views():
  class MWDumpDirectLoader (line 46) | class MWDumpDirectLoader(MWDumpLoader):
    method __init__ (line 47) | def __init__(self, data: str, encoding: Optional[str] = "utf8",
    method load (line 61) | def load(self) -> List[Document]:
  function search_index (line 105) | def search_index(search_term, index_filename):
  function get_start_bytes (line 121) | def get_start_bytes(index_filename):
  function get_wiki_filenames (line 131) | def get_wiki_filenames():
  function get_documents_by_search_term (line 141) | def get_documents_by_search_term(search_term):
  function get_one_chunk (line 153) | def get_one_chunk(wiki_filename, start_byte, end_byte, return_file=True,
  function get_all_documents (line 180) | def get_all_documents(small_test=2, n_jobs=None, use_views=True):
  function test_by_search_term (line 220) | def test_by_search_term():
  function test_start_bytes (line 231) | def test_start_bytes():
  function test_get_all_documents (line 236) | def test_get_all_documents():
  function get_one_pageviews (line 245) | def get_one_pageviews(fil):
  function test_agg_pageviews (line 261) | def test_agg_pageviews(gen_files=False):
  function test_reduce_pageview (line 283) | def test_reduce_pageview():
  function test_do_wiki_full_all (line 309) | def test_do_wiki_full_all():

FILE: src/sagemaker.py
  class ChatContentHandler (line 11) | class ChatContentHandler(LLMContentHandler):
    method transform_input (line 15) | def transform_input(self, prompt: str, model_kwargs: typing.Dict) -> b...
    method transform_output (line 24) | def transform_output(self, output: bytes) -> str:
  class BaseContentHandler (line 29) | class BaseContentHandler(LLMContentHandler):
    method transform_input (line 33) | def transform_input(self, prompt: str, model_kwargs: typing.Dict) -> b...
    method transform_output (line 37) | def transform_output(self, output: bytes) -> str:
  class H2OSagemakerEndpoint (line 42) | class H2OSagemakerEndpoint(SagemakerEndpoint):
    method validate_environment (line 48) | def validate_environment(cls, values: typing.Dict) -> typing.Dict:
    method get_token_ids (line 83) | def get_token_ids(self, text: str) -> typing.List[int]:

FILE: src/stopping.py
  function update_terminate_responses (line 10) | def update_terminate_responses(terminate_response, tokenizer=None, trust...
  class StoppingCriteriaSub (line 45) | class StoppingCriteriaSub(StoppingCriteria):
    method __init__ (line 47) | def __init__(self, stops=[], stop_words=[], encounters=[], device="cud...
    method __call__ (line 65) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen...
  function get_stopping (line 98) | def get_stopping(prompt_type, prompt_dict, tokenizer, device, base_model,

FILE: src/stt.py
  function get_transcriber (line 11) | def get_transcriber(model="openai/whisper-base.en", use_gpu=True, gpu_id...
  function audio_bytes_to_numpy (line 25) | def audio_bytes_to_numpy(audio_bytes):
  function transcribe (line 45) | def transcribe(audio_state1, new_chunk, transcriber=None, max_chunks=Non...

FILE: src/tts.py
  function get_speech_model (line 26) | def get_speech_model():
  function gen_t5 (line 41) | def gen_t5(text, processor=None, model=None, speaker_embedding=None, voc...
  function get_tts_model (line 47) | def get_tts_model(t5_model="microsoft/speecht5_tts",
  function get_speakers (line 65) | def get_speakers():
  function get_speakers_gr (line 76) | def get_speakers_gr(value=None):
  function process_audio (line 86) | def process_audio(sampling_rate, waveform):
  function predict_from_audio (line 106) | def predict_from_audio(processor, model, speaker_embedding, vocoder, aud...
  function generate_speech (line 124) | def generate_speech(response, speaker,
  function predict_from_text (line 162) | def predict_from_text(text, speaker, tts_speed, processor=None, model=No...
  function get_speaker_embedding (line 219) | def get_speaker_embedding(speaker, device):
  function _predict_from_text (line 242) | def _predict_from_text(text, speaker, processor=None, model=None, vocode...
  function audio_to_html (line 270) | def audio_to_html(audio):
  function text_to_speech (line 281) | def text_to_speech(text, sr=16000):
  function test_bark (line 290) | def test_bark():

FILE: src/tts_coqui.py
  function list_models (line 26) | def list_models():
  function get_xtt (line 31) | def get_xtt(model_name="tts_models/multilingual/multi-dataset/xtts_v2", ...
  function get_latent (line 77) | def get_latent(speaker_wav, voice_cleanup=False, model=None, gpt_cond_le...
  function get_voice_streaming (line 96) | def get_voice_streaming(prompt, language, latent, suffix="0", model=None...
  function generate_speech (line 136) | def generate_speech(response,
  function sentence_to_wave (line 188) | def sentence_to_wave(sentence, supported_languages, tts_speed,
  function get_role_to_wave_map (line 280) | def get_role_to_wave_map():
  function allowed_roles (line 290) | def allowed_roles():
  function get_roles (line 294) | def get_roles(choices=None, value=None):
  function predict_from_text (line 308) | def predict_from_text(response, chatbot_role, language, roles_map, tts_s...
  function filter_wave_1 (line 361) | def filter_wave_1(speaker_wav):
  function filter_wave_2 (line 380) | def filter_wave_2(speaker_wav):
  function get_languages_gr (line 422) | def get_languages_gr(visible=True, value=None):

FILE: src/tts_sentence_parsing.py
  function setup_nltk (line 7) | def setup_nltk():
  function init_sentence_state (line 18) | def init_sentence_state():
  function unpack_state (line 23) | def unpack_state(sentence_state):
  function pack_state (line 30) | def pack_state(sentence_state, *args):
  function split_sentences (line 40) | def split_sentences(sentence, n=250):
  function _get_sentences (line 90) | def _get_sentences(response, verbose=False, min_start=15, max_length=250):
  function get_sentence (line 108) | def get_sentence(response, sentence_state, is_final=False, verbose=False):
  function clean_sentence (line 131) | def clean_sentence(sentence, verbose=False):
  function detect_language (line 206) | def detect_language(prompt, supported_languages, verbose=False):

FILE: src/tts_utils.py
  function get_wave_header (line 11) | def get_wave_header(frame_input=b"", channels=1, sample_width=2, sample_...
  function prepare_speech (line 27) | def prepare_speech(sr=24000):
  function get_no_audio (line 32) | def get_no_audio(return_as_byte=True, return_nonbyte_as_file=False, sr=N...
  function combine_audios (line 43) | def combine_audios(audios, audio=None, channels=1, sample_width=2, sr=24...
  function chunk_speed_change (line 78) | def chunk_speed_change(chunk, sr, tts_speed=1.0):
  function pydub_to_np (line 112) | def pydub_to_np(audio: pydub.AudioSegment) -> (np.ndarray, int):

FILE: src/utils.py
  function H2O_Fire (line 55) | def H2O_Fire(component=None):
  function set_seed (line 82) | def set_seed(seed: int):
  function flatten_list (line 99) | def flatten_list(lis):
  function clear_torch_cache (line 110) | def clear_torch_cache(allow_skip=False):
  function ping (line 123) | def ping():
  function ping_gpu (line 131) | def ping_gpu():
  function ping_gpu_memory (line 143) | def ping_gpu_memory():
  function get_torch_allocated (line 150) | def get_torch_allocated():
  function get_device (line 155) | def get_device(n_gpus=None):
  function system_info (line 167) | def system_info():
  function system_info_print (line 224) | def system_info_print():
  function zip_data (line 234) | def zip_data(root_dirs=None, zip_file=None, base_dir='./', fail_any_exce...
  function _zip_data (line 244) | def _zip_data(root_dirs=None, zip_file=None, base_dir='./'):
  function tar_data (line 269) | def tar_data(root_dirs=None, tar_file=None, base_dir='./', fail_any_exce...
  function _tar_data (line 279) | def _tar_data(root_dirs=None, tar_file=None, base_dir='./'):
  function save_generate_output (line 304) | def save_generate_output(prompt=None, output=None, base_model=None, save...
  function _save_generate_tokens (line 319) | def _save_generate_tokens(response_no_refs, extra_dict):
  function _save_generate_output (line 329) | def _save_generate_output(prompt=None, output=None, base_model=None, sav...
  function s3up (line 373) | def s3up(filename):
  function _s3up (line 382) | def _s3up(filename):
  function get_githash (line 405) | def get_githash():
  function copy_code (line 426) | def copy_code(run_id):
  class NullContext (line 445) | class NullContext(threading.local):
    method __init__ (line 452) | def __init__(self, *args, **kwargs):
    method __enter__ (line 455) | def __enter__(self):
    method __exit__ (line 458) | def __exit__(self, exc_type, exc_value, exc_traceback):
    method finally_act (line 461) | def finally_act(self):
  class AsyncNullContext (line 465) | class AsyncNullContext(threading.local):
    method __init__ (line 472) | def __init__(self, *args, **kwargs):
    method __aenter__ (line 475) | async def __aenter__(self):
    method __aexit__ (line 478) | async def __aexit__(self, exc_type, exc_value, exc_traceback):
    method finally_act (line 481) | async def finally_act(self):
  function wrapped_partial (line 485) | def wrapped_partial(func, *args, **kwargs):
  class ThreadException (line 498) | class ThreadException(Exception):
  class EThread (line 502) | class EThread(threading.Thread):
    method __init__ (line 504) | def __init__(self, group=None, target=None, name=None,
    method run (line 514) | def run(self):
    method join (line 534) | def join(self, timeout=None):
  function import_matplotlib (line 544) | def import_matplotlib():
  function get_sha (line 558) | def get_sha(value):
  function sanitize_filename (line 562) | def sanitize_filename(name, file_length_limit=250):
  function shutil_rmtree (line 585) | def shutil_rmtree(*args, **kwargs):
  function remove (line 594) | def remove(path: str):
  function makedirs (line 606) | def makedirs(path, exist_ok=True, tmp_ok=False, use_base=False):
  function atomic_move_simple (line 649) | def atomic_move_simple(src, dst):
  function atomic_copy (line 657) | def atomic_copy(src="", dst=None, content=None):
  function move_tree (line 673) | def move_tree(src, dst, include_root=True):
  function copy_tree (line 696) | def copy_tree(src, dst, follow_symlink=False):
  function download_simple (line 711) | def download_simple(url, dest=None, overwrite=False, verbose=False):
  function download (line 770) | def download(url, dest=None, dest_path=None):
  function get_doc (line 816) | def get_doc(x):
  function get_source (line 820) | def get_source(x):
  function markdown_to_html (line 824) | def markdown_to_html(content):
  function is_markdown (line 841) | def is_markdown(string):
  function get_accordion_named (line 856) | def get_accordion_named(content, title, font_size=8):
  function hyde_titles (line 863) | def hyde_titles(level):
  function get_accordion (line 877) | def get_accordion(x, font_size=2, head_acc=50):
  function get_url (line 883) | def get_url(x, from_str=False, short_name=False, font_size=2):
  function get_short_name (line 903) | def get_short_name(name, maxl=50):
  function cuda_vis_check (line 914) | def cuda_vis_check(total_gpus):
  function get_ngpus_vis (line 942) | def get_ngpus_vis(raise_if_exception=True):
  function get_mem_gpus (line 978) | def get_mem_gpus(raise_if_exception=True, ngpus=None):
  class ForkContext (line 1011) | class ForkContext(threading.local):
    method __init__ (line 1017) | def __init__(self, args=None, kwargs=None, forkdata_capable=True):
    method __enter__ (line 1033) | def __enter__(self):
    method __exit__ (line 1045) | def __exit__(self, exc_type, exc_value, exc_traceback):
    method finally_act (line 1048) | def finally_act(self):
  class _ForkDataContext (line 1058) | class _ForkDataContext(threading.local):
    method __init__ (line 1059) | def __init__(
    method args (line 1076) | def args(self) -> Tuple:
    method args (line 1081) | def args(self, args):
    method kwargs (line 1090) | def kwargs(self) -> Dict:
    method kwargs (line 1095) | def kwargs(self, kwargs):
    method _reset (line 1103) | def _reset(self):
    method get_args_kwargs (line 1108) | def get_args_kwargs(self, func, args, kwargs) -> Tuple[Callable, Tuple...
    method get_args_kwargs_for_traced_func (line 1122) | def get_args_kwargs_for_traced_func(func, args, kwargs):
  function using_conda (line 1153) | def using_conda():
  function get_python_paths (line 1162) | def get_python_paths():
  function _traced_func (line 1187) | def _traced_func(func, *args, **kwargs):
  function call_subprocess_onetask (line 1197) | def call_subprocess_onetask(func, args=None, kwargs=None):
  class ProgressParallel (line 1217) | class ProgressParallel(Parallel):
    method __init__ (line 1218) | def __init__(self, use_tqdm=True, total=None, *args, **kwargs):
    method __call__ (line 1223) | def __call__(self, *args, **kwargs):
    method print_progress (line 1227) | def print_progress(self):
  function get_kwargs (line 1234) | def get_kwargs(func, exclude_names=None, **kwargs):
  function hash_file (line 1283) | def hash_file(file):
  function start_faulthandler (line 1310) | def start_faulthandler():
  function get_hf_server (line 1323) | def get_hf_server(inference_server):
  class FakeTokenizer (line 1374) | class FakeTokenizer:
    method __init__ (line 1380) | def __init__(self, model_max_length=2048,
    method encode (line 1417) | def encode(self, x, *args, return_tensors="pt", **kwargs):
    method decode (line 1446) | def decode(self, x, *args, **kwargs):
    method num_tokens_from_string (line 1465) | def num_tokens_from_string(self, prompt: str) -> int:
    method heuristic_encode (line 1482) | def heuristic_encode(self, text: str) -> list:
    method __call__ (line 1489) | def __call__(self, x, *args, **kwargs):
  function get_local_ip (line 1493) | def get_local_ip():
  function set_openai (line 1678) | def set_openai(inference_server, model_name=None):
  function get_model_name (line 1785) | def get_model_name(model_name, openai_client):
  function get_list_or_str (line 1805) | def get_list_or_str(x):
  function deepcopy_by_pickle_object (line 1819) | def deepcopy_by_pickle_object(object):
  function url_alive (line 1832) | def url_alive(url):
  function return_good_url (line 1846) | def return_good_url(url):
  function is_probably_url (line 1865) | def is_probably_url(url):
  function dict_to_html (line 1872) | def dict_to_html(x, small=True, api=False):
  function split_into_sentences (line 1888) | def split_into_sentences(text):
  function text_to_html (line 1894) | def text_to_html(x, api=False):
  function lg_to_gr (line 1914) | def lg_to_gr(
  function enqueue_output (line 2016) | def enqueue_output(file, queue):
  function read_popen_pipes (line 2023) | def read_popen_pipes(p):
  function start_process (line 2048) | def start_process(cmd):
  function execute_cmd_stream (line 2058) | def execute_cmd_stream(cmd=None, script_content=None, cwd=None, env=None...
  function str_to_list (line 2188) | def str_to_list(x, allow_none=False):
  function str_to_dict (line 2210) | def str_to_dict(x):
  function get_token_count (line 2225) | def get_token_count(x, tokenizer, token_count_fun=None, add_special_toke...
  function reverse_ucurve_list (line 2255) | def reverse_ucurve_list(lst):
  function undo_reverse_ucurve_list (line 2275) | def undo_reverse_ucurve_list(lst):
  function get_size (line 2301) | def get_size(start_path='.'):
  function get_test_name_core (line 2313) | def get_test_name_core():
  class FullSet (line 2319) | class FullSet(set):
    method __contains__ (line 2320) | def __contains__(self, item):
  function create_relative_symlink (line 2327) | def create_relative_symlink(target, link_name):
  function get_gradio_tmp (line 2362) | def get_gradio_tmp():
  function in_gradio_root (line 2369) | def in_gradio_root(file):
  function get_is_gradio_h2oai (line 2376) | def get_is_gradio_h2oai():
  function split_list (line 2384) | def split_list(input_list, split_size):
  function get_lock_file (line 2389) | def get_lock_file(name):
  function merge_dict (line 2398) | def merge_dict(dict1, dict2):
  function is_uuid4 (line 2404) | def is_uuid4(string):
  function is_full_git_hash (line 2410) | def is_full_git_hash(s):
  function get_show_username (line 2415) | def get_show_username(username1):
  function get_code_blocks (line 2427) | def get_code_blocks(response):
  function get_json (line 2431) | def get_json(response, fixup=True, json_schema_type=None):
  function extract_values (line 2441) | def extract_values(data):
  function contains_schema (line 2468) | def contains_schema(data):
  function handle_json (line 2483) | def handle_json(data):
  function repair_json_by_type (line 2490) | def repair_json_by_type(response, json_schema_type=None):
  function _get_json (line 2512) | def _get_json(response, fixup=True, json_schema_type=None):
  function preprocess_code_blocks (line 2554) | def preprocess_code_blocks(stream_content):
  function extract_code_block_content (line 2562) | def extract_code_block_content(stream_content):
  function has_starting_code_block (line 2573) | def has_starting_code_block(text):
  function looks_like_json (line 2578) | def looks_like_json(text):
  function is_json_vllm (line 2593) | def is_json_vllm(model, base_model, inference_server, verbose=False):
  function get_vllm_version (line 2616) | def get_vllm_version(openai_client, inference_server, verbose=False):
  function get_docs_tokens (line 2640) | def get_docs_tokens(tokenizer, text_context_list=[], max_input_tokens=No...
  function get_limited_text (line 2675) | def get_limited_text(hard_limit_tokens, text, tokenizer, verbose=False):
  function deduplicate_names (line 2714) | def deduplicate_names(names):
  function download_image (line 2736) | def download_image(image_url, save_dir):
  function check_input_type (line 2779) | def check_input_type(input_string):
  function get_youtube_urls (line 2812) | def get_youtube_urls():
  function get_llama_lower_hf (line 2997) | def get_llama_lower_hf(llama_lower):
  function get_depth_normal (line 3005) | def get_depth_normal(lst):
  function get_gradio_depth (line 3012) | def get_gradio_depth(lst):
  function is_empty (line 3033) | def is_empty(obj):
  function create_typed_dict (line 3071) | def create_typed_dict(schema: Dict[str, Any], name: str = "Schema") -> t...
  function get_supports_schema (line 3109) | def get_supports_schema(inference_server, base_model, response_format='j...
  function dedup_list (line 3135) | def dedup_list(x):

FILE: src/utils_langchain.py
  class StreamingGradioCallbackHandler (line 38) | class StreamingGradioCallbackHandler(BaseCallbackHandler):
    method __init__ (line 43) | def __init__(self, timeout: Optional[float] = None, block=True, max_ti...
    method on_llm_start (line 55) | def on_llm_start(
    method on_llm_new_token (line 66) | def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
    method on_llm_end (line 77) | def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
    method on_llm_error (line 81) | def on_llm_error(
    method __iter__ (line 87) | def __iter__(self):
    method __next__ (line 90) | def __next__(self):
  class H2OCharacterTextSplitter (line 111) | class H2OCharacterTextSplitter(RecursiveCharacterTextSplitter):
    method __init__ (line 112) | def __init__(
    method from_huggingface_tokenizer (line 125) | def from_huggingface_tokenizer(cls, tokenizer: Any, **kwargs: Any) -> ...
  function select_docs_with_score (line 132) | def select_docs_with_score(docs_with_score, top_k_docs, one_doc_size):
  function split_merge_docs (line 144) | def split_merge_docs(docs_with_score, tokenizer=None, max_input_tokens=N...
  function _chunk_sources (line 294) | def _chunk_sources(sources, chunk=True, chunk_size=512, language=None, d...
  function add_parser (line 361) | def add_parser(docs1, parser):
  function _add_meta (line 365) | def _add_meta(docs1, file, headsize=50, filei=0, parser='NotSet', file_a...
  function fix_json_meta (line 388) | def fix_json_meta(docs1):
  class H2OMapReduceDocumentsChain (line 396) | class H2OMapReduceDocumentsChain(MapReduceDocumentsChain):
    method combine_docs (line 400) | def combine_docs(
    method acombine_docs (line 441) | async def acombine_docs(
    method terminate_callbacks (line 482) | def terminate_callbacks(self):
    method _chain_type (line 492) | def _chain_type(self) -> str:
  function _load_map_chain (line 496) | def _load_map_chain(
  function load_general_summarization_chain (line 560) | def load_general_summarization_chain(
  class H2OSemanticScholarAPIWrapper (line 601) | class H2OSemanticScholarAPIWrapper(BaseModel):
    method validate_environment (line 645) | def validate_environment(cls, values: Dict) -> Dict:
    method run (line 659) | def run(self, query: str) -> str:
  class H2OHuggingFaceHubEmbeddings (line 682) | class H2OHuggingFaceHubEmbeddings(HuggingFaceHubEmbeddings):
    method embed_documents (line 683) | def embed_documents(self, texts: List[str]) -> List[List[float]]:
  function make_sources_file (line 720) | def make_sources_file(langchain_mode, source_files_added):
  function convert_to_genai_schema (line 733) | def convert_to_genai_schema(json_schema: Union[Dict[str, Any], str], nam...
  function convert_object_schema (line 752) | def convert_object_schema(json_schema: Dict[str, Any], name: str) -> Sch...
  function convert_array_schema (line 772) | def convert_array_schema(json_schema: Dict[str, Any], name: str) -> Schema:
  function convert_primitive_schema (line 781) | def convert_primitive_schema(json_schema: Dict[str, Any], name: str) -> ...
  class PyMuPDF4LLMLoader (line 806) | class PyMuPDF4LLMLoader(BasePDFLoader):
    method __init__ (line 809) | def __init__(
    method _lazy_load (line 829) | def _lazy_load(self, **kwargs: Any) -> Iterator[Document]:
    method load (line 846) | def load(self, **kwargs: Any) -> List[Document]:
    method lazy_load (line 849) | def lazy_load(self) -> Iterator[Document]:
  class PyMuPDF4LLMParser (line 853) | class PyMuPDF4LLMParser(BaseBlobParser):
    method __init__ (line 856) | def __init__(
    method lazy_parse (line 869) | def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignor...
    method _extract_images_from_page (line 901) | def _extract_images_from_page(

FILE: src/utils_procs.py
  function rlimitproc (line 10) | def rlimitproc(pp, rlim):
  function get_all_rlimit (line 28) | def get_all_rlimit(pid=None):
  function reulimit (line 44) | def reulimit(pid=None, verbose=False):
  function get_nproc_limit (line 85) | def get_nproc_limit(pid=None):
  function wrap_psutil (line 101) | def wrap_psutil(func):
  function psfunc_list (line 113) | def psfunc_list(func, *args, **kwargs):
  function psfunc (line 121) | def psfunc(func, *args, **kwargs):
  function psattr (line 141) | def psattr(obj, attr):
  function get_file_limit (line 160) | def get_file_limit(pid=None):

FILE: src/utils_sys.py
  class StreamProxy (line 6) | class StreamProxy:
    method __init__ (line 7) | def __init__(self, original_stream):
    method write (line 10) | def write(self, *args, **kwargs):
    method flush (line 19) | def flush(self, *args, **kwargs):
    method handle_closed_file_error (line 28) | def handle_closed_file_error(self, operation):
    method close (line 36) | def close(self):
    method __getattr__ (line 47) | def __getattr__(self, name):
    method __setattr__ (line 50) | def __setattr__(self, name, value):
  class FinalizeStream (line 63) | class FinalizeStream:
    method __init__ (line 64) | def __init__(self, proxy):
    method __setattr__ (line 67) | def __setattr__(self, key, value):
    method __getattr__ (line 80) | def __getattr__(self, item):
  function protect_stream (line 84) | def protect_stream(stream_name):
  function protect_stdout_stderr (line 93) | def protect_stdout_stderr():

FILE: src/vision/extract_movie.py
  function extract_unique_frames (line 8) | def extract_unique_frames(urls=None, file=None, download_dir=None, expor...

FILE: src/vision/flux.py
  function get_pipe_make_image (line 11) | def get_pipe_make_image(gpu_id):
  function get_pipe_make_image_2 (line 22) | def get_pipe_make_image_2(gpu_id):
  function make_image (line 33) | def make_image(prompt, filename=None, gpu_id='auto', pipe=None,

FILE: src/vision/playv2.py
  function get_pipe_make_image (line 11) | def get_pipe_make_image(gpu_id):
  function make_image (line 26) | def make_image(prompt, filename=None, gpu_id='auto', pipe=None,

FILE: src/vision/sdxl_turbo.py
  function get_device (line 14) | def get_device(gpu_id):
  function get_pipe_make_image (line 22) | def get_pipe_make_image(gpu_id='auto'):
  function make_image (line 30) | def make_image(prompt, filename=None, gpu_id='auto', pipe=None,
  function get_pipe_change_image (line 66) | def get_pipe_change_image(gpu_id='auto'):
  function change_image (line 73) | def change_image(prompt, init_image=None, init_file=None, filename=None,...

FILE: src/vision/stable_diffusion_xl.py
  function get_pipe_make_image (line 10) | def get_pipe_make_image(gpu_id, refine=True,
  function make_image (line 56) | def make_image(prompt,

FILE: src/vision/utils_vision.py
  function is_animated_gif (line 20) | def is_animated_gif(file_path):
  function gif_to_mp4 (line 36) | def gif_to_mp4(gif_path):
  function is_video_file (line 50) | def is_video_file(file_path):
  function img_to_base64 (line 71) | def img_to_base64(image_file, resolution=None, output_format=None, str_b...
  function base64_to_img (line 106) | def base64_to_img(img_str, output_path):
  function video_to_base64frames (line 133) | def video_to_base64frames(video_path):
  function video_to_frames (line 151) | def video_to_frames(video_path, output_dir, resolution=None, image_forma...
  function count_frames (line 233) | def count_frames(video_path):
  function process_file_list (line 252) | def process_file_list(file_list, output_dir, resolution=None, image_form...
  function fix_llava_prompt (line 309) | def fix_llava_prompt(file,
  function llava_prep (line 328) | def llava_prep(file_list,
  function _llava_prep (line 348) | def _llava_prep(file,
  function get_prompt_with_texts (line 395) | def get_prompt_with_texts(texts, prompt, max_new_tokens, min_max_new_tok...
  function get_llava_response (line 428) | def get_llava_response(file=None,
  function get_llava_stream (line 514) | def get_llava_stream(file, llava_model,
  function get_image_model_dict (line 663) | def get_image_model_dict(enable_image,
  function pdf_to_base64_pngs (line 709) | def pdf_to_base64_pngs(pdf_path, quality=75, max_size=(1024, 1024), ext=...

FILE: tests/conftest.py
  function pytest_itemcollected (line 6) | def pytest_itemcollected(item):
  function pytest_sessionstart (line 10) | def pytest_sessionstart(session):

FILE: tests/memory_hog_script.py
  function use_memory (line 4) | def use_memory():

FILE: tests/test_async_iterator_pipe.py
  class TestTimeoutIterator (line 6) | class TestTimeoutIterator(unittest.TestCase):
    method test_normal_iteration (line 8) | def test_normal_iteration(self):
    method test_multiple_next_after_exception (line 27) | def test_multiple_next_after_exception(self):
    method test_multiple_close (line 49) | def test_multiple_close(self):
    method test_put_after_close (line 70) | def test_put_after_close(self):
    method test_normal_iteration_via_for_loop (line 91) | def test_normal_iteration_via_for_loop(self):

FILE: tests/test_async_timeout_iterator.py
  function iter_simple (line 7) | async def iter_simple():
  function iter_with_sleep (line 12) | async def iter_with_sleep():
  function iter_with_exception (line 20) | async def iter_with_exception():
  class TestTimeoutIterator (line 27) | class TestTimeoutIterator(unittest.TestCase):
    method test_normal_iteration (line 29) | def test_normal_iteration(self):
    method test_normal_iteration_for_loop (line 45) | def test_normal_iteration_for_loop(self):
    method test_timeout_block (line 57) | def test_timeout_block(self):
    method test_timeout_block_for_loop (line 73) | def test_timeout_block_for_loop(self):
    method test_fixed_timeout (line 85) | def test_fixed_timeout(self):
    method test_fixed_timeout (line 100) | def test_fixed_timeout(self):
    method test_timeout_update (line 112) | def test_timeout_update(self):
    method test_custom_sentinel (line 131) | def test_custom_sentinel(self):
    method test_feature_timeout_reset (line 146) | def test_feature_timeout_reset(self):
    method test_function_set_reset_on_next (line 160) | def test_function_set_reset_on_next(self):
    method test_iterator_raises_exception (line 176) | def test_iterator_raises_exception(self):
    method test_interrupt_thread (line 190) | def test_interrupt_thread(self):

FILE: tests/test_cli.py
  function test_cli (line 8) | def test_cli(monkeypatch):
  function test_cli_langchain (line 21) | def test_cli_langchain(base_model, monkeypatch):
  function test_cli_langchain_llamacpp (line 53) | def test_cli_langchain_llamacpp(monkeypatch):
  function test_cli_llamacpp (line 84) | def test_cli_llamacpp(monkeypatch):
  function test_cli_h2ogpt (line 113) | def test_cli_h2ogpt(monkeypatch):
  function test_cli_langchain_h2ogpt (line 127) | def test_cli_langchain_h2ogpt(monkeypatch):

FILE: tests/test_client_calls.py
  function test_client1 (line 22) | def test_client1():
  function test_client1_lock_choose_model (line 40) | def test_client1_lock_choose_model():
  function test_client1_context (line 84) | def test_client1_context(base_model):
  function test_client1api (line 114) | def test_client1api():
  function test_client1api_lean (line 134) | def test_client1api_lean(save_dir, admin_pass):
  function test_client1api_lean_lock_choose_model (line 195) | def test_client1api_lean_lock_choose_model():
  function test_client1api_lean_chat_server (line 268) | def test_client1api_lean_chat_server():
  function test_client_chat_nostream (line 288) | def test_client_chat_nostream():
  function test_client_chat_nostream_gpt4all (line 295) | def test_client_chat_nostream_gpt4all():
  function test_client_chat_nostream_gpt4all_llama (line 304) | def test_client_chat_nostream_gpt4all_llama():
  function test_client_chat_nostream_llama7b (line 318) | def test_client_chat_nostream_llama7b():
  function test_client_chat_nostream_llama2_long (line 336) | def test_client_chat_nostream_llama2_long(max_seq_len, prompt_num, model...
  function run_client_chat_with_server (line 1278) | def run_client_chat_with_server(prompt='Who are you?', stream_output=Fal...
  function test_client_chat_stream (line 1317) | def test_client_chat_stream():
  function run_client_nochat_with_server (line 1321) | def run_client_nochat_with_server(prompt='Who are you?', stream_output=F...
  function test_client_nochat_stream (line 1358) | def test_client_nochat_stream(gradio_ui_stream_chunk_size, gradio_ui_str...
  function test_client_chat_stream_langchain (line 1368) | def test_client_chat_stream_langchain():
  function test_client_chat_stream_langchain_steps (line 1392) | def test_client_chat_stream_langchain_steps(max_new_tokens, top_k_docs):
  function test_client_system_prompts (line 1536) | def test_client_system_prompts(system_prompt, chat_conversation):
  function test_client_long_chat (line 2410) | def test_client_long_chat():
  function test_client_chat_stream_langchain_steps2 (line 2438) | def test_client_chat_stream_langchain_steps2(max_new_tokens, top_k_docs):
  function test_doc_hash (line 2505) | def test_doc_hash():
  function test_client_chat_stream_long (line 2540) | def test_client_chat_stream_long():
  function test_autogptq (line 2553) | def test_autogptq(base_model):
  function test_autoawq (line 2593) | def test_autoawq():
  function check_langchain (line 2632) | def check_langchain():
  function test_exllama (line 2689) | def test_exllama(mode):
  function test_attention_sinks (line 2738) | def test_attention_sinks(max_seq_len, attention_sinks):
  function test_client_long (line 2791) | def test_client_long():
  function test_fast_up (line 2809) | def test_fast_up():
  function test_fast_up_preload (line 2815) | def test_fast_up_preload():
  function test_fast_up_auth (line 2832) | def test_fast_up_auth():
  function test_fast_up_auth2 (line 2839) | def test_fast_up_auth2():
  function test_lock_up (line 2853) | def test_lock_up(visible_models):
  function test_client_stress (line 2873) | def test_client_stress(repeat):
  function test_client_stress_stream (line 2910) | def test_client_stress_stream(repeat):
  function test_text_generation_inference_server1 (line 2929) | def test_text_generation_inference_server1():
  function kill_function_server (line 2966) | def kill_function_server():
  function test_client_chat_stream_langchain_steps3 (line 2977) | def test_client_chat_stream_langchain_steps3(loaders, enforce_h2ogpt_api...
  function run_client_chat_stream_langchain_steps3 (line 2987) | def run_client_chat_stream_langchain_steps3(loaders, enforce_h2ogpt_api_...
  function test_client_load_unload_models (line 3435) | def test_client_load_unload_models(model_choice):
  function test_client_curated_base_models (line 3597) | def test_client_curated_base_models(base_model, stream_output):
  function test_client_chat_stream_langchain_openai_embeddings (line 3630) | def test_client_chat_stream_langchain_openai_embeddings():
  function test_client_clone (line 3680) | def test_client_clone(stream_output):
  function test_client_timeout (line 3704) | def test_client_timeout(stream_output, max_time):
  function test_client_chat_stream_langchain_fake_embeddings_stress (line 3780) | def test_client_chat_stream_langchain_fake_embeddings_stress(repeat):
  function test_client_upload_simple (line 3794) | def test_client_upload_simple(repeat):
  function test_client_chat_stream_langchain_fake_embeddings_stress_no_llm (line 3809) | def test_client_chat_stream_langchain_fake_embeddings_stress_no_llm(repe...
  function go_upload_gradio (line 3819) | def go_upload_gradio():
  function test_client_chat_stream_langchain_fake_embeddings (line 3888) | def test_client_chat_stream_langchain_fake_embeddings(data_kind, base_mo...
  function run_client_chat_stream_langchain_fake_embeddings (line 4023) | def run_client_chat_stream_langchain_fake_embeddings(data_kind, base_mod...
  function test_client_summarization (line 4424) | def test_client_summarization(prompt_summary, inference_server, top_k_do...
  function test_client_summarization_from_text (line 4583) | def test_client_summarization_from_text():
  function test_client_summarization_from_url (line 4651) | def test_client_summarization_from_url(url, top_k_docs):
  function test_fastsys (line 4723) | def test_fastsys(stream_output, bits, prompt_type):
  function test_hyde (line 4805) | def test_hyde(stream_output, hyde_level, hyde_template):
  function set_env (line 4864) | def set_env(tts_model):
  function test_client1_tts (line 4882) | def test_client1_tts(tts_model):
  function play_audio (line 4911) | def play_audio(audio, sr=16000):
  function test_client1_tts_stream (line 4943) | def test_client1_tts_stream(tts_model, base_model):
  function check_final_res (line 5000) | def check_final_res(res, base_model='llama'):
  function check_curl_plain_api (line 5023) | def check_curl_plain_api():
  function test_client1_tts_api (line 5061) | def test_client1_tts_api(tts_model, stream_output, h2ogpt_key):
  function play_audio_str (line 5105) | def play_audio_str(audio_str1, n):
  function playsound_wav (line 5143) | def playsound_wav(x):
  function test_pure_client_test (line 5156) | def test_pure_client_test():
  function test_client_upload_to_user_not_allowed (line 5183) | def test_client_upload_to_user_not_allowed():
  function test_client_upload_to_my_not_allowed (line 5230) | def test_client_upload_to_my_not_allowed():
  function test_client_upload_to_user_or_my_not_allowed (line 5277) | def test_client_upload_to_user_or_my_not_allowed():
  function test_client1_image_qa_original (line 5305) | def test_client1_image_qa_original():
  function test_client_chat_stream_langchain_metadata (line 5342) | def test_client_chat_stream_langchain_metadata(metadata_in_context):
  function test_client_openai_langchain (line 5379) | def test_client_openai_langchain(auth_access, guest_name, do_auth):
  function run_sound_test0 (line 5612) | def run_sound_test0(client, text):
  function run_sound_test1 (line 5624) | def run_sound_test1(client):
  function run_sound_test2 (line 5640) | def run_sound_test2(client):
  function run_sound_test3 (line 5656) | def run_sound_test3(client):
  function test_client_openai_chat_history (line 5735) | def test_client_openai_chat_history(base_model):
  function test_max_new_tokens (line 5826) | def test_max_new_tokens(max_new_tokens, temperature):
  function test_client1_image_qa (line 6021) | def test_client1_image_qa(langchain_action, langchain_mode, base_model):
  function get_creation_date (line 6130) | def get_creation_date(file_path):
  function test_client1_images_qa (line 6142) | def test_client1_images_qa(langchain_action, langchain_mode, base_model,...
  function test_pdf_to_base_64_images (line 6198) | def test_pdf_to_base_64_images():
  function test_get_image_file (line 6211) | def test_get_image_file():
  function get_test_server_client (line 6324) | def get_test_server_client(base_model):
  function test_guided_json (line 6363) | def test_guided_json(langchain_action, langchain_mode, response_format, ...
  function check_response (line 6430) | def check_response(response, base_model, guided_json):
  function openai_guided_json (line 6443) | def openai_guided_json(gradio_client, base_model, kwargs, use_instruction):
  function test_client1_image_text_qa (line 6580) | def test_client1_image_text_qa(langchain_action, langchain_mode, base_mo...
  function test_client1_lock_choose_model_via_api (line 6688) | def test_client1_lock_choose_model_via_api(admin_pass):
  function test_client1_lock_choose_model_via_api_vision (line 6730) | def test_client1_lock_choose_model_via_api_vision(admin_pass):
  function test_max_new_tokens_vs_min_max_new_tokens (line 6774) | def test_max_new_tokens_vs_min_max_new_tokens():

FILE: tests/test_client_readme.py
  function test_readme_example (line 9) | def test_readme_example(local_server, persist):

FILE: tests/test_eval.py
  function test_eval1 (line 13) | def test_eval1(cpu, bits, base_model):
  function test_eval_json (line 20) | def test_eval_json():
  function run_eval1 (line 54) | def run_eval1(cpu=False, bits=None, base_model='h2oai/h2ogpt-oig-oasst1-...
  function test_eval_json_langchain (line 262) | def test_eval_json_langchain():

FILE: tests/test_eval_models.py
  function test_score_eval (line 34) | def test_score_eval(base_model):
  function test_get_falcons (line 64) | def test_get_falcons(base_model):
  function test_get_landmark_llama (line 85) | def test_get_landmark_llama():

FILE: tests/test_imports.py
  function test_transformers (line 5) | def test_transformers():

FILE: tests/test_inference_servers.py
  function test_gradio_inference_server (line 24) | def test_gradio_inference_server(base_model, force_langchain_evaluate, d...
  function run_docker (line 192) | def run_docker(inf_port, base_model, low_mem_mode=False, do_shared=True):
  function gpus_cmd (line 242) | def gpus_cmd():
  function run_vllm_docker (line 255) | def run_vllm_docker(inf_port, base_model, tokenizer=None):
  function run_h2ogpt_docker (line 323) | def run_h2ogpt_docker(port, base_model, inference_server=None, max_new_t...
  function test_hf_inference_server (line 381) | def test_hf_inference_server(base_model, force_langchain_evaluate, do_la...
  function test_openai_inference_server (line 531) | def test_openai_inference_server(inference_server, force_langchain_evalu...
  function test_gradio_tgi_docker (line 629) | def test_gradio_tgi_docker(base_model):
  function test_gradio_vllm_docker (line 695) | def test_gradio_vllm_docker(base_model):
  function test_replicate_inference_server (line 764) | def test_replicate_inference_server(force_langchain_evaluate,

FILE: tests/test_iterator_pipe.py
  class TestQueueToIterator (line 5) | class TestQueueToIterator(unittest.TestCase):
    method test_normal_iteration (line 7) | def test_normal_iteration(self):
    method test_normal_custom_sentinel (line 20) | def test_normal_custom_sentinel(self):
    method test_multiple_close (line 34) | def test_multiple_close(self):
    method test_multiple_next_after_close (line 50) | def test_multiple_next_after_close(self):
    method test_put_after_close (line 66) | def test_put_after_close(self):

FILE: tests/test_langchain_simple.py
  function test_langchain_simple_h2ogpt (line 7) | def test_langchain_simple_h2ogpt():
  function test_langchain_simple_vicuna (line 13) | def test_langchain_simple_vicuna():
  function run_langchain_simple (line 17) | def run_langchain_simple(base_model='h2oai/h2ogpt-oasst1-512-12b', promp...

FILE: tests/test_langchain_units.py
  function test_qa_wiki_openai (line 35) | def test_qa_wiki_openai():
  function test_qa_wiki_stuff_hf (line 41) | def test_qa_wiki_stuff_hf():
  function test_qa_wiki_map_reduce_hf (line 49) | def test_qa_wiki_map_reduce_hf():
  function run_qa_wiki_fork (line 53) | def run_qa_wiki_fork(*args, **kwargs):
  function run_qa_wiki (line 62) | def run_qa_wiki(use_openai_model=False, first_para=True, text_limit=None...
  function check_ret (line 78) | def check_ret(ret):
  function test_qa_wiki_db_openai (line 94) | def test_qa_wiki_db_openai():
  function test_qa_wiki_db_hf (line 109) | def test_qa_wiki_db_hf():
  function test_qa_wiki_db_chunk_hf (line 128) | def test_qa_wiki_db_chunk_hf():
  function test_qa_wiki_db_chunk_openai (line 145) | def test_qa_wiki_db_chunk_openai():
  function test_qa_github_db_chunk_openai (line 163) | def test_qa_github_db_chunk_openai():
  function test_qa_daidocs_db_chunk_hf (line 181) | def test_qa_daidocs_db_chunk_hf():
  function test_qa_daidocs_db_chunk_hf_faiss (line 199) | def test_qa_daidocs_db_chunk_hf_faiss():
  function test_qa_daidocs_db_chunk_hf_dbs (line 221) | def test_qa_daidocs_db_chunk_hf_dbs(db_type, top_k_docs):
  function get_test_model (line 253) | def get_test_model(base_model='h2oai/h2ogpt-oig-oasst1-512-6_9b',
  function test_qa_daidocs_db_chunk_hf_dbs_switch_embedding (line 314) | def test_qa_daidocs_db_chunk_hf_dbs_switch_embedding(db_type):
  function test_qa_wiki_db_chunk_hf_dbs_llama (line 366) | def test_qa_wiki_db_chunk_hf_dbs_llama(db_type):
  function test_qa_daidocs_db_chunk_openai (line 402) | def test_qa_daidocs_db_chunk_openai():
  function test_qa_daidocs_db_chunk_openaiembedding_hfmodel (line 419) | def test_qa_daidocs_db_chunk_openaiembedding_hfmodel():
  function test_get_dai_pickle (line 436) | def test_get_dai_pickle():
  function test_get_dai_db_dir (line 445) | def test_get_dai_db_dir():
  function test_make_add_db (line 455) | def test_make_add_db(repeat, db_type):
  function test_zip_add (line 654) | def test_zip_add(db_type):
  function test_tar_add (line 679) | def test_tar_add(db_type, tar_type):
  function test_url_add (line 703) | def test_url_add(db_type):
  function test_urls_add (line 719) | def test_urls_add(db_type):
  function test_urls_file_add (line 743) | def test_urls_file_add(db_type):
  function test_html_add (line 771) | def test_html_add(db_type):
  function test_docx_add (line 804) | def test_docx_add(db_type):
  function test_docx_add2 (line 825) | def test_docx_add2(db_type):
  function test_xls_add (line 848) | def test_xls_add(db_type):
  function test_md_add (line 869) | def test_md_add(db_type):
  function test_rst_add (line 894) | def test_rst_add(db_type):
  function test_xml_add (line 915) | def test_xml_add(db_type):
  function test_eml_add (line 936) | def test_eml_add(db_type):
  function test_simple_eml_add (line 956) | def test_simple_eml_add(db_type):
  function test_odt_add (line 989) | def test_odt_add(db_type):
  function test_pptx_add (line 1009) | def test_pptx_add(db_type):
  function test_pdf_add (line 1035) | def test_pdf_add(db_type, enable_pdf_ocr, enable_pdf_doctr, use_pymupdf,...
  function test_image_pdf_add (line 1112) | def test_image_pdf_add(db_type, enable_pdf_ocr, enable_pdf_doctr, use_py...
  function test_simple_pptx_add (line 1186) | def test_simple_pptx_add(db_type):
  function test_epub_add (line 1207) | def test_epub_add(db_type):
  function test_msg_add (line 1231) | def test_msg_add(db_type):
  function test_png_add (line 1266) | def test_png_add(captions_model, caption_gpu, pre_load_image_audio_model...
  function run_png_add (line 1309) | def run_png_add(captions_model=None, caption_gpu=False,
  function check_content_captions (line 1446) | def check_content_captions(docs, captions_model, enable_pix2struct):
  function check_content_doctr (line 1460) | def check_content_doctr(docs):
  function check_content_ocr (line 1467) | def check_content_ocr(docs):
  function check_source (line 1474) | def check_source(docs, test_file1):
  function test_caption_add (line 1486) | def test_caption_add(image_file, db_type):
  function test_simple_rtf_add (line 1526) | def test_simple_rtf_add(db_type):
  function test_url_more_add (line 1564) | def test_url_more_add(db_type):
  function test_json_add (line 1620) | def test_json_add(db_type):
  function test_jsonl_gz_add (line 1647) | def test_jsonl_gz_add(db_type):
  function test_url_more_subunit (line 1671) | def test_url_more_subunit():
  function test_many_text (line 1695) | def test_many_text(db_type, num):
  function test_youtube_audio_add (line 1709) | def test_youtube_audio_add(db_type):
  function test_youtube_full_add (line 1729) | def test_youtube_full_add(db_type):
  function test_mp3_add (line 1750) | def test_mp3_add(db_type):
  function test_mp4_add (line 1769) | def test_mp4_add(db_type):
  function test_chroma_filtering (line 1795) | def test_chroma_filtering():
  function test_merge_docs (line 2011) | def test_merge_docs(data_kind, max_input_tokens):
  function test_split_and_merge (line 2091) | def test_split_and_merge():
  function test_crawl (line 2110) | def test_crawl():
  function test_hyde_acc (line 2118) | def test_hyde_acc():

FILE: tests/test_long_context.py
  function num_tokens_from_string (line 16) | def num_tokens_from_string(string: str, model_name=None) -> int:
  function make_key (line 29) | def make_key():
  function make_value (line 33) | def make_value():
  function get_prompt (line 43) | def get_prompt(before, after):
  function create_long_prompt_with_secret (line 47) | def create_long_prompt_with_secret(prompt_len=None, secret_pos=None, mod...
  function test_gradio_long_context_uuid_key_value_retrieval (line 87) | def test_gradio_long_context_uuid_key_value_retrieval(base_model, rope_s...
  function test_huggyllama_transformers_pr (line 163) | def test_huggyllama_transformers_pr(base_model, type, factor):

FILE: tests/test_manual_test.py
  function test_chat_conversation (line 9) | def test_chat_conversation():
  function test_upload_one_file (line 29) | def test_upload_one_file():
  function test_upload_multiple_file (line 34) | def test_upload_multiple_file():
  function test_upload_url (line 39) | def test_upload_url():
  function test_upload_arxiv (line 45) | def test_upload_arxiv():
  function test_upload_pasted_text (line 51) | def test_upload_pasted_text():
  function test_no_db_dirs (line 61) | def test_no_db_dirs():
  function test_upload_unsupported_file (line 68) | def test_upload_unsupported_file():
  function test_upload_to_UserData_and_MyData (line 73) | def test_upload_to_UserData_and_MyData():
  function test_chat_control (line 78) | def test_chat_control():
  function test_subset_only (line 83) | def test_subset_only():
  function test_add_new_doc (line 89) | def test_add_new_doc():
  function test_model_lock (line 95) | def test_model_lock():
  function test_async_gradio (line 100) | def test_async_gradio():
  function test_stt_gradio (line 106) | def test_stt_gradio():

FILE: tests/test_metrics.py
  function test_bleurt (line 5) | def test_bleurt():
  function test_sacrebleu (line 15) | def test_sacrebleu():
  function test_bleu (line 34) | def test_bleu():
  function test_squad_v1 (line 47) | def test_squad_v1():
  function test_squad_v2 (line 57) | def test_squad_v2():
  function test_rougue (line 69) | def test_rougue():
  function test_bertscore (line 79) | def test_bertscore():
  function test_chrf (line 89) | def test_chrf():
  function test_chrfpp (line 101) | def test_chrfpp():
  function test_wiki_split (line 113) | def test_wiki_split():
  function test_super_glue (line 124) | def test_super_glue():
  function test_quip (line 136) | def test_quip():
  function test_glue (line 208) | def test_glue():
  function test_google_bleu (line 233) | def test_google_bleu():
  function test_meteor (line 254) | def test_meteor():

FILE: tests/test_perf_benchmarks.py
  function test_perf_benchmarks (line 55) | def test_perf_benchmarks(backend, base_model, task, bits, ngpus):
  function test_plot_results (line 247) | def test_plot_results():

FILE: tests/test_pipeline.py
  function test_export_copy (line 7) | def test_export_copy():
  function test_pipeline1 (line 49) | def test_pipeline1():
  function test_pipeline2 (line 86) | def test_pipeline2():
  function test_pipeline3 (line 112) | def test_pipeline3():

FILE: tests/test_prompter.py
  function test_train_prompt (line 24) | def test_train_prompt(prompt_type='instruct', data_point=0):
  function test_test_prompt (line 30) | def test_test_prompt(prompt_type='instruct', data_point=0):
  function test_test_prompt2 (line 37) | def test_test_prompt2(prompt_type='human_bot', data_point=0):
  function get_prompt_from_messages (line 184) | def get_prompt_from_messages(messages, model="mistralai/Mistral-7B-Instr...
  function get_aquila_prompt (line 201) | def get_aquila_prompt(messages, model_base_name='AquilaChat2-34B-16K', w...
  function test_prompt_with_context (line 279) | def test_prompt_with_context(prompt_type, system_prompt, chat_conversati...
  function test_prompt_with_no_context (line 454) | def test_prompt_with_no_context(prompt_type, system_prompt, expected):
  function test_source (line 481) | def test_source():
  function falcon180_format_prompt (line 487) | def falcon180_format_prompt(message, history, system_prompt):
  function test_falcon180 (line 500) | def test_falcon180():
  function test_hf_image_chat_template (line 510) | def test_hf_image_chat_template():
  function test_get_llm_history (line 571) | def test_get_llm_history(history, only_text, expected):
  function test_history_to_context (line 637) | def test_history_to_context(history, system_prompt, model_max_length):

FILE: tests/test_requirements.py
  function get_all_requirements (line 10) | def get_all_requirements():
  function get_requirements (line 25) | def get_requirements(req_file="requirements.txt"):
  function test_requirements (line 50) | def test_requirements():
  function get_version (line 98) | def get_version(package, url_pattern=URL_PATTERN):
  function test_what_latest_packages (line 113) | def test_what_latest_packages():
  function test_make_packages (line 132) | def test_make_packages():

FILE: tests/test_sentence_parsing.py
  function test_get_sentence_stream (line 92) | def test_get_sentence_stream(bot, sentences_expected):
  function test_get_sentence_no_stream (line 120) | def test_get_sentence_no_stream(bot, sentences_expected):

FILE: tests/test_timeout_iterator.py
  function iter_simple (line 7) | def iter_simple():
  function iter_with_sleep (line 12) | def iter_with_sleep():
  function iter_with_exception (line 20) | def iter_with_exception():
  class TestTimeoutIterator (line 27) | class TestTimeoutIterator(unittest.TestCase):
    method test_normal_iteration (line 29) | def test_normal_iteration(self):
    method test_normal_iteration_for_loop (line 39) | def test_normal_iteration_for_loop(self):
    method test_timeout_block (line 47) | def test_timeout_block(self):
    method test_timeout_block_for_loop (line 56) | def test_timeout_block_for_loop(self):
    method test_fixed_timeout (line 64) | def test_fixed_timeout(self):
    method test_fixed_timeout_for_loop (line 74) | def test_fixed_timeout_for_loop(self):
    method test_timeout_update (line 82) | def test_timeout_update(self):
    method test_custom_sentinel (line 95) | def test_custom_sentinel(self):
    method test_feature_timeout_reset (line 105) | def test_feature_timeout_reset(self):
    method test_function_set_reset_on_next (line 113) | def test_function_set_reset_on_next(self):
    method test_iterator_raises_exception (line 123) | def test_iterator_raises_exception(self):
    method test_interrupt_thread (line 131) | def test_interrupt_thread(self):

FILE: tests/test_tokenizer.py
  function nltkTokenize (line 9) | def nltkTokenize(text):
  function regTokenize (line 19) | def regTokenize(text):
  function test_tokenizer1 (line 30) | def test_tokenizer1():
  function run_tokenizer1 (line 56) | def run_tokenizer1(prompt):
  function test_fake_tokenizer (line 87) | def test_fake_tokenizer():
  function test_tokenizer_base_model1 (line 100) | def test_tokenizer_base_model1():
  function test_tokenizer_base_model2 (line 113) | def test_tokenizer_base_model2():

FILE: tests/test_tts.py
  function test_sentence_to_wave (line 11) | def test_sentence_to_wave():
  function test_generate_speech (line 39) | def test_generate_speech():
  function test_full_generate_speech (line 54) | def test_full_generate_speech():
  function test_predict_from_text (line 113) | def test_predict_from_text(bot, sentences_expected):

FILE: tests/test_ui.py
  function test_newline_replace (line 5) | def test_newline_replace():

FILE: tests/test_utils.py
  function test_get_list_or_str (line 28) | def test_get_list_or_str():
  function test_stream_popen1 (line 35) | def test_stream_popen1():
  function test_stream_popen2 (line 51) | def test_stream_popen2():
  function test_stream_python_execution (line 71) | def test_stream_python_execution(capsys):
  function test_stream_python_execution_empty_lines (line 118) | def test_stream_python_execution_empty_lines(capsys):
  function test_memory_limit (line 153) | def test_memory_limit():
  function test_limited_prompt (line 169) | def test_limited_prompt(instruction, chat_conversation, iinput, context,...
  function test_reverse_ucurve (line 270) | def test_reverse_ucurve():
  function check_gradio (line 294) | def check_gradio():
  function test_is_uuid4 (line 300) | def test_is_uuid4():
  function test_is_git_hash (line 318) | def test_is_git_hash():
  function test_chat_template (line 326) | def test_chat_template():
  function test_chat_template_images (line 352) | def test_chat_template_images():
  function test_partial_codeblock (line 387) | def test_partial_codeblock():
  function test_partial_codeblock2 (line 557) | def test_partial_codeblock2():
  function test_extract_code_block_content (line 569) | def test_extract_code_block_content():
  function test_repair_json (line 713) | def test_repair_json(method):
  function test_json_repair_more (line 764) | def test_json_repair_more():
  function test_dedup (line 876) | def test_dedup():
  function test_handle_json_normal (line 883) | def test_handle_json_normal():
  function test_handle_json_schema (line 896) | def test_handle_json_schema():
  function test_handle_json_mixed (line 930) | def test_handle_json_mixed():
  function test_handle_json_empty (line 956) | def test_handle_json_empty():
  function test_handle_json_no_schema (line 961) | def test_handle_json_no_schema():
  function test_json_repair_on_string (line 970) | def test_json_repair_on_string():
  function test_check_input_type (line 984) | def test_check_input_type():
  function test_process_file_list (line 1009) | def test_process_file_list():
  function test_process_file_list_extract_frames (line 1039) | def test_process_file_list_extract_frames():
  function test_process_youtube (line 1069) | def test_process_youtube():
  function test_process_animated_gif (line 1091) | def test_process_animated_gif():
  function test_process_animated_gif2 (line 1113) | def test_process_animated_gif2():
  function test_process_animated_gif3 (line 1134) | def test_process_animated_gif3():
  function test_process_mixed (line 1156) | def test_process_mixed():
  function test_update_db (line 1180) | def test_update_db():
  function test_encode_chat_template (line 1221) | def test_encode_chat_template():
  function test_depth (line 1269) | def test_depth():
  function test_schema_to_typed (line 1332) | def test_schema_to_typed():
  function test_genai_schema (line 1375) | def test_genai_schema():
  function test_genai_schema_more (line 1418) | def test_genai_schema_more():
  function test_pymupdf4llm (line 1494) | def test_pymupdf4llm():
  function compute_stats (line 1533) | def compute_stats(times_in_seconds):

FILE: tests/test_vision.py
  function test_llava_client2 (line 9) | def test_llava_client2():
  function test_llava_client_stream (line 20) | def test_llava_client_stream():
  function test_make_image (line 33) | def test_make_image():
  function test_change_image (line 40) | def test_change_image():
  function test_video_extraction (line 49) | def test_video_extraction():
  function test_make_image_playv2 (line 61) | def test_make_image_playv2():
  function test_fastfood (line 68) | def test_fastfood():

FILE: tests/utils.py
  function get_inf_port (line 19) | def get_inf_port():
  function get_inf_server (line 29) | def get_inf_server():
  function get_mods (line 39) | def get_mods():
  function do_skip_test (line 45) | def do_skip_test(name):
  function wrap_test_forked (line 55) | def wrap_test_forked(func):
  function run_test (line 77) | def run_test(func, *args, **kwargs):
  function get_sha (line 81) | def get_sha(value):
  function get_test_name (line 85) | def get_test_name():
  function make_user_path_test (line 91) | def make_user_path_test():
  function get_llama (line 109) | def get_llama(llama_type=3):
  function kill_weaviate (line 140) | def kill_weaviate(db_type):
  function count_tokens_llm (line 148) | def count_tokens_llm(prompt, base_model='h2oai/h2ogpt-oig-oasst1-512-6_9...
  function count_tokens (line 160) | def count_tokens(prompt, base_model='h2oai/h2ogpt-oig-oasst1-512-6_9b'):

FILE: win_run_app.py
  function copy_tree (line 35) | def copy_tree(src, dst):
  function setup_paths (line 44) | def setup_paths():
  function _main (line 83) | def _main():
  function main (line 148) | def main():

Copy disabled (too large) Download .json

Condensed preview — 287 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (19,729K chars).

[
  {
    "path": ".dockerignore",
    "chars": 157,
    "preview": ".git\n.npm\n.dockerignore\n.pytest_cache\n.cache\n.local\n.github\n.nv\n.benchmarks\n.bash_history\n.gitignore\nh2ogpt.egg-info\nven"
  },
  {
    "path": ".gitattributes",
    "chars": 0,
    "preview": ""
  },
  {
    "path": ".github/workflows/python-package-publish.yml",
    "chars": 1684,
    "preview": "name: Build & Publish h2oGPT Python wheel to PYPI\n\non:\n  workflow_dispatch:\n    inputs:\n      pypi-index:\n        type: "
  },
  {
    "path": ".gitignore",
    "chars": 480,
    "preview": "out/\n7B/\n13B/\n__pycache__/\ncheckpoint**\nminimal-llama**\nupload.py\nlora-**\n*ckpt\nwandb\nevaluate.py\ntest_data.json\ntodo.tx"
  },
  {
    "path": "Dockerfile",
    "chars": 1055,
    "preview": "# devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient\nFROM nvidia/cuda:12.1.1-cudnn8"
  },
  {
    "path": "LICENSE",
    "chars": 11397,
    "preview": "                                Apache License\n                           Version 2.0, January 2004\n                    "
  },
  {
    "path": "Makefile",
    "chars": 4052,
    "preview": "all: clean dist\n\nPACKAGE_VERSION              := `cat version.txt | tr -d '\\n'`\nBUILD_TAG                    := $(shell "
  },
  {
    "path": "README.md",
    "chars": 14271,
    "preview": "# h2oGPT\n\nTurn ★ into ⭐ (top-right corner) if you like the project!\n\nQuery and summarize your documents or just chat wit"
  },
  {
    "path": "benchmarks/llm_gpu_benchmark.py",
    "chars": 5582,
    "preview": "\n\n# %%\nimport json\n\nimport pandas as pd\nimport plotly.express as px\nimport plotly.graph_objects as go\nimport plotly.io a"
  },
  {
    "path": "benchmarks/llm_gpu_benchmark_text-generation-inference.html",
    "chars": 26092,
    "preview": "<html>\n<head><meta charset=\"utf-8\" /></head>\n<body>\n    <div>                        <script type=\"text/javascript\">wind"
  },
  {
    "path": "benchmarks/llm_gpu_benchmark_transformers.html",
    "chars": 30392,
    "preview": "<html>\n<head><meta charset=\"utf-8\" /></head>\n<body>\n    <div>                        <script type=\"text/javascript\">wind"
  },
  {
    "path": "benchmarks/llm_gpu_benchmarks.json",
    "chars": 81511,
    "preview": "[\n  {\n    \"backend\": \"transformers\",\n    \"base_model\": \"h2oai/h2ogpt-4096-llama2-7b-chat\",\n    \"task\": \"summary_and_gene"
  },
  {
    "path": "benchmarks/perf.json",
    "chars": 70492,
    "preview": "{\"backend\": \"transformers\", \"base_model\": \"h2oai/h2ogpt-4096-llama2-7b-chat\", \"task\": \"summary_and_generate\", \"bits\": 16"
  },
  {
    "path": "benchmarks/perf.md",
    "chars": 22904,
    "preview": "# Backend: transformers\n\nFor [Interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/b"
  },
  {
    "path": "benchmarks/rag_benchmark.md",
    "chars": 913312,
    "preview": "----------------------------------------------------------------------------------------------------\n# h2oGPTe RAG Bench"
  },
  {
    "path": "blog/README.md",
    "chars": 6683,
    "preview": "# Building the World's Best Open-Source Large Language Model: H2O.ai's Journey\n\nby Arno Candel, PhD, CTO H2O.ai, April 1"
  },
  {
    "path": "ci/jenkinsfile",
    "chars": 8052,
    "preview": "#!/usr/bin/groovy\n\n@Library('test-shared-library@dai_pipeline') _\n\nimport ai.h2o.ci.buildsummary.StagesSummary\nimport gr"
  },
  {
    "path": "cloud/packer/Jenkinsfile",
    "chars": 4041,
    "preview": "import org.jenkinsci.plugins.pipeline.modeldefinition.Utils\n\nproperties(\n    [\n        parameters(\n            [\n       "
  },
  {
    "path": "cloud/packer/README.md",
    "chars": 884,
    "preview": "# h2oGPT Packer Templates\n\nThese scripts help create images in public clouds that can then submitted to Azure/GCP Market"
  },
  {
    "path": "cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh",
    "chars": 329,
    "preview": "#!/bin/bash -e\n\nsudo systemctl daemon-reload\nsudo systemctl enable h2ogpt_nginx.service\nsudo systemctl enable vllm.servi"
  },
  {
    "path": "cloud/packer/h2ogpt-azure.json",
    "chars": 3959,
    "preview": "{\n    \"variables\": {\n        \"client_id\": \"<AZURE CLIENT ID>\",\n        \"client_secret\": \"<AZURE CLIENT SECRET>\",\n       "
  },
  {
    "path": "cloud/packer/h2ogpt-gcp.json",
    "chars": 2892,
    "preview": "{\n  \"variables\": {\n    \"project_id\": \"eng-llm\",\n    \"account_file\": \"<NAME OF GCP CREDENTIALS JSON FILE>\",\n    \"h2ogpt_v"
  },
  {
    "path": "cloud/packer/install_h2ogpt.sh",
    "chars": 433,
    "preview": "#!/bin/bash -e\n\nexport PATH=$PATH:/home/ubuntu/.local/bin\nsudo mkdir -p /workspace && cd /workspace\nsudo chmod a+rwx .\n\n"
  },
  {
    "path": "cloud/packer/setup_environment.sh",
    "chars": 2357,
    "preview": "#!/bin/bash -e\n\nsudo DEBIAN_FRONTEND=noninteractive apt-get -y update\nsudo DEBIAN_FRONTEND=noninteractive apt-get -y --n"
  },
  {
    "path": "cloud/packer/startup-scripts/h2ogpt.service",
    "chars": 204,
    "preview": "[Unit]\nDescription=h2oGPT Server\nAfter=network.target\n\n[Service]\nType=simple\nUser=ubuntu\nWorkingDirectory=/workspace\nExe"
  },
  {
    "path": "cloud/packer/startup-scripts/h2ogpt_nginx.service",
    "chars": 209,
    "preview": "[Unit]\nDescription=h2oGPT Nginx Server\nAfter=network.target\n\n[Service]\nType=simple\nUser=ubuntu\nWorkingDirectory=/workspa"
  },
  {
    "path": "cloud/packer/startup-scripts/run_h2ogpt.sh",
    "chars": 836,
    "preview": "#!/bin/bash -e\n\nwhile true; do\n  http_code=$(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:5000/v1/completions"
  },
  {
    "path": "cloud/packer/startup-scripts/run_nginx.sh",
    "chars": 736,
    "preview": "#!/bin/bash -e\n\nwhile true; do\n  http_code=$(curl -s -o /dev/null -w \"%{http_code}\" http://localhost:5000/v1/completions"
  },
  {
    "path": "cloud/packer/startup-scripts/run_vllm.sh",
    "chars": 536,
    "preview": "#!/bin/bash -e\n\ntps=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l | awk '{if ($1 > 1) print int($1/2);"
  },
  {
    "path": "cloud/packer/startup-scripts/temp.conf",
    "chars": 520,
    "preview": "server {\n    listen 80;\n    listen [::]:80;\n    server_name <|_SUBST_PUBLIC_IP|>;  # Change this to your domain name\n\n  "
  },
  {
    "path": "cloud/packer/startup-scripts/vllm.service",
    "chars": 200,
    "preview": "[Unit]\nDescription=vLLM Server\nAfter=network.target\n\n[Service]\nType=simple\nUser=ubuntu\nWorkingDirectory=/workspace\nExecS"
  },
  {
    "path": "data/README-template.md",
    "chars": 491,
    "preview": "---\nlicense: apache-2.0\nlanguage:\n- en\nthumbnail: https://h2o.ai/etc.clientlibs/h2o/clientlibs/clientlib-site/resources/"
  },
  {
    "path": "data/censor_words.txt",
    "chars": 95,
    "preview": "fuck-buddy\nfuck-buddys\nclusterfuck\nfuckup\nfuckups\ndumbfuck\ndumbfucks\nmindfuck\n*fucking\nfuckin'\n"
  },
  {
    "path": "data/config.json",
    "chars": 4389598,
    "preview": "[\n  {\n    \"prompt_type\": \"plain\",\n    \"instruction\": \"<human>: What does max_runtime_minutes do? <bot>: max runtime minu"
  },
  {
    "path": "data/create_data_cards.py",
    "chars": 6754,
    "preview": "import shutil\n\nimport pandas as pd\nimport os\n\nimport huggingface_hub\nimport pytest\nfrom datasets import load_dataset\n\n\n@"
  },
  {
    "path": "data/dai_docs.train.json",
    "chars": 993323,
    "preview": "[\n  {\n    \"output\": \" BlueData DataTap Setup\\n\\n\\nThis section provides instructions for configuring Driverless AI to wo"
  },
  {
    "path": "data/dai_docs.train_cleaned.json",
    "chars": 1209336,
    "preview": "[\n  {\n    \"output\": \"Monotonicity Constraints\\nMonotonicity can be enforced for the feature engineering pipeline, the\\nf"
  },
  {
    "path": "data/dai_docs.valid.json",
    "chars": 13218,
    "preview": "[\n  {\n    \"output\": \" .. _feature_store:\\n\\n##########################\\nH2O AI Feature Store Setup\\n####################"
  },
  {
    "path": "data/dai_faq.json",
    "chars": 74830,
    "preview": "[\n  {\n    \"instruction\": \"How is Driverless AI different than any other black box ML algorithm?\",\n    \"output\": \"Driverl"
  },
  {
    "path": "data/h2ogpt-personality.json",
    "chars": 22818,
    "preview": "[\n  {\n    \"input\": \"<human>: What's your name? <bot>: I'm h2oGPT, a large language model by H2O.ai. \",\n    \"prompt_type\""
  },
  {
    "path": "data/merged.json",
    "chars": 5965566,
    "preview": "[\n  {\n    \"prompt_type\": \"human_bot\",\n    \"instruction\": \"Explain the following expert setting for Driverless AI\",\n    \""
  },
  {
    "path": "dev_installers/mac/README.md",
    "chars": 2384,
    "preview": "# One Click Installers for MacOS\n\nThis document provide the details to build one click installers for MacOS. To manually"
  },
  {
    "path": "dev_installers/mac/build_mac_installer.sh",
    "chars": 3441,
    "preview": "# This script should be run from project root\n\n# Create conda environment to build installer\nif ! command -v conda &> /d"
  },
  {
    "path": "dev_installers/mac/h2ogpt-osx-m1-cpu.spec",
    "chars": 1837,
    "preview": "# -*- mode: python ; coding: utf-8 -*-\nfrom PyInstaller.utils.hooks import collect_data_files\nfrom PyInstaller.utils.hoo"
  },
  {
    "path": "dev_installers/mac/h2ogpt-osx-m1-gpu.spec",
    "chars": 1837,
    "preview": "# -*- mode: python ; coding: utf-8 -*-\nfrom PyInstaller.utils.hooks import collect_data_files\nfrom PyInstaller.utils.hoo"
  },
  {
    "path": "dev_installers/mac/mac_run_app.py",
    "chars": 1503,
    "preview": "import os\nimport sys\nimport time\nimport webbrowser\n\nprint('__file__: %s' % __file__)\npath1 = os.path.dirname(os.path.abs"
  },
  {
    "path": "docker-compose-cpu.yml",
    "chars": 295,
    "preview": "version: '3'\n\nservices:\n  h2ogpt:\n    build:\n      context: .\n      dockerfile: Dockerfile\n    restart: always\n    shm_s"
  },
  {
    "path": "docker-compose-vllm.yml",
    "chars": 1377,
    "preview": "version: '3'\n\nservices:\n  h2ogpt:\n    build:\n      context: .\n      dockerfile: Dockerfile\n    restart: always\n    shm_s"
  },
  {
    "path": "docker-compose.yml",
    "chars": 453,
    "preview": "version: '3'\n\nservices:\n  h2ogpt:\n    build:\n      context: .\n      dockerfile: Dockerfile\n    restart: always\n    shm_s"
  },
  {
    "path": "docker_build_script_ubuntu.sh",
    "chars": 4190,
    "preview": "#!/bin/bash\nset -o pipefail\nset -ex\n\nexport DEBIAN_FRONTEND=noninteractive\nexport PATH=/h2ogpt_conda/bin:$PATH\nexport HO"
  },
  {
    "path": "docs/Dockerfile.delta2",
    "chars": 2318,
    "preview": "FROM gcr.io/vorvan/h2oai/h2oai-h2ogpt-runtime:0.2.1-1290\nUSER root\n#\nCOPY src/enums.py /workspace/src/enums.py\nCOPY src/"
  },
  {
    "path": "docs/Dockerfile.internvl",
    "chars": 1032,
    "preview": "# if from main:\n# (base) ubuntu@compute-permanent-node-406:~/lmdeploy$ docker build . -f docker/Dockerfile -t internvlma"
  },
  {
    "path": "docs/FAQ.md",
    "chars": 147100,
    "preview": "## Frequently asked questions\n\n### vLLM driver requirements\n\nvllm >= 0.5.0 requires a cuda >=12.4 driver, else docker wi"
  },
  {
    "path": "docs/FINETUNE.md",
    "chars": 4898,
    "preview": "## Fine-tuning\n\nMake sure you have followed the [native installation instructions](INSTALL.md).\n\n\n### Fine-tuning vs Pre"
  },
  {
    "path": "docs/INSTALL.md",
    "chars": 4859,
    "preview": "## h2oGPT Installation Help\n\nThe following sections describe how to get a working Python environment on a Linux system.\n"
  },
  {
    "path": "docs/LINKS.md",
    "chars": 12964,
    "preview": "### Code to consider including:\n[flan-alpaca](https://github.com/declare-lab/flan-alpaca)<br />\n[text-generation-webui]("
  },
  {
    "path": "docs/README_Agents.md",
    "chars": 596,
    "preview": "## h2oGPT integration with LangChain Agents\n\nVarious agents from LangChain are included:\n* Search -- Works sometimes wit"
  },
  {
    "path": "docs/README_CLI.md",
    "chars": 1416,
    "preview": "### CLI chat\n\nThe CLI can be used instead of gradio by running for some base model, e.g.:\n```bash\npython generate.py --b"
  },
  {
    "path": "docs/README_CLIENT.md",
    "chars": 27690,
    "preview": "## Client APIs\n\nA Gradio API and an OpenAI-compliant API are supported. You can also use `curl` to some extent for basic"
  },
  {
    "path": "docs/README_CPU.md",
    "chars": 2382,
    "preview": "## CPU Details\n\nDetails that do not depend upon whether you are running on CPU for Linux, Windows, or macOS.\n\n### LLaMa."
  },
  {
    "path": "docs/README_DOCKER.md",
    "chars": 20319,
    "preview": "# Run or Build h2oGPT Docker\n\n* Install Docker for [Linux](https://docs.docker.com/engine/install/ubuntu/)\n* Install Doc"
  },
  {
    "path": "docs/README_GPU.md",
    "chars": 10742,
    "preview": "# GPU Details\n\nHugging Face type models and [LLaMa.cpp models](https://github.com/ggerganov/llama.cpp#description) are s"
  },
  {
    "path": "docs/README_InferenceServers.md",
    "chars": 34102,
    "preview": "# Inference Servers\n\nOne can connect to Hugging Face text generation inference server, gradio servers running h2oGPT, Op"
  },
  {
    "path": "docs/README_LINUX.md",
    "chars": 11534,
    "preview": "# Linux\n\nThis page describes how to manually install and run h2oGPT on Linux. Note that the following instructions are f"
  },
  {
    "path": "docs/README_LangChain.md",
    "chars": 35531,
    "preview": "## h2oGPT integration with LangChain and Chroma/FAISS/Qdrant/Weaviate for Vector DB\n\nOur goal is to make it easy to have"
  },
  {
    "path": "docs/README_MACOS.md",
    "chars": 5314,
    "preview": "# macOS\n\nSupports CPU and MPS (Metal M1/M2).\n\n- [Install](#install)\n- [Run](#run)\n\n## Install\n* Download and Install [Mi"
  },
  {
    "path": "docs/README_SerpAPI.md",
    "chars": 657,
    "preview": "## h2oGPT integration with LangChain and SerpAPI\n\nWeb search augments LLM context with additional information obtained f"
  },
  {
    "path": "docs/README_WHEEL.md",
    "chars": 3959,
    "preview": "# Python Wheel\n\n### Building wheel for your platform\n\n```bash\ngit clone https://github.com/h2oai/h2ogpt.git\ncd h2ogpt\npy"
  },
  {
    "path": "docs/README_WINDOWS.md",
    "chars": 6282,
    "preview": "# Windows 10/11\n\n* Single `.bat` file for installation (if you do not skip any optional packages, takes about 9GB filled"
  },
  {
    "path": "docs/README_offline.md",
    "chars": 11299,
    "preview": "# Offline Mode and Security:\n\n## TL;DR\n\nTo run offline, either do smart or manual way.\n\n* Smart Download\n    1) Run onli"
  },
  {
    "path": "docs/README_quickstart.md",
    "chars": 3753,
    "preview": "# Quick Start\n\n## Install\n\nTo quickly try out h2oGPT with limited document Q/A capability, create a fresh Python 3.10 en"
  },
  {
    "path": "docs/README_ui.md",
    "chars": 17701,
    "preview": "# UI overview\n\nBy default, `generate.py` runs a Gradio server with a UI as well as an OpenAI server wrapping the Gradio "
  },
  {
    "path": "docs/TRITON.md",
    "chars": 3860,
    "preview": "## Triton Inference Server\n\nTo get optimal performance for inference for h2oGPT models, we will be using the [FastTransf"
  },
  {
    "path": "docs/autogen.patch",
    "chars": 1602,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/token_count_utils.py\t2024-07-29 21:31:51.63085"
  },
  {
    "path": "docs/autogen2.patch",
    "chars": 3788,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/autogen/agentchat/conversable_agent.py\t2024-08-30 22:5"
  },
  {
    "path": "docs/build_windows_gpu.sh",
    "chars": 2985,
    "preview": "# https://pypi.org/project/pynsist/\n# https://stackoverflow.com/questions/69352179/package-streamlit-app-and-run-executa"
  },
  {
    "path": "docs/google.patch",
    "chars": 819,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/langchain_google_genai/chat_models.py\t2024-07-25 17:02"
  },
  {
    "path": "docs/h2oGPT_CPU.ipynb",
    "chars": 91449,
    "preview": "{\n  \"metadata\": {\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"name\": \"python3\"\n    },\n    \"language_inf"
  },
  {
    "path": "docs/h2oGPT_GPU.ipynb",
    "chars": 132160,
    "preview": "{\n  \"metadata\": {\n    \"kernelspec\": {\n      \"display_name\": \"Python 3\",\n      \"name\": \"python3\"\n    },\n    \"language_inf"
  },
  {
    "path": "docs/linux_install.sh",
    "chars": 12103,
    "preview": "#!/bin/bash\nset -o pipefail\nset -ex\n\nshopt -s expand_aliases\nif ! test -f /usr/bin/sudo; then\n  echo \"No sudo\"\n  alias s"
  },
  {
    "path": "docs/linux_install_full.sh",
    "chars": 2206,
    "preview": "#!/bin/bash\nset -o pipefail\nset -ex\n\necho -e \"\\n\\n\\n\\t\\tSTART\\n\\n\\n\";\n\n# ensure not in h2ogpt repo folder\ncd $HOME\n\n# Ch"
  },
  {
    "path": "docs/openai.patch",
    "chars": 440,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/openai/_streaming.py\t2024-07-30 23:11:13.902075163 -07"
  },
  {
    "path": "docs/pytubefix.patch",
    "chars": 467,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/pytubefix/extract.py\t2024-08-17 02:11:12.847159003 -07"
  },
  {
    "path": "docs/run_patches.sh",
    "chars": 3235,
    "preview": "#!/bin/bash\nset -o pipefail\nset -ex\n\n#\n#* Deal with not-thread-safe things in LangChain:\n#\nsp=`python3.10 -c 'import sit"
  },
  {
    "path": "docs/setup_docker_linux.sh",
    "chars": 2557,
    "preview": "# BUILD\nsudo apt-get update\nsudo apt install software-properties-common\nsudo apt-get install build-essential\n\n# DRIVER +"
  },
  {
    "path": "docs/tos.md",
    "chars": 4810,
    "preview": "Terms of Service for H2O.ai's hosted running versions of h2oGPT, e.g. on Hugging Face Spaces, AWS, Google Colab, Kaggle,"
  },
  {
    "path": "docs/trans.patch",
    "chars": 1086,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/modeling_utils.py\t2024-06-17 10:32:30.807"
  },
  {
    "path": "docs/trans2.patch",
    "chars": 2186,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/transformers/generation/utils.py\t2024-07-25 14:52:00.8"
  },
  {
    "path": "docs/windows_freezelist.txt",
    "chars": 9220,
    "preview": "absl-py==2.0.0\naccelerate==0.25.0\naiofiles==23.2.1\naiohttp==3.9.1\naiosignal==1.3.1\naltair==5.2.0\nannotated-types==0.6.0\n"
  },
  {
    "path": "docs/windows_install.bat",
    "chars": 6514,
    "preview": "@echo off\n\nCALL conda install weasyprint pygobject -c conda-forge -y\n\nREM Install primary dependencies.\nREM Remove any b"
  },
  {
    "path": "docs/xtt.patch",
    "chars": 1571,
    "preview": "--- /home/jon/miniconda3/envs/h2ogpt/lib/python3.10/site-packages/TTS/tts/layers/xtts/stream_generator.py\t2024-07-14 17:"
  },
  {
    "path": "finetune.py",
    "chars": 30782,
    "preview": "import os\nimport sys\nfrom functools import partial\nfrom typing import List, Union\nimport numpy as np\n\nif os.path.dirname"
  },
  {
    "path": "generate.py",
    "chars": 375,
    "preview": "import os\nimport sys\n\nif os.path.dirname(os.path.abspath(__file__)) not in sys.path:\n    sys.path.append(os.path.dirname"
  },
  {
    "path": "gradio_utils/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "gradio_utils/css.py",
    "chars": 5616,
    "preview": "def get_css(kwargs, select_string) -> str:\n    if kwargs['h2ocolors']:\n        css_code = \"\"\"footer {visibility: hidden;"
  },
  {
    "path": "gradio_utils/google_auth.py",
    "chars": 5896,
    "preview": "from enums import split_google\nfrom utils import sanitize_filename\n\n\ndef setup_app(name_login='google_login', name_app='"
  },
  {
    "path": "gradio_utils/grclient.py",
    "chars": 81499,
    "preview": "from __future__ import annotations\n\nimport atexit\nimport concurrent\nimport copy\nimport difflib\nimport re\nimport threadin"
  },
  {
    "path": "gradio_utils/prompt_form.py",
    "chars": 18093,
    "preview": "import functools\nimport os\nimport math\nimport csv\nimport datetime\n\nimport filelock\nimport gradio as gr\n\nfrom utils impor"
  },
  {
    "path": "gradio_utils/yield_utils.py",
    "chars": 775,
    "preview": "from pydantic import BaseModel\n\n\nclass ReturnType(BaseModel):\n    reply: str | list[str] | None\n    reply_final: str | l"
  },
  {
    "path": "h2ogpt/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "helm/h2ogpt-chart/.helmignore",
    "chars": 349,
    "preview": "# Patterns to ignore when building packages.\n# This supports shell glob matching, relative path matching, and\n# negation"
  },
  {
    "path": "helm/h2ogpt-chart/Chart.yaml",
    "chars": 1143,
    "preview": "apiVersion: v2\nname: h2ogpt\ndescription: A Helm chart for h2ogpt\n\n# A chart can be either an 'application' or a 'library"
  },
  {
    "path": "helm/h2ogpt-chart/templates/_helpers.tpl",
    "chars": 1965,
    "preview": "{{/*\nExpand the name of the chart.\n*/}}\n{{- define \"h2ogpt.name\" -}}\n{{- default .Chart.Name .Values.nameOverride | trun"
  },
  {
    "path": "helm/h2ogpt-chart/templates/config-map.yaml",
    "chars": 1926,
    "preview": "\n{{- if .Values.h2ogpt.enabled }}\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: {{ include \"h2ogpt.fullname\" . }}-con"
  },
  {
    "path": "helm/h2ogpt-chart/templates/deployment.yaml",
    "chars": 32286,
    "preview": "{{- if and .Values.vllm.enabled .Values.tgi.enabled }}\n  {{- fail \"Both TGI and vLLM cannot be enabled at the same time."
  },
  {
    "path": "helm/h2ogpt-chart/templates/service.yaml",
    "chars": 2630,
    "preview": "{{- if .Values.h2ogpt.enabled }}\napiVersion: v1\nkind: Service\nmetadata:\n  name: {{ include \"h2ogpt.fullname\" . }}-web\n  "
  },
  {
    "path": "helm/h2ogpt-chart/values.yaml",
    "chars": 5605,
    "preview": "nameOverride: \"\"\nfullnameOverride: \"\"\nnamespaceOverride: \"\"\n\nh2ogpt:\n  enabled: true\n  stack:\n    # -- Run h2oGPT and vL"
  },
  {
    "path": "iterators/__init__.py",
    "chars": 218,
    "preview": "from .timeout_iterator import TimeoutIterator, AsyncTimeoutIterator\nfrom .iterator_pipe import IteratorPipe, AsyncIterat"
  },
  {
    "path": "iterators/iterator_pipe.py",
    "chars": 2519,
    "preview": "import queue\nimport asyncio\n\n\nclass IteratorPipe:\n    \"\"\"\n    Iterator Pipe creates an iterator that can be fed in data "
  },
  {
    "path": "iterators/timeout_iterator.py",
    "chars": 5600,
    "preview": "import queue\nimport asyncio\nimport threading\nimport traceback\n\n\nclass TimeoutIterator:\n    \"\"\"\n    Wrapper class to add "
  },
  {
    "path": "metrics/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "metrics/quip.py",
    "chars": 9899,
    "preview": "import os\n\nimport datasets\nimport pandas as pd\nimport sacrebleu as scb\nfrom packaging import version\nfrom sacrebleu impo"
  },
  {
    "path": "models/README-template.md",
    "chars": 4995,
    "preview": "---\nlicense: apache-2.0\nlanguage:\n- en\nlibrary_name: transformers\ninference: false\nthumbnail: https://h2o.ai/etc.clientl"
  },
  {
    "path": "models/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "models/create_model_cards.py",
    "chars": 13159,
    "preview": "import shutil\nimport os\n\nimport huggingface_hub\nimport pytest\nimport torch\nfrom transformers import AutoModelForCausalLM"
  },
  {
    "path": "models/gpu_mem_track.py",
    "chars": 5024,
    "preview": "import gc\nimport datetime\nimport inspect\n\nimport torch\nimport numpy as np\n\ndtype_memory_size_dict = {\n    torch.float64:"
  },
  {
    "path": "models/makevllm.sh",
    "chars": 1386,
    "preview": "pip download openai==1.3.7 --no-deps\nmkdir -p openai_wheel\nmv openai-1.3.7-py3-none-any.whl openai_wheel\ncd openai_wheel"
  },
  {
    "path": "models/predict_aquila.py",
    "chars": 14715,
    "preview": "\"\"\"\nCopied from https://github.com/lm-sys/FastChat.\nLater we will contribute our changes into it.\n\"\"\"\nimport dataclasses"
  },
  {
    "path": "models/test_scrape1.py",
    "chars": 1409,
    "preview": "import os\nos.environ[\"COQUI_TOS_AGREED\"] = \"1\"\n\n\nimport pytest\nfrom tests.utils import wrap_test_forked\n\nfrom TTS.api im"
  },
  {
    "path": "notebooks/h2oGPT_api_examples.ipynb",
    "chars": 99684,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# h2oGPT API call example\\n\",\n    \""
  },
  {
    "path": "openai_server/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "openai_server/agent_prompting.py",
    "chars": 63560,
    "preview": "import ast\nimport json\nimport os\nimport sys\nimport tempfile\nimport uuid\n\nfrom openai_server.agent_utils import get_have_"
  },
  {
    "path": "openai_server/agent_tools/aider_code_generation.py",
    "chars": 3477,
    "preview": "import argparse\nimport os\nimport subprocess\nimport sys\n\ntry:\n    from importlib.metadata import distribution, PackageNot"
  },
  {
    "path": "openai_server/agent_tools/ask_question_about_documents.py",
    "chars": 12597,
    "preview": "import json\nimport os\nimport argparse\nimport re\nimport sys\nimport time\nimport uuid\n\nif 'src' not in sys.path:\n    sys.pa"
  },
  {
    "path": "openai_server/agent_tools/ask_question_about_image.py",
    "chars": 6681,
    "preview": "import os\nimport argparse\nimport tempfile\nimport logging\nimport time\n\n\n# Set up logging\nlogging.basicConfig(level=loggin"
  },
  {
    "path": "openai_server/agent_tools/audio_transcription.py",
    "chars": 3734,
    "preview": "import os\nimport argparse\nimport uuid\n\n\ndef check_valid_extension(file):\n    \"\"\"\n    OpenAI only allows certain file typ"
  },
  {
    "path": "openai_server/agent_tools/bing_search.py",
    "chars": 6693,
    "preview": "import os\nimport argparse\nimport json\nfrom azure.core.credentials import AzureKeyCredential\nfrom web_search_client impor"
  },
  {
    "path": "openai_server/agent_tools/common/utils.py",
    "chars": 2547,
    "preview": "import os\nimport shutil\nimport uuid\nfrom urllib.parse import urlparse\n\nimport requests\n\n\ndef is_url_valid_and_alive(url,"
  },
  {
    "path": "openai_server/agent_tools/convert_document_to_text.py",
    "chars": 9599,
    "preview": "import argparse\nimport sys\nimport uuid\n\nif 'src' not in sys.path:\n    sys.path.append('src')\n\nfrom src.function_client i"
  },
  {
    "path": "openai_server/agent_tools/download_web_video.py",
    "chars": 4036,
    "preview": "import argparse\nimport os\nimport random\n\n\ndef selenium(base_url, video_url):\n    from selenium import webdriver\n    from"
  },
  {
    "path": "openai_server/agent_tools/driverless_ai_data_science.py",
    "chars": 28685,
    "preview": "import argparse\nimport os\nimport shutil\nfrom zipfile import ZipFile\n\nimport pandas as pd\nfrom matplotlib import pyplot a"
  },
  {
    "path": "openai_server/agent_tools/google_search.py",
    "chars": 14878,
    "preview": "import os\nimport argparse\nimport json\nfrom typing import Dict, Any\nfrom serpapi import (\n    SerpApiClient, GoogleSearch"
  },
  {
    "path": "openai_server/agent_tools/image_generation.py",
    "chars": 7380,
    "preview": "import ast\nimport base64\nimport os\nimport argparse\nimport sys\nimport uuid\n\n\ndef main():\n    parser = argparse.ArgumentPa"
  },
  {
    "path": "openai_server/agent_tools/mermaid_renderer.py",
    "chars": 5587,
    "preview": "import argparse\nimport os\nimport subprocess\nimport tempfile\nimport datetime\nimport random\nimport string\nimport shlex\nimp"
  },
  {
    "path": "openai_server/agent_tools/news_query.py",
    "chars": 6144,
    "preview": "import requests\nimport os\nimport argparse\nfrom datetime import datetime, timedelta\n\n\ndef fetch_everything(api_key, query"
  },
  {
    "path": "openai_server/agent_tools/query_to_web_image.py",
    "chars": 2217,
    "preview": "import matplotlib\n\nmatplotlib.use('Agg')  # Set the backend to non-interactive\nimport matplotlib.pyplot as plt\n\nplt.ioff"
  },
  {
    "path": "openai_server/agent_tools/scholar_papers_query.py",
    "chars": 7315,
    "preview": "import os\nimport argparse\nimport requests\nimport json\nfrom semanticscholar import SemanticScholar\nimport arxiv\n\n\ndef set"
  },
  {
    "path": "openai_server/agent_tools/wolfram_alpha_math_science_query.py",
    "chars": 3935,
    "preview": "import wolframalpha\nimport requests\nimport os\nimport argparse\n\n\ndef sanitize_filename(name):\n    bad_chars = ['[', ']', "
  },
  {
    "path": "openai_server/agent_utils.py",
    "chars": 16339,
    "preview": "import functools\nimport inspect\nimport os\nimport re\nimport shutil\nimport sys\nimport time\n\nimport requests\nfrom PIL impor"
  },
  {
    "path": "openai_server/autogen_2agent_backend.py",
    "chars": 15453,
    "preview": "import os\nimport tempfile\nimport uuid\n\nfrom openai_server.backend_utils import structure_to_messages, run_download_api_a"
  },
  {
    "path": "openai_server/autogen_agents.py",
    "chars": 8703,
    "preview": "from openai_server.autogen_utils import terminate_message_func\nfrom openai_server.agent_utils import current_datetime\n\n\n"
  },
  {
    "path": "openai_server/autogen_multi_agent_backend.py",
    "chars": 10342,
    "preview": "import os\nimport tempfile\n\nfrom autogen.agentchat import gather_usage_summary\n\nfrom openai_server.backend_utils import s"
  },
  {
    "path": "openai_server/autogen_streaming.py",
    "chars": 3708,
    "preview": "import asyncio\nimport multiprocessing\nimport queue\nimport threading\nimport traceback\nimport typing\nfrom contextlib impor"
  },
  {
    "path": "openai_server/autogen_utils.py",
    "chars": 56907,
    "preview": "import asyncio\nimport copy\nimport functools\nimport json\nimport logging\nimport os\nimport re\nimport shutil\nimport subproce"
  },
  {
    "path": "openai_server/backend.py",
    "chars": 32229,
    "preview": "import ast\nimport asyncio\nimport base64\nimport functools\nimport io\nimport json\nimport os\nimport platform\nimport re\nimpor"
  },
  {
    "path": "openai_server/backend_utils.py",
    "chars": 15691,
    "preview": "import json\nimport os\nimport re\nimport uuid\nfrom collections import defaultdict\n\n\ndef concatenate_messages(messages, rol"
  },
  {
    "path": "openai_server/chat_history_render.py",
    "chars": 6289,
    "preview": "import re\nimport textwrap\nfrom typing import List, Dict\n\nmarkdown_mark = \"---\"\n\n\ndef chat_to_pretty_markdown(\n        ch"
  },
  {
    "path": "openai_server/cogvlm2_server/cogvlm2.py",
    "chars": 14263,
    "preview": "# https://raw.githubusercontent.com/THUDM/CogVLM2/main/basic_demo/openai_api_demo.py\nimport asyncio\n# HOST=0.0.0.0 PORT="
  },
  {
    "path": "openai_server/cogvlm2_server/requirements.txt",
    "chars": 328,
    "preview": "# https://raw.githubusercontent.com/THUDM/CogVLM2/main/basic_demo/requirements.txt\nxformers\ntorch>=2.0.0\ntorchvision\ntra"
  },
  {
    "path": "openai_server/log.py",
    "chars": 310,
    "preview": "import logging\n\n# create logger\nlogger = logging.getLogger('__name__')\nlevel = logging.INFO\nlogger.setLevel(level)\n\n# --"
  },
  {
    "path": "openai_server/openai_client.py",
    "chars": 30414,
    "preview": "import ast\nimport contextlib\nimport gc\nimport os\nimport shutil\nimport tempfile\nimport threading\nimport traceback\nimport "
  },
  {
    "path": "openai_server/server.py",
    "chars": 46155,
    "preview": "import copy\nimport io\nimport logging\nimport os\nimport sys\nimport ast\nimport json\nimport time\nimport traceback\nimport uui"
  },
  {
    "path": "openai_server/server_start.py",
    "chars": 10106,
    "preview": "import inspect\nimport json\nimport os\nimport subprocess\nimport sys\nimport argparse\nimport logging\nimport typing\nimport uu"
  },
  {
    "path": "openai_server/test_autogen_utils.py",
    "chars": 21453,
    "preview": "import re\nfrom pathlib import Path\n\nimport pytest\n\nfrom openai_server.autogen_utils import H2OLocalCommandLineCodeExecut"
  },
  {
    "path": "openai_server/test_backend_utils.py",
    "chars": 2658,
    "preview": "import sys\n\n\ndef test_extract_xml_tags():\n    xml_input = \"\"\"\n<doc>\n<name>Zulu is hot..pdf</name>\n<page>1</page>\n<text>\n"
  },
  {
    "path": "openai_server/test_conversion.py",
    "chars": 36495,
    "preview": "import os\nimport sys\nfrom typing import List, Dict\n\nimport pytest\n\nsys.path.append('openai_server')\nfrom openai_server.b"
  },
  {
    "path": "openai_server/test_openai_server.py",
    "chars": 25044,
    "preview": "import json\nimport shutil\nimport sys\nimport tempfile\nimport time\nimport uuid\n\nimport pytest\nimport os\nimport ast\n\n# to a"
  },
  {
    "path": "openai_server/test_prompt_caching.py",
    "chars": 4113,
    "preview": "import sys\n\nimport pytest\nfrom typing import List, Dict\n\nif 'src' not in sys.path:\n    sys.path.append('src')\n\nfrom src."
  },
  {
    "path": "papers/technical-report/compile.sh",
    "chars": 37,
    "preview": "#!/bin/sh\nlatexmk -pdf h2oGPT-TR.tex\n"
  },
  {
    "path": "papers/technical-report/conf.sty",
    "chars": 11333,
    "preview": "% partial rewrite of the LaTeX2e package for submissions to the\n% Conference on Neural Information Processing Systems (N"
  },
  {
    "path": "papers/technical-report/h2oGPT-TR.tex",
    "chars": 68079,
    "preview": "\\documentclass{article}\n\n\\usepackage[final]{conf}\n\n\\usepackage[utf8]{inputenc} % allow utf-8 input\n\\usepackage[T1]{fonte"
  },
  {
    "path": "reqs_optional/reqs_constraints.txt",
    "chars": 395,
    "preview": "# ensure doesn't drift, e.g. Issue #1348\ntorch==2.2.1; sys_platform != \"darwin\" and platform_machine != \"arm64\"\ntorch==2"
  },
  {
    "path": "reqs_optional/requirements_optional_agents.txt",
    "chars": 1670,
    "preview": "google-search-results>=2.4.2\n\n# for AutoGPT:\nduckduckgo-search>=4.1.1\ngradio_tools>=0.0.9\nwikipedia>=1.4.0\nwolframalpha>"
  },
  {
    "path": "reqs_optional/requirements_optional_audio.txt",
    "chars": 991,
    "preview": "pydub>=0.25.1\nlibrosa>=0.10.1\nffmpeg>=1.4\nyt_dlp>=2024.10.22\nwavio>=0.0.8\n# Audio speed-up and slowdown (best quality), "
  },
  {
    "path": "reqs_optional/requirements_optional_cpu_only.txt",
    "chars": 55,
    "preview": "faiss-cpu>=1.7.4\n# for unstructured\nonnxruntime==1.15.0"
  },
  {
    "path": "reqs_optional/requirements_optional_doctr.txt",
    "chars": 160,
    "preview": "python-doctr @ git+https://github.com/h2oai/doctr.git@aee9b1c369e37af9e18265660935bce2c4447d65\nweasyprint>=60.1\nimutils>"
  },
  {
    "path": "reqs_optional/requirements_optional_gpu_only.txt",
    "chars": 429,
    "preview": "faiss-gpu>=1.7.2\n# for unstructured\nonnxruntime-gpu==1.15.0\nauto-gptq>=0.7.1\n#optimum>=1.17.1\n# autoawq for cuda 12.1, e"
  },
  {
    "path": "reqs_optional/requirements_optional_image.txt",
    "chars": 176,
    "preview": "# Vision/Image packages\nfiftyone>=0.24.1\npytube\ndiffusers>=0.29.0\nyt-dlp>=2024.8.6\n# if want to use gif_to_mp4()\n# movie"
  },
  {
    "path": "reqs_optional/requirements_optional_langchain.gpllike.txt",
    "chars": 96,
    "preview": "pymupdf>=1.23.8 # AGPL license\npymupdf4llm>=0.0.12 # AGPL license\n# extract-msg==0.41.1  # GPL3\n"
  },
  {
    "path": "reqs_optional/requirements_optional_langchain.metrics.txt",
    "chars": 285,
    "preview": "bert_score>=0.3.13\nevaluate @ git+https://github.com/huggingface/evaluate@7d7d81dd3ffec0812e2edb09f86b3b1e31d61118\nsacre"
  },
  {
    "path": "reqs_optional/requirements_optional_langchain.txt",
    "chars": 2774,
    "preview": "# ensure constrained to requirements.txt version:\ntorch==2.2.1; sys_platform != \"darwin\" and platform_machine != \"arm64\""
  },
  {
    "path": "reqs_optional/requirements_optional_langchain.urls.txt",
    "chars": 181,
    "preview": "# sometimes unstructured fails, these work in those cases.  See Issue #320\nplaywright>=1.37.0\n# requires Chrome binary t"
  },
  {
    "path": "reqs_optional/requirements_optional_llamacpp_gpt4all.txt",
    "chars": 88,
    "preview": "gpt4all==1.0.5\n\n# requires env to be set for specific systems\nllama-cpp-python==0.2.87\n\n"
  },
  {
    "path": "reqs_optional/requirements_optional_training.txt",
    "chars": 77,
    "preview": "#xformers==0.0.20\n# optional for finetune\ntensorboard>=2.13.0\nneptune>=1.2.0\n"
  },
  {
    "path": "reqs_optional/requirements_optional_wikiprocessing.txt",
    "chars": 116,
    "preview": "# Only for converting full wiki into db, not required to use db for wiki_full\nmwxml>=0.3.3\nmwparserfromhell>=0.6.4\n\n"
  },
  {
    "path": "requirements.txt",
    "chars": 2902,
    "preview": "# no websockets, more cloud friendly\n# able to make gradio clean-up states\n\n# gradio @ https://h2o-release.s3.amazonaws."
  },
  {
    "path": "setup.py",
    "chars": 3790,
    "preview": "import os\n\nimport setuptools\nfrom typing import List\nfrom setuptools import find_packages\n\nfor_pypi = os.getenv('PYPI') "
  },
  {
    "path": "spaces/chatbot/repo_to_spaces.sh",
    "chars": 2241,
    "preview": "#!/bin/sh\n\n# NOTE: start in h2ogpt repo base directory\n# i.e. can run below to update both spaces (assumes repos already"
  },
  {
    "path": "spaces/demo/app.py",
    "chars": 2421,
    "preview": "import gradio as gr\nimport torch\nimport os\nfrom transformers import AutoTokenizer, AutoModelForCausalLM\n\ntheme = gr.them"
  },
  {
    "path": "spaces/demo/app_client_test.py",
    "chars": 1003,
    "preview": "\"\"\"\nClient test.\n\nRun server:\n\npython app.py\n\nThen run this client:\n\npython app_client_test.py\n\nNOTE: To access a privat"
  },
  {
    "path": "spaces/demo/requirements.txt",
    "chars": 53,
    "preview": "transformers==4.28.1\ntorch==2.0.0\naccelerate==0.18.0\n"
  },
  {
    "path": "src/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "src/audio_langchain.py",
    "chars": 17045,
    "preview": "import logging\nimport os\nimport tempfile\nimport time\nimport uuid\nfrom typing import Dict, Iterator, Optional, Tuple\n\nfro"
  },
  {
    "path": "src/basic_nltk.py",
    "chars": 504,
    "preview": "import os\n\nos.environ['NLTK_DATA'] = './nltk_data'\n\nfrom nltk.downloader import download\n\n# download('all')\ndownload('to"
  },
  {
    "path": "src/cli.py",
    "chars": 9765,
    "preview": "import copy\nimport torch\n\nfrom evaluate_params import eval_func_param_names, input_args_list\nfrom gen import evaluate, c"
  },
  {
    "path": "src/client_test.py",
    "chars": 26736,
    "preview": "\"\"\"\nClient test.\n\nRun server:\n\npython generate.py  --base_model=h2oai/h2ogpt-oig-oasst1-512-6_9b\n\nNOTE: For private mode"
  },
  {
    "path": "src/create_data.py",
    "chars": 80120,
    "preview": "\"\"\"\nDataset creation tools.\n\nKeep to-level imports clean of non-trivial imports for specific tools,\nbecause this file is"
  },
  {
    "path": "src/db_utils.py",
    "chars": 9990,
    "preview": "import json\nimport os\nimport sqlite3\nimport uuid\n\nfrom enums import LangChainMode\n\n\ndef set_userid(db1s, requests_state1"
  },
  {
    "path": "src/enums.py",
    "chars": 51452,
    "preview": "from enum import Enum\n\n\nclass PromptType(Enum):\n    template = -3\n    unknown = -2\n    custom = -1\n    plain = 0\n    ins"
  },
  {
    "path": "src/eval.py",
    "chars": 16317,
    "preview": "import os\nimport numpy as np\nimport pandas as pd\nimport torch\nfrom matplotlib import pyplot as plt\n\nfrom evaluate_params"
  },
  {
    "path": "src/evaluate_params.py",
    "chars": 4600,
    "preview": "input_args_list = [\n    \"model_state\",\n    \"my_db_state\",\n    \"selection_docs_state\",\n    \"requests_state\",\n    \"roles_s"
  },
  {
    "path": "src/export_hf_checkpoint.py",
    "chars": 10108,
    "preview": "import os\nimport json\nimport shutil\nimport subprocess\n\nimport torch\nfrom accelerate import infer_auto_device_map, dispat"
  },
  {
    "path": "src/function_client.py",
    "chars": 3335,
    "preview": "import os\nimport pickle\n\nimport requests\nimport json\n\n\ndef execute_function_on_server(host: str, port: int, function_nam"
  },
  {
    "path": "src/function_server.py",
    "chars": 6578,
    "preview": "import asyncio\nimport os\nimport pickle\nimport sys\nimport json\nimport inspect\nimport threading\nimport traceback\nimport uu"
  },
  {
    "path": "src/gen.py",
    "chars": 306136,
    "preview": "import ast\nimport copy\nimport functools\nimport inspect\nimport queue\nimport sys\nimport os\nimport json\nimport time\nimport "
  },
  {
    "path": "src/gpt4all_llm.py",
    "chars": 22258,
    "preview": "import inspect\nimport os\nimport time\nfrom typing import Dict, Any, Optional, List, Iterator\n\nimport filelock\nfrom langch"
  },
  {
    "path": "src/gpt_langchain.py",
    "chars": 497954,
    "preview": "import ast\nimport asyncio\nimport copy\nimport functools\nimport glob\nimport gzip\nimport importlib\nimport inspect\nimport js"
  },
  {
    "path": "src/gradio_funcs.py",
    "chars": 76237,
    "preview": "import ast\nimport copy\nimport functools\nimport json\nimport os\nimport tempfile\nimport time\nimport traceback\nimport uuid\ni"
  },
  {
    "path": "src/gradio_runner.py",
    "chars": 442971,
    "preview": "import ast\nimport base64\nimport copy\nimport functools\nimport inspect\nimport itertools\nimport json\nimport os\nimport platf"
  },
  {
    "path": "src/gradio_themes.py",
    "chars": 10662,
    "preview": "from __future__ import annotations\n\nfrom typing import Iterable\n\nfrom gradio.themes.soft import Soft\nfrom gradio.themes "
  }
]

// ... and 87 more files (download for full content)

About this extraction

This page contains the full source code of the h2oai/h2ogpt GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 287 files (18.1 MB), approximately 4.8M tokens, and a symbol index with 1858 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo