gitextract_8x6d8dn9/

├── .gitignore
├── LICENSE
├── MODEL_LICENSE
├── README.md
├── app.py
├── app_fastapi.py
├── chatglm/
│   ├── configuration_chatglm.py
│   ├── modeling_chatglm.py
│   ├── quantization.py
│   └── tokenization_chatglm.py
├── chatglm2/
│   ├── configuration_chatglm.py
│   ├── modeling_chatglm.py
│   ├── quantization.py
│   └── tokenization_chatglm.py
├── chatglm3/
│   ├── configuration_chatglm.py
│   ├── modeling_chatglm.py
│   ├── quantization.py
│   └── tokenization_chatglm.py
├── check_bad_cache_files.py
├── download_model.py
├── env_offline.bat
├── env_venv.bat
├── glm4/
│   ├── configuration_chatglm.py
│   ├── modeling_chatglm.py
│   └── tokenization_chatglm.py
├── gptq/
│   ├── README.md
│   ├── gptq.py
│   ├── llama.py
│   ├── llama_inference.py
│   ├── modelutils.py
│   ├── quant.py
│   ├── quant_cuda.cpp
│   ├── quant_cuda_kernel.cu
│   ├── setup_cuda.py
│   └── test_kernel.py
├── predictors/
│   ├── base.py
│   ├── chatglm2_predictor.py
│   ├── chatglm3_predictor.py
│   ├── chatglm_predictor.py
│   ├── debug.py
│   ├── glm4_predictor.py
│   ├── llama.py
│   └── llama_gptq.py
├── setup_offline.bat
├── setup_venv.bat
├── start.bat
├── start_api.bat
├── start_offline.bat
├── start_offline_api.bat
├── start_offline_cmd.bat
├── start_venv.bat
├── test_fastapi.py
├── test_models.py
└── utils_env.py