Copy disabled (too large)
Download .txt
Showing preview only (12,703K chars total). Download the full file to get everything.
Repository: positive666/Prompt-Can-Anything
Branch: main
Commit: 403d3678b5e0
Files: 407
Total size: 11.9 MB
Directory structure:
gitextract__iw3ng36/
├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── README_zh.md
├── a2f.py
├── app.py
├── audio2face_pb2.py
├── audio2face_pb2_grpc.py
├── audio2face_streaming_utils.py
├── audio_segment.py
├── auto_label_demo.py
├── batch_clean_gpu.txt
├── crazy_functions/
│ ├── Langchain知识库.py
│ ├── Latex全文润色.py
│ ├── Latex全文翻译.py
│ ├── Latex输出PDF结果.py
│ ├── __init__.py
│ ├── chatglm微调工具.py
│ ├── crazy_functions_test.py
│ ├── crazy_utils.py
│ ├── latex_fns/
│ │ ├── latex_actions.py
│ │ └── latex_toolbox.py
│ ├── live_audio/
│ │ ├── aliyunASR.py
│ │ └── audio_io.py
│ ├── test_project/
│ │ ├── cpp/
│ │ │ ├── cppipc/
│ │ │ │ ├── buffer.cpp
│ │ │ │ ├── ipc.cpp
│ │ │ │ ├── policy.h
│ │ │ │ ├── pool_alloc.cpp
│ │ │ │ ├── prod_cons.h
│ │ │ │ ├── queue.h
│ │ │ │ ├── shm.cpp
│ │ │ │ ├── waiter.h
│ │ │ │ └── 来源
│ │ │ ├── libJPG/
│ │ │ │ ├── jpgd.cpp
│ │ │ │ ├── jpgd.h
│ │ │ │ ├── jpge.cpp
│ │ │ │ ├── jpge.h
│ │ │ │ └── 来源
│ │ │ └── longcode/
│ │ │ ├── jpgd.cpp
│ │ │ ├── jpge.cpp
│ │ │ └── prod_cons.h
│ │ ├── latex/
│ │ │ └── attention/
│ │ │ ├── background.tex
│ │ │ ├── introduction.tex
│ │ │ ├── model_architecture.tex
│ │ │ ├── parameter_attention.tex
│ │ │ └── 来源
│ │ ├── python/
│ │ │ └── dqn/
│ │ │ ├── __init__.py
│ │ │ ├── dqn.py
│ │ │ ├── policies.py
│ │ │ └── 来源
│ │ └── 其他测试
│ ├── 下载arxiv论文翻译摘要.py
│ ├── 交互功能函数模板.py
│ ├── 代码重写为全英文_多线程.py
│ ├── 图片生成.py
│ ├── 对话历史存档.py
│ ├── 总结word文档.py
│ ├── 总结音视频.py
│ ├── 批量Markdown翻译.py
│ ├── 批量总结PDF文档.py
│ ├── 批量总结PDF文档pdfminer.py
│ ├── 批量翻译PDF文档_多线程.py
│ ├── 数学动画生成manim.py
│ ├── 理解PDF文档内容.py
│ ├── 生成函数注释.py
│ ├── 联网的ChatGPT.py
│ ├── 联网的ChatGPT_bing版.py
│ ├── 虚空终端.py
│ ├── 解析JupyterNotebook.py
│ ├── 解析项目源代码.py
│ ├── 询问多个大语言模型.py
│ ├── 语音助手.py
│ ├── 读文章写摘要.py
│ ├── 谷歌检索小助手.py
│ ├── 辅助回答.py
│ └── 高级功能函数模板.py
├── gradio_demo.py
├── llm_cards/
│ ├── bridge_all.py
│ ├── bridge_chatglm.py
│ ├── bridge_chatgpt.py
│ ├── bridge_stackclaude.py
│ ├── core_functional.py
│ ├── crazy_functional.py
│ ├── requirements_chatglm.txt
│ └── requirements_slackclaude.txt
├── model_cards/
│ ├── Tag2Text/
│ │ ├── MANIFEST.in
│ │ ├── batch_inference.py
│ │ ├── datasets/
│ │ │ ├── openimages_common_214/
│ │ │ │ ├── imgs/
│ │ │ │ │ └── .gitkeep
│ │ │ │ ├── openimages_common_214_ram_annots.txt
│ │ │ │ ├── openimages_common_214_ram_taglist.txt
│ │ │ │ ├── openimages_common_214_tag2text_idannots.txt
│ │ │ │ └── openimages_common_214_tag2text_tagidlist.txt
│ │ │ └── openimages_rare_200/
│ │ │ ├── imgs/
│ │ │ │ └── .gitkeep
│ │ │ ├── openimages_rare_200_ram_annots.txt
│ │ │ └── openimages_rare_200_ram_taglist.txt
│ │ ├── inference_ram.py
│ │ ├── inference_ram_openset.py
│ │ ├── inference_tag2text.py
│ │ ├── ram/
│ │ │ ├── __init__.py
│ │ │ ├── configs/
│ │ │ │ ├── med_config.json
│ │ │ │ ├── q2l_config.json
│ │ │ │ └── swin/
│ │ │ │ ├── config_swinB_384.json
│ │ │ │ └── config_swinL_384.json
│ │ │ ├── data/
│ │ │ │ ├── ram_tag_list.txt
│ │ │ │ ├── ram_tag_list_chinese.txt
│ │ │ │ ├── ram_tag_list_threshold.txt
│ │ │ │ └── tag_list.txt
│ │ │ ├── inference.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bert.py
│ │ │ │ ├── ram.py
│ │ │ │ ├── swin_transformer.py
│ │ │ │ ├── tag2text.py
│ │ │ │ ├── utils.py
│ │ │ │ └── vit.py
│ │ │ ├── transform.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── openset_utils.py
│ │ ├── requirements_groundingDINO.txt
│ │ ├── setup.cfg
│ │ └── setup.py
│ ├── autoback.py
│ ├── groundingdino/
│ │ ├── __init__.py
│ │ ├── config/
│ │ │ ├── GroundingDINO_SwinB.cfg.py
│ │ │ └── GroundingDINO_SwinT_OGC.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ └── transforms.py
│ │ ├── models/
│ │ │ ├── GroundingDINO/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── backbone/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── backbone.py
│ │ │ │ │ ├── position_encoding.py
│ │ │ │ │ └── swin_transformer.py
│ │ │ │ ├── bertwarper.py
│ │ │ │ ├── csrc/
│ │ │ │ │ ├── MsDeformAttn/
│ │ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ ├── ms_deform_attn_cpu.h
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── cuda_version.cu
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── fuse_modules.py
│ │ │ │ ├── groundingdino.py
│ │ │ │ ├── ms_deform_attn.py
│ │ │ │ ├── transformer.py
│ │ │ │ ├── transformer_vanilla.py
│ │ │ │ └── utils.py
│ │ │ ├── __init__.py
│ │ │ └── registry.py
│ │ ├── util/
│ │ │ ├── __init__.py
│ │ │ ├── box_ops.py
│ │ │ ├── get_tokenlizer.py
│ │ │ ├── inference.py
│ │ │ ├── logger.py
│ │ │ ├── misc.py
│ │ │ ├── slconfig.py
│ │ │ ├── slio.py
│ │ │ ├── time_counter.py
│ │ │ ├── utils.py
│ │ │ ├── visualizer.py
│ │ │ └── vl_utils.py
│ │ └── version.py
│ ├── lama/
│ │ ├── .gitignore
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── bin/
│ │ │ ├── analyze_errors.py
│ │ │ ├── blur_predicts.py
│ │ │ ├── calc_dataset_stats.py
│ │ │ ├── debug/
│ │ │ │ └── analyze_overlapping_masks.sh
│ │ │ ├── evaluate_predicts.py
│ │ │ ├── evaluator_example.py
│ │ │ ├── extract_masks.py
│ │ │ ├── filter_sharded_dataset.py
│ │ │ ├── gen_debug_mask_dataset.py
│ │ │ ├── gen_mask_dataset.py
│ │ │ ├── gen_mask_dataset_hydra.py
│ │ │ ├── gen_outpainting_dataset.py
│ │ │ ├── make_checkpoint.py
│ │ │ ├── mask_example.py
│ │ │ ├── paper_runfiles/
│ │ │ │ ├── blur_tests.sh
│ │ │ │ ├── env.sh
│ │ │ │ ├── find_best_checkpoint.py
│ │ │ │ ├── generate_test_celeba-hq.sh
│ │ │ │ ├── generate_test_ffhq.sh
│ │ │ │ ├── generate_test_paris.sh
│ │ │ │ ├── generate_test_paris_256.sh
│ │ │ │ ├── generate_val_test.sh
│ │ │ │ ├── predict_inner_features.sh
│ │ │ │ └── update_test_data_stats.sh
│ │ │ ├── predict.py
│ │ │ ├── predict_inner_features.py
│ │ │ ├── report_from_tb.py
│ │ │ ├── sample_from_dataset.py
│ │ │ ├── side_by_side.py
│ │ │ ├── split_tar.py
│ │ │ ├── to_jit.py
│ │ │ └── train.py
│ │ ├── colab/
│ │ │ └── LaMa_inpainting.ipynb
│ │ ├── conda_env.yml
│ │ ├── configs/
│ │ │ ├── analyze_mask_errors.yaml
│ │ │ ├── data_gen/
│ │ │ │ ├── random_medium_256.yaml
│ │ │ │ ├── random_medium_512.yaml
│ │ │ │ ├── random_thick_256.yaml
│ │ │ │ ├── random_thick_512.yaml
│ │ │ │ ├── random_thin_256.yaml
│ │ │ │ └── random_thin_512.yaml
│ │ │ ├── debug_mask_gen.yaml
│ │ │ ├── eval1.yaml
│ │ │ ├── eval2.yaml
│ │ │ ├── eval2_cpu.yaml
│ │ │ ├── eval2_gpu.yaml
│ │ │ ├── eval2_jpg.yaml
│ │ │ ├── eval2_segm.yaml
│ │ │ ├── eval2_segm_test.yaml
│ │ │ ├── eval2_test.yaml
│ │ │ ├── places2-categories_157.txt
│ │ │ ├── prediction/
│ │ │ │ └── default.yaml
│ │ │ ├── test_large_30k.lst
│ │ │ └── training/
│ │ │ ├── ablv2_work.yaml
│ │ │ ├── ablv2_work_ffc075.yaml
│ │ │ ├── ablv2_work_md.yaml
│ │ │ ├── ablv2_work_no_fm.yaml
│ │ │ ├── ablv2_work_no_segmpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csdilirpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csdilirpl_celeba_csdilirpl1_new.yaml
│ │ │ ├── ablv2_work_no_segmpl_csirpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csirpl_celeba_csirpl03_new.yaml
│ │ │ ├── ablv2_work_no_segmpl_vgg.yaml
│ │ │ ├── ablv2_work_no_segmpl_vgg_celeba_l2_vgg003_new.yaml
│ │ │ ├── ablv2_work_nodil_segmpl.yaml
│ │ │ ├── ablv2_work_small_holes.yaml
│ │ │ ├── big-lama-celeba.yaml
│ │ │ ├── big-lama-regular-celeba.yaml
│ │ │ ├── big-lama-regular.yaml
│ │ │ ├── big-lama.yaml
│ │ │ ├── data/
│ │ │ │ ├── abl-02-thin-bb.yaml
│ │ │ │ ├── abl-04-256-mh-dist-celeba.yaml
│ │ │ │ ├── abl-04-256-mh-dist-web.yaml
│ │ │ │ └── abl-04-256-mh-dist.yaml
│ │ │ ├── discriminator/
│ │ │ │ └── pix2pixhd_nlayer.yaml
│ │ │ ├── evaluator/
│ │ │ │ └── default_inpainted.yaml
│ │ │ ├── generator/
│ │ │ │ ├── ffc_resnet_075.yaml
│ │ │ │ ├── pix2pixhd_global.yaml
│ │ │ │ ├── pix2pixhd_global_sigmoid.yaml
│ │ │ │ └── pix2pixhd_multidilated_catin_4dil_9b.yaml
│ │ │ ├── hydra/
│ │ │ │ ├── no_time.yaml
│ │ │ │ └── overrides.yaml
│ │ │ ├── lama-fourier-celeba.yaml
│ │ │ ├── lama-fourier.yaml
│ │ │ ├── lama-regular-celeba.yaml
│ │ │ ├── lama-regular.yaml
│ │ │ ├── lama_small_train_masks.yaml
│ │ │ ├── location/
│ │ │ │ ├── celeba_example.yaml
│ │ │ │ ├── docker.yaml
│ │ │ │ └── places_example.yaml
│ │ │ ├── optimizers/
│ │ │ │ └── default_optimizers.yaml
│ │ │ ├── trainer/
│ │ │ │ ├── any_gpu_large_ssim_ddp_final.yaml
│ │ │ │ ├── any_gpu_large_ssim_ddp_final_benchmark.yaml
│ │ │ │ └── any_gpu_large_ssim_ddp_final_celeba.yaml
│ │ │ └── visualizer/
│ │ │ └── directory.yaml
│ │ ├── docker/
│ │ │ ├── 1_generate_masks_from_raw_images.sh
│ │ │ ├── 2_predict.sh
│ │ │ ├── 3_evaluate.sh
│ │ │ ├── Dockerfile
│ │ │ ├── Dockerfile-cuda111
│ │ │ ├── build-cuda111.sh
│ │ │ ├── build.sh
│ │ │ └── entrypoint.sh
│ │ ├── fetch_data/
│ │ │ ├── celebahq_dataset_prepare.sh
│ │ │ ├── celebahq_gen_masks.sh
│ │ │ ├── eval_sampler.py
│ │ │ ├── places_challenge_train_download.sh
│ │ │ ├── places_standard_evaluation_prepare_data.sh
│ │ │ ├── places_standard_test_val_gen_masks.sh
│ │ │ ├── places_standard_test_val_prepare.sh
│ │ │ ├── places_standard_test_val_sample.sh
│ │ │ ├── places_standard_train_prepare.sh
│ │ │ ├── sampler.py
│ │ │ ├── train_shuffled.flist
│ │ │ └── val_shuffled.flist
│ │ ├── models/
│ │ │ └── ade20k/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── color150.mat
│ │ │ ├── mobilenet.py
│ │ │ ├── object150_info.csv
│ │ │ ├── resnet.py
│ │ │ ├── segm_lib/
│ │ │ │ ├── nn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── modules/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── batchnorm.py
│ │ │ │ │ │ ├── comm.py
│ │ │ │ │ │ ├── replicate.py
│ │ │ │ │ │ ├── tests/
│ │ │ │ │ │ │ ├── test_numeric_batchnorm.py
│ │ │ │ │ │ │ └── test_sync_batchnorm.py
│ │ │ │ │ │ └── unittest.py
│ │ │ │ │ └── parallel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── data_parallel.py
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── dataloader.py
│ │ │ │ │ ├── dataset.py
│ │ │ │ │ ├── distributed.py
│ │ │ │ │ └── sampler.py
│ │ │ │ └── th.py
│ │ │ └── utils.py
│ │ ├── requirements.txt
│ │ └── saicinpainting/
│ │ ├── __init__.py
│ │ ├── evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── evaluator.py
│ │ │ ├── losses/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_loss.py
│ │ │ │ ├── fid/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fid_score.py
│ │ │ │ │ └── inception.py
│ │ │ │ ├── lpips.py
│ │ │ │ └── ssim.py
│ │ │ ├── masks/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── countless/
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── countless2d.py
│ │ │ │ │ ├── countless3d.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── test.py
│ │ │ │ └── mask.py
│ │ │ ├── refinement.py
│ │ │ ├── utils.py
│ │ │ └── vis.py
│ │ ├── training/
│ │ │ ├── __init__.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aug.py
│ │ │ │ ├── datasets.py
│ │ │ │ └── masks.py
│ │ │ ├── losses/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adversarial.py
│ │ │ │ ├── constants.py
│ │ │ │ ├── distance_weighting.py
│ │ │ │ ├── feature_matching.py
│ │ │ │ ├── perceptual.py
│ │ │ │ ├── segmentation.py
│ │ │ │ └── style_loss.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── depthwise_sep_conv.py
│ │ │ │ ├── fake_fakes.py
│ │ │ │ ├── ffc.py
│ │ │ │ ├── multidilated_conv.py
│ │ │ │ ├── multiscale.py
│ │ │ │ ├── pix2pixhd.py
│ │ │ │ ├── spatial_transform.py
│ │ │ │ └── squeeze_excitation.py
│ │ │ ├── trainers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── default.py
│ │ │ └── visualizers/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── colors.py
│ │ │ ├── directory.py
│ │ │ └── noop.py
│ │ └── utils.py
│ ├── requirements.txt
│ ├── segment_anything/
│ │ ├── __init__.py
│ │ ├── automatic_mask_generator.py
│ │ ├── build_sam.py
│ │ ├── modeling/
│ │ │ ├── __init__.py
│ │ │ ├── common.py
│ │ │ ├── image_encoder.py
│ │ │ ├── mask_decoder.py
│ │ │ ├── prompt_encoder.py
│ │ │ ├── sam.py
│ │ │ └── transformer.py
│ │ ├── predictor.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── amg.py
│ │ ├── onnx.py
│ │ └── transforms.py
│ └── setup.py
├── requirements.txt
├── requirements_llm_extra.txt
├── themes/
│ ├── common.js
│ ├── default.css
│ ├── default.py
│ ├── green.css
│ ├── green.py
│ └── theme.py
└── utils/
├── AudioRecorder.py
├── AudioTrans.py
├── __init__.py
├── audio.py
├── check_proxy.py
├── colorful.py
├── conf.py
├── dataloads.py
├── downloads.py
├── ops.py
├── plot.py
├── text2speech.py
├── textsplitter/
│ ├── __init__.py
│ ├── ali_text_splitter.py
│ ├── chinese_text_splitter.py
│ └── zh_title_enhance.py
├── toolbox.py
├── torch_utils.py
└── video.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
runs/
train_imgs/
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# Keys / private config (keep out of version control)
ChatGPT/config
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Checkpoints, model weights, generated outputs, and other local-only files
*.pth
outputs/
checkpoints/
gfpan/
results/
.idea/
weights/
voice_dir/
SadTalker/
VITS/
config_private.py
private_upload
gpt_log
================================================
FILE: .gitmodules
================================================
[submodule "VisualGLM_6B"]
path = VisualGLM_6B
url = https://github.com/positive666/VisualGLM_6B.git
================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program> Copyright (C) <year> <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: README.md
================================================
# Prompt-Can-Anything
<p align="center"> English | <a href="README_zh.md">中文</a></p>
This is a gradio library and research repository that combines SOTA AI applications. It can help you achieve anything - all you need to do is provide prompts and make one click. Through the prompts and creativity of SOTA models, you can do anything. You don't have to install all the features; you can install only the ones you want to use.
**Motivation**
Currently, the “Anything” AI intelligent agent backend has been accumulated for engineering and research. This requires the use of more multi-modal tasks and zero-shot models, not only to provide multi-modal AI processing web UI, but also to gradually enrich its functionality.
You can accomplish anything through this project! Let’s learn more about the development progress and plan of this project, and the final complete intelligent agent that combines the local GPT repository can help you call any AI task! Questions, stars, and forks are welcome, and you can also become a developer.
## Feature
1. (YOCO) It is not just a tool that can prompt anything
🔥 Data Engine:
In addition, we will introduce video, audio, and 3D annotations in the future. YOCO relies on integrated multimodal models and auxiliary generators such as ChatGPT. Of course, it is not omnipotent. Through effective fully automatic annotation and stable diffusion series methods to produce and control data that meet the requirements, we complete the “data engine” and generate customized label formats that facilitate the training of conventional models.
🔥 Model Training:
For each model, we not only need to use it, but also read its paper, fine-tuning methods, and communicate with the original author to try some development work for improvement and better training. We use fine-tune large models and customized label formats generated by YOCO to more efficiently train conventional models.
<img src="asset/data_engine.png" alt="structure" style="zoom: 33%;" />
2. 🚀 Interactive content creation and visual GPT
Integrate diversified GPT, mainly using the port of chatgpt, and use the open-source Tsinghua VISUALGLM to deploy and fine-tune localized GPT, as well as try to improve the model structure. Through multimodal application tools, we can conduct dialogues and content creation.
easy example( asr->llM_model->tts->a2f app)
https://github.com/positive666/Prompt-Can-Anything/assets/28972473/c9cc64af-939d-480f-a684-08d8db34b25f
3. ⭐ 3D && 2D Avatar(coming soon)
Complete a role design interaction through a 3D Engine combined with multimodal tasks such as GPT;
Complete a role design interaction through the Sadtalker open source project and multimodal tasks such as GPT.
4. 🔥🔥🚀 Unlimited potential “Anything”
Through continuous creativity and accumulation, we will integrate and learn from Sota AI. We will record each integrated model and provide a detailed explanation and summary in the article. The author will summarize all the AI-related knowledge reserves and engineering experience for the local large model (this part is the final development function and is planned).
<img src="asset/v1.15.png" alt="structure" style="zoom: 33%;" />
<details open >
<summary>⭐ Research🚀 project🔥 Inspiration(In preparation)</summary>
At the research level, zero-shot contrastive learning is a research trend. We hope to understand as much as possible about the model design details of the projects we are applying, so that we can combine text, images, and audio to design a strongly aligned backbone.
At project level, Tensorrt acceleration of the basic model accelerates efficiency.
</details>
### <div align="left"> 🔥 [August , Update plan preview , Welcome fork] </div>
- 🔥 add gpt_academic repo crazy functions and add langchain\agent coming soon
- Optimization of speech problems and code logic optimization before optimization, add Gilgen
- 🔥Official latest model integration test for Tag2text version 2 in early June,add RAM(Done)
- One-click fine-tuning button function, adding: visualglm (Done)
- Voice text processing link GPT, joining chatglm with a2f APP( Done)
### <div align="left">⭐[News list] </div>
-【2023/8/7】 Fix bug with llm(chatglm2,gpt3.5 loads and improve gradio ui)
-【2023/7/21】 update tag2text and ram with official repo
-【2023/6/7】 v1.15:add submodule SadTalker,update UI
-【2023/6/6】 v1.15:environment installation problems and supplementary instructions, special models are called independently, and no need to install dependencies; Added the function of one-click fine-tuning of VisualGLM, considering machine configuration and video memory with caution
-【2023/6/5】 v1.15 a video demo and plan,fix asr bug ,chatgpt with asr and tts
-【2023/5/31】 Fixed the already issue, add tts demo, the Linux platform is tested through all open features
-【2023/5/23】 add web demo:Add VisualGLM ,chatgpt from [Academic-gpt](https://github.com/binary-husky/gpt_academic)
-【2023/5/7】 add web demo:At present, the function of text generation, detection and segmentation of images or image folders on the website has been tested normally, and the program does not need to be restarted, and the last model loading configuration is remembered, and it will be continuously optimized in the future.
-【2023/5/4】 add semantic segmentation label, add args(--color-flag --save-mask )
-【2023/4/26】 YOCO,Automatic annotation TOOLS:Commit preliminary code ,For the input image or folder, you can obtain the results of detection, segmentation, and text annotation , optional chatgpt api.
## Preliminary-Works
- [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B) : Visual ChatGlm(6B)
- [Segment Anything](https://github.com/facebookresearch/segment-anything) : Strong segmentation model. But it needs prompts (like boxes/points/text) to generate masks.
- [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) : Strong zero-shot detector which is capable of generating high quality boxes and labels with free-form text.
- [Stable-Diffusion](https://github.com/CompVis/stable-diffusion) : Amazing strong text-to-image diffusion model.
- [Tag2text](https://github.com/xinyu1205/Tag2Text) : Efficient and controllable vision-language model which can simultaneously output superior image captioning and image tagging.
- [SadTalker](https://github.com/OpenTalker/SadTalker): Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation
- [lama](https://github.com/advimman/lama) : Resolution-robust large mask Inpainting with Fourier Convolutions
- [gpt_academic](https://github.com/binary-husky/gpt_academic) : LLM tools.
## :hammer_and_wrench: YOCO: Quick Start
First, make sure you have a basic GPU deep learning environment.
(Linux is recommended; Windows may have problems compiling the GroundingDINO deformable-transformer operator, see [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) )
```bash
git clone https://github.com/positive666/Prompt-Can-Anything
cd Prompt-Can-Anything
```
**Install environment**
Installation of basic environment
```
pip install -r requirements.txt
or
pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
Installation of Ground detector (compiling)
```bash
cd model_cards
pip install -e .
```
Installation of Tsinghua VisualGLM (optional, better to use LINUX system, installation plan will be updated after testing on Windows)
```bash
git submodule update --init --recursive
cd VisualGLM_6B && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
Install SadTalker (optional )
```bash
git clone https://github.com/Winfredy/SadTalker.git
cd SadTalker && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
Tips: create two directories, checkpoints and gfpgan, and place them in the root directory. Download the extracted weights from the official website and put them into these two folders.
Installation of LAMA model (optional, not yet released):
This environment has a relatively strict requirement for the Python version, you may need to manually override the installation by version specified in the txt below:
```
pip install -r model_cards/lama/requirements.txt
```
Installation of diffuser (optional):
```bash
pip install --upgrade diffusers[torch]
```
For more content, you can check requirements, “pip install < your missing packages>”, if there is an installation version issue, please carefully look at the requirement version.
**Linux environment issue**:
1. for pyaudio
Method 1:
pip may not be successful on the Linux platform; go to this page [pyaudio-wheels · PyPI](https://pypi.org/project/pyaudio-wheels/#files), select the version corresponding to your Python version, download it and pip install the whl file. Detailed instructions will be provided in the future.
Method 2:
```
sudo apt-get install portaudio19-dev
sudo apt-get install python3-all-dev
pip install pyaudio
```
2. Issue when fine-tuning with QLoRA
```
pip install bitsandbytes -i https://mirrors.aliyun.com/pypi/simple
```
**Windows installation issue**
Same as Linux.
For more content, you can check the requirements, “pip install < your missing packages>”, and if there are version installation issues, please check the version carefully in the requirements.
**Run**
1. Download the model weights
<!-- insert a table -->
<table>
<thead>
<tr style="text-align: left;">
<th></th>
<th>name</th>
<th>backbone</th>
<th>Data</th>
<th>Checkpoint</th>
<th>model-config</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>Tag2Text-Swin</td>
<td>Swin-Base</td>
<td>COCO, VG, SBU, CC-3M, CC-12M</td>
<td><a href="https://huggingface.co/spaces/xinyu1205/Tag2Text/blob/main/tag2text_swin_14m.pth">Download link</a></td>
</tr>
<tr>
<th>2</th>
<td>Segment-anything</td>
<td>vit</td>
<td> </td>
<td><a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth">Download link (ViT-H)</a> |
<a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth">Download link (ViT-L)</a> |
<a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth">Download link (ViT-B)</a></td>
</tr>
<tr>
<th>3</th>
<td>Lama</td>
<td>FFC</td>
<td> </td>
<td><a href="https://disk.yandex.ru/d/ouP6l8VJ0HpMZg">Download link</a></td>
</tr>
<tr>
<th>4</th>
<td>GroundingDINO-T</td>
<td>Swin-T</td>
<td>O365,GoldG,Cap4M</td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth">Github link</a> | <a href="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth">HF link</a></td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/groundingdino/config/GroundingDINO_SwinT_OGC.py">link</a></td>
</tr>
<tr>
<th>5</th>
<td>GroundingDINO-B</td>
<td>Swin-B</td>
<td>COCO,O365,GoldG,Cap4M,OpenImage,ODinW-35,RefCOCO</td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha2/groundingdino_swinb_cogcoor.pth">Github link</a> | <a href="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swinb_cogcoor.pth">HF link</a></td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/groundingdino/config/GroundingDINO_SwinB.cfg.py">link</a></td>
</tr>
</tbody>
</table>
2. Configure privacy files and parameters in config_private.py. After downloading the model, configure the path in the “MODEL_xxxx_PATH” variable. If using ChatGPT, configure its proxy and API key. (If there are networking issues with other services such as TTS during use on the web UI, first turn off the VPN connection and only open it when using ChatGPT).
**🏃Demo**
[Video demo 1 online on Baidu cloud](https://pan.baidu.com/s/1AllUjuOVhzJh7abe71iCxg?pwd=c6v6)
[Video demo 2](https://pan.baidu.com/s/1jdP9mgUhyfLh_hz1W3pkeQ?pwd=c6v6)
1. Auto-label
```bash
"--input_prompt" : You can manually input a prompt. For example, if you only want to detect target categories that interest you, you can directly input the prompt to the grounded detection model, or input it to the Tag2Text model.
'--color-flag': Use the BOX tags to distinguish between category and instance segmentation: the category colors of semantic segmentation are assigned according to the BOX tags.
```
python auto_label_demo.py --source <data path> --save-txt --save-mask --save-xml --save_caption
Example:
Support multi-tasks, such as :
Default tasks include image understanding / detection / instance segmentation ... (methods for image generation and inpainting will be added later).
<img src="asset/1.jpg" style="zoom: 32%;" />
"Prompt" control models output, example
<img src="asset/d2.png" style="zoom: 35%;" >
<img src="asset/image-20230427093103453.png" alt="image-20230427093103453" style="zoom: 33%;" />
2. webui(all)
```python
python app.py
```
<img src="asset/default_all.png" alt="image-20230508075845259" style="zoom:33%;" />
<img src="asset/demo1.png" style="zoom:25%;" />
<img src="asset/v1.1_demo.png" alt="image-20230527022556630" style="zoom:50%;" />
2.1 audio2face with llm model (Beta)
In fact, the ASR, TTS and LLM components are all freely replaceable.
This is a simple example that supports ChatGLM and ChatGPT (you can use any LLM, but you will need to adapt it yourself).
Start ASR & TTS with Audio2Face:
You need to install Audio2Face in the Omniverse app, see
https://www.nvidia.cn/omniverse/
Step 1. In Audio2Face, open a demo and choose a Player; the TensorRT engine is built automatically (GTX 10xx GPUs are not supported). The latest version supports Chinese!
Then get the Player's Prim path.
<img src="asset/a2f.png" alt="image-20230725122731372" style="zoom: 33%;"/>
<img src="asset/a2f2023.png" alt="image-20230331372" style="zoom: 33%;"/>

step 2. in webui , configure your Prim path "Avatar_instance_A" in config_private.py , click"start system" and" Speech_system"
<img src="asset/start-chat.png" style="zoom:67%;">
## 🔨To Do List
- [x] Release demo and code.
- [x] web ui demo
- [x] Support ChatGPT/VISUALGLM/ASR/TTS
- [x] YOCO labeling fine-tuning of VISUALGLM demo[next week]
- [x] 3D && 2D avatar
- [ ] Complete the planned AI combination “Anything”
- [ ] Fine-tune the segmentation and ground detectors of SAM, and expand the input control of SAM
- [ ] Release training methods
- [ ] Knowledge cloning
## :cupid: Acknowledgements
- [gpt_academic](https://github.com/binary-husky/gpt_academic)
- [Segment Anything](https://github.com/facebookresearch/segment-anything)
- [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)
- [Tag2text](https://github.com/xinyu1205/Tag2Text)
- [SadTalker](https://github.com/OpenTalker/SadTalker)
- [lama](https://github.com/advimman/lama)
- [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git)
Thanks for their great work!
================================================
FILE: README_zh.md
================================================
# Prompt-Can-Anything
这是一个结合SOTA AI的应用web库以及研究的储备库,它能够帮你实现一切:你只需要提供提示!只需一次点击!通过SOTA模型的提示和创意,你可以做任何事情。
**动机**
当前:为工程和研究所积累的AI智能体后台”安尼森“,这需要使用更多的多模态任务以及zero-shot模型,不仅提供多模态的AI处理web UI,逐渐丰富的功能。
目标:你可以通过它完成一切事情!让我们详细了解下该项目的开发进度和计划,最终完整的智能体结合本地储备的GPT可以帮你调用一切AI任务!欢迎提问、star和fork,以及伸出援助之手~
## 特性
1. (YOCO)它不仅是一个可以提示任何事情的工具
🔥 数据引擎:
此外,我们将在未来引入视频、音频和3D注释,YOCO依赖于集成的多模态模型以及GPT等辅助生成,当然它并不是万能的,通过有效的全自动标注和stable diffusion系列的方法去生产和控制符合需求的数据,完成”数据引擎“,并且生成的定制化的标签格式,去便于训练常规模型。
🔥 模型训练:
对于每一个模型我们不仅要做到使用,还在阅读它的论文和微调方法以及和原作者交流,尝试一些改进和更好训练的开发工作,Fine-tune大模型和通过YOCO生成的定制化的标签格式,更高效地训练常规模型。
<img src="asset/data_engine.png" alt="structure" style="zoom: 33%;" />
2. 🚀交互内容创作和视觉&&语音GPT
集成多样化GPT,目前主要以chatgpt的端口为主,利用开源的清华VISUALGLM,我们实现本地化GPT的部署和微调,以及尝试改进模型结构,通过多模态的应用工具进行对话和内容创作,支持语音识别、语音合成、并发送Audio2face.
这是一个最简单的例子
https://github.com/positive666/Prompt-Can-Anything/assets/28972473/c9cc64af-939d-480f-a684-08d8db34b25f
3. ⭐ 应用角色扮演—— 3D &&2D 虚拟人(开发中)
通过3D引擎去结合GPT等多模态任务完成一个角色设计互动;
通过SadTalker开源项目去结合GPT等多模态任务完成一个角色设计互动;
4. 🔥🔥🚀无限的潜力“安尼森”
不断的创意和积累,SOTA -AI的集成和学习,我们会通过记录每一个集成的模型,对它进行一次详解,总结在文章中。
作者AI相关所有知识储备和工程经验总结给本地大模型(这部分是最终开发功能,计划中)
<img src="asset/v1.15.png" alt="structure" style="zoom: 33%;" />
<details open>
<summary>⭐ 研究 🚀 项目 🔥 灵感(筹备中)</summary>
在研究层面上,零样本迁移比较学习是热门的研究趋势,我希望尽可能理解正在应用的项目的模型设计细节,这样我们想将文本、图像和音频相结合设计一个强大的对齐backbone。
在项目层面上,可考虑Tensorrt加速基本模型或者其他的模型转换方式可以提高效率。
</details>
### <div align="left">🔥 [8月更新预告,更新频繁,感兴趣关注]</div>
- 修复了LLM调用相关的BUG和界面调整,正在更新langchains和Agent
- 更新了 RAM && Tag2Text【Done】
- 修复优化开源GLM的一些功能,一键微调按钮和各种微调模型
- 语音文本处理链接gpt,加入chatglm 【Done】
- Gilgen测试代码更新
- RAM && Tag2Text 等解析文章
</details>
### <div align="left">⭐ [更新列表]</div>
- 【2023/8/7】 v1.2: 修复了界面已知BUG,分离了部分依赖,修复了chatglm2和多模型加载问题,完整添加了最新的学术GPT功能,并在更新langchains更多功能
- 【2023/7/21】 v1.15: 更新了Tag2text和ram的代码,支持RAM,是一个中英识别标签的双模态模型
- 【2023/6/7】 v1.15 :加入子项目SadTalker,更新UP界面,语音对话功能界面更新
- 【2023/6/6】 v1.15版本:修复了已知的环境安装问题和补充说明,特殊的模型独立了调用,不需要可以不用安装依赖了;添加了一键微调VisualGLM的功能,考虑机器配置和显存慎用;
- 【2023/6/5】 修复whisper asr的bug,内部可选模型,但是考虑显存不建议超过small,上传百度云一个介绍。
- 【2023/5/31】添加Web演示:修复已知问题BUG,添加TTS模块(临时版本),LINUX系统上测试通过了所有开放的功能,补充一些说明和测试。(修改重载:每次勾选加载模型和释放模型后,因为太多的本地化的大模型,如果部署本地GPT显卡必须要20G+,但目前机制无法动态释放调节释放多个模型显存,这个按钮只能帮助你选择、组合串联cv模型的使用方式了)
- 【2023/5/29】添加Web演示:加入了学术chatgpt部分功能,感谢他们的工作,其次添加了一键生成VisualGLM-6B数据集标注功能,后续可一键微调
- 【2023/5/23】添加Web演示:加入清华的VisualGLM-6B版本
- 【2023/5/7】添加Web演示:目前,已经测试了文本生成、图像或图像文件夹的检测和分割功能,程序无需重新启动,记住了最后的模型加载配置,并将在未来持续优化。
- 【2023/5/4】添加语义分割标签,添加args(--color-flag --save-mask)
- 【2023/4/26】YOCO,自动标注工具:提交初步代码,针对输入图像或文件夹,可以获得检测、分割和文本注释的结果,额外提供选择chatgpt api。
**预备工作**
- [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git) : Visual ChatGlm.
- [Segment Anything](https://github.com/facebookresearch/segment-anything):强大的分割模型。但它需要提示(如包围框/点/掩码、文本)来生成蒙版。
- [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO):强大的零样本泛化检测器,能够使用自由格式文本生成高质量框和标签。
- [Stable-Diffusion](https://github.com/CompVis/stable-diffusion):文本-图像扩散模型。
- [Tag2text](https://github.com/xinyu1205/Tag2Text):高效可控的视觉-语言模型,可以同时输出优越的图像字幕和图像标记。
- [SadTalker](https://github.com/OpenTalker/SadTalker):单图声音驱动人脸的方法
- [lama](https://github.com/advimman/lama):分辨率鲁棒的大屏蔽填充与傅立叶卷积
- [gpt_academic](https://github.com/binary-husky/gpt_academic) : 丰富的LLM工具箱。
## :hammer_and_wrench: YOCO: 快速入门
首先,需要有基本的gpu深度学习环境。
(强烈建议使用Linux,Windows可能在编译Grounded-DINO Deformable和配置Visualglm时候算子时出现问题,参见[Grounding DINO](https://github.com/IDEA-Research/GroundingDINO))
```bash
git clone https://github.com/positive666/Prompt-Can-Anything
cd Prompt-Can-Anything
```
安装基本环境:
```
pip install -r requirements.txt
或者
pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
安装Ground检测器(编译):
```
cd model_cards
pip install -e .
```
安装清华智谱视觉VisualGLM(可选,最好用LINUX系统,window后面测试后补充安装方案):
```bash
git submodule update --init --recursive
cd VisualGLM_6B && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
安装SadTalker(optional )
```bash
git clone https://github.com/Winfredy/SadTalker.git
cd SadTalker && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
```
Tips:创建checkpoints 和gfpgan两个目录,放置在根目录下。从官网下载解压的权重分别放进两个文件夹!!
安装LAMA模型(可选还未发布):
这个环境对Python版本要求比较苛刻,可能需要按照下面txt的版本手动覆盖安装
```
pip install -r model_cards/lama/requirements.txt
```
安装扩散器(可选):
```bash
pip install --upgrade diffusers[torch]
```
更多内容,可以查看requirements, “pip install < your missing packages>”, 如果出现安装版本问题,请仔细看requirements的版本
**Linux环境问题【易出现问题的库】**
1. 对于pyaudio
方法一:在Linux平台可能通过pip并不一定成功,进入这里[pyaudio-wheels · PyPI](https://pypi.org/project/pyaudio-wheels/#files),选择对应你Python的版本,下载后pip安装whl,后续会详细补充。
方法二:
```
sudo apt-get install portaudio19-dev
sudo apt-get install python3-all-dev
pip install pyaudio
```
2.VisualGLM训练环境:使用qlora微调int4模型问题:
```
pip install bitsandbytes -i https://mirrors.aliyun.com/pypi/simple
```
**Windows安装问题**
目前除了LLM的加速和微调三方库,无特殊问题。
运行
1. 下载模型权重
<!-- insert a table -->
<table>
<thead>
<tr style="text-align: left;">
<th></th>
<th>名称</th>
<th>骨干</th>
<th>数据</th>
<th>权重</th>
<th>模型配置</th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>Tag2Text-Swin</td>
<td>Swin-Base</td>
<td>COCO、VG、SBU、CC-3M、CC-12M</td>
<td><a href="https://huggingface.co/spaces/xinyu1205/Tag2Text/blob/main/tag2text_swin_14m.pth">下载链接</a></td>
<tr>
<th>2</th>
<td>Segment-anything</td>
<td>vit</td>
<td> </td>
<td><a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth">下载链接</a> | <a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth">下载链接</a> | <a href="https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth">下载链接</a></td>
<tr>
<th>3</th>
<td>Lama</td>
<td>FFC</td>
<td> </td>
<td><a href="https://disk.yandex.ru/d/ouP6l8VJ0HpMZg">下载链接</a></td>
<tr>
<th>4</th>
<td>GroundingDINO-T</td>
<td>Swin-T</td>
<td>O365、GoldG、Cap4M</td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth">Github链接</a> | <a href="https://huggingface.co/ShilongLiu/GroundingDINO/resolve/main/groundingdino_swint_ogc.pth">HF链接</a></td>
<td><a href="https://github.com/IDEA-Research/GroundingDINO/blob/main/groundingdino/config/GroundingDINO_SwinT_OGC.py">链接</a></td>
</tr>
</tbody>
</table>
2. 配置隐私文件和参数在 config_private.py 下,下载模型后将路径配置在" MODEL_xxxx_PATH“的变量 ,如果使用 chatgpt ,配置其代理和API密钥,可能在WEBUI使用过程中,如果其他服务如tts有联网问题,先关掉VPN链接,仅当使用Chatgpt时候打开。
## 🏃Demo
[视频Demo介绍 ](https://pan.baidu.com/s/1AllUjuOVhzJh7abe71iCxg?pwd=c6v6)
[视频Demo 2](https://pan.baidu.com/s/1jdP9mgUhyfLh_hz1W3pkeQ?pwd=c6v6)
1. 自动标注的测试样例
```bash
"--input_prompt" : 你可以手动输入prompt,比如你只想检测你感兴趣的目标类别,可以直接输入给grounded检测模型,也可以输入给tag2text
'--color-flag': 使用BOX的标签同类别和实例分割区别:语义分割的类别颜色
```
支持多种任务,例如:
默认任务包括图像理解/检测/实例分割…(以及后修添加图像生成和编辑的方法去制作新数据)
<img src="asset/1.jpg" style="zoom: 39%;" />
"Prompt" control models output, example
<img src="asset/d2.png" style="zoom: 57%;" >
python auto_label_demo.py --source <data path> --save-txt --save-mask --save-xml --save_caption
<img src="asset/image-20230427093103453.png" alt="image-20230427093103453" style="zoom:25%;" />
2. webui
```python
python app.py
```
<img src="asset/anything.png" alt="image-20230527022556630" style="zoom: 33%;" />
<img src="asset/default_all.png" alt="image-20230508075845259" style="zoom:33%;" />
<img src="asset/demo1.png" alt="visual_chatglm" style="zoom:33%;" />
2.语音大语言模型&&驱动a2f
这是一个简单的例子,实际上asr、tts\llm_model\这些组件是可以任意替换的,只要你具备基本的开发能力,通过语言模型和语音驱动去完成A2F的服务,你需要安装Omniverse软件和Audio2face的应用,GPU不能是比较旧的帕斯卡架构,详情可以看https://www.nvidia.cn/omniverse/
步骤1.在Omniverse中,点击如图下的例子,安装一个Demo player,它会自动完成TensorRT的构建,然后可以如下图中获取Player的路径Prim Path
<img src="asset/a2f.png" alt="image-20230725122731372" style="zoom:50%;" />
<img src="asset/a2f2023.png" alt="image-20230331372" style="zoom: 33%;"/>

步骤2. 程序运行起来后,将上面获得的路径拷贝,填写在config_private的“Avatar_instance_A”,在web端如图下操作点击 ‘start system’后,点击加载“Speech_system”启动语音模式,但是注意TTS是网络服务。
<img src="asset/start-chat.png" style="zoom:50%;" >
## 🔨计划清单
- [x] 释放初版
- [x] web ui 界面调整
- [x] 支持chatgpt/VISUALGLM/ASR/TTS
- [x] Yoco一键标注微调VISUALGLM Demo
- [x] 3D && 2D avatar
- [ ] 完成计划的AI结合体“安尼森”
- [ ] 微调sam分割器 and ground检测器 ,拓展SAM的输入控制
- [ ] 释放训练方法.
- [ ] 知识克隆
## 参考工作
- [gpt_academic](https://github.com/binary-husky/gpt_academic)
- [Segment Anything](https://github.com/facebookresearch/segment-anything)
- [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)
- [Tag2text](https://github.com/xinyu1205/Tag2Text)
- [SadTalker](https://github.com/OpenTalker/SadTalker)
- [lama](https://github.com/advimman/lama)
- [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git)
感谢他们的出色工作!
================================================
FILE: a2f.py
================================================
import argparse
import functools
import os
import yaml
import numpy as np
import ffmpeg
import grpc
import grpc
import audio2face_pb2
import audio2face_pb2_grpc
from pydub import AudioSegment
from pydub.silence import split_on_silence
import soundfile
from audio2face_streaming_utils import push_audio_track_stream,push_audio_track,push_stream
import pyaudio
import wave
from queue import Queue
import time
import whisper
import requests
#from llm_cards.bridge_chatgpt import predict
from config_private import API_KEY
import uuid
import re
import asyncio
import threading
# Event intended for synchronisation between threads: audio_chatbot() sets it
# and send_stream2() clears it. NOTE(review): nothing in this module waits on it.
send_event = threading.Event()
# Cut a clip out of an audio segment, with boundaries given in whole seconds.
def get_part_wav(sound, start_time, end_time, part_wav_path):
    """Export the ``[start_time, end_time)`` slice of *sound* as a WAV file.

    Args:
        sound: Sliceable audio object whose slices provide ``export(path,
            format=...)``; it is sliced with millisecond offsets (``crop_wav``
            passes a ``pydub.AudioSegment``).
        start_time: Clip start in seconds; truncated with ``int()``.
        end_time: Clip end in seconds; truncated with ``int()``.
        part_wav_path: Destination file path. Missing parent directories are
            created.
    """
    save_dir = os.path.dirname(part_wav_path)
    # dirname() is "" for a bare file name and os.makedirs("") raises, so only
    # create a directory when the path actually names one. exist_ok avoids the
    # check-then-create race when several clips target the same folder.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    start_ms = int(start_time) * 1000
    end_ms = int(end_time) * 1000
    sound[start_ms:end_ms].export(part_wav_path, format="wav")
def crop_wav(path, crop_len):
    """Split every ``.wav`` file directly inside *path* into fixed-length clips.

    For each ``<name>.wav`` the clips are written to ``<path>/<name>/`` under
    random UUID file names. The duration is truncated to whole seconds, so a
    trailing fraction of a second is not exported.

    Args:
        path: Directory to scan (not recursive).
        crop_len: Clip length in seconds; used as the ``range`` step, so it
            must be a positive integer.
    """
    for file_name in os.listdir(path):
        wave_path = os.path.join(path, file_name)
        print(wave_path[-4:])
        if wave_path[-4:] != '.wav':
            continue
        # Only the header is needed here; the context manager closes the
        # handle instead of leaking one per file.
        with wave.open(wave_path) as wav_file:
            params = wav_file.getparams()
        # Total frames / sample rate = duration in seconds.
        total_seconds = int(params.nframes / params.framerate)
        print('总时长为 %d s' % total_seconds)
        # Load the audio for slicing.
        sound = AudioSegment.from_wav(wave_path)
        clip_dir = os.path.join(path, os.path.basename(wave_path)[:-4])
        for start_time in range(0, total_seconds, crop_len):
            save_path = os.path.join(clip_dir, str(uuid.uuid1()) + '.wav')
            get_part_wav(sound, start_time, start_time + crop_len, save_path)
from concurrent.futures import ThreadPoolExecutor
def process_chunk(model, chunk, detect_language):
    """Transcribe one audio chunk with Whisper.

    Args:
        model: Loaded Whisper model; its ``device`` receives the spectrogram.
        chunk: Audio samples accepted by ``whisper.log_mel_spectrogram``.
        detect_language: When truthy, ask the model for the most probable
            spoken language; otherwise ``'zh'`` is reported.

    Returns:
        ``(text, language)`` for this chunk.
    """
    # Log-Mel spectrogram, moved to the same device as the model.
    spectrogram = whisper.log_mel_spectrogram(chunk).to(model.device)

    if detect_language:
        _tokens, language_probs = model.detect_language(spectrogram)
        language = max(language_probs, key=language_probs.get)
    else:
        language = 'zh'

    # Decode with Whisper's default decoding options.
    decoded = whisper.decode(model, spectrogram, whisper.DecodingOptions())
    return decoded.text, language
def speech_recognition(inputs, model,stream_model=False,detect_language=False):
    """Transcribe audio with Whisper, 30 seconds at a time.

    Args:
        inputs: Path (or file object) readable by ``soundfile.read`` when
            *stream_model* is false; otherwise a ``(sample_rate, samples)``
            tuple.
        model: Loaded Whisper model.
        stream_model: Selects how *inputs* is interpreted (see above).
        detect_language: Forwarded to ``process_chunk``.

    Returns:
        ``(text, language)``: the concatenated text of all chunks and the
        language reported for the last chunk (``'zh'`` if there were none).
    """
    if not stream_model:
        audio, sr = soundfile.read(inputs, dtype='float32')
    else:
        print('numpy data')
        sr, audio = inputs
        # Assumes int16 PCM samples (TODO confirm against the caller). Full
        # scale for int16 is 32768, which is also the divisor Whisper's own
        # loader uses; the former 65538 roughly halved the amplitude.
        audio = (audio / 32768.0).astype(np.float32)
    print(sr)
    chunk_size = sr * 30
    print((audio))

    # The context manager shuts the pool down once every chunk has finished,
    # so worker threads are not leaked on each call.
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                process_chunk,
                model,
                # Slicing past the end is safe; pad_or_trim fixes the length.
                whisper.pad_or_trim(audio[start:start + chunk_size]),
                detect_language,
            )
            for start in range(0, len(audio), chunk_size)
        ]
        # Collect in submission order so the text stays in sequence.
        outcomes = [future.result() for future in futures]

    all_result = ''.join(text for text, _ in outcomes)
    speech_language = outcomes[-1][1] if outcomes else 'zh'
    return all_result, speech_language
# Player path passed to push_audio_track_stream() as the target instance.
Avatar_instance_A='/World/audio2face/PlayerStreaming'
a2f_url = 'localhost:50051' # The audio2face url by default
sample_rate_Omniverse = 22050 # Audio frame rate
# Recording parameters
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS =5
# NOTE(review): machine-specific absolute path; not referenced elsewhere in this module.
audio_file = "F:\\VoiceprintRecognition-Pytorch-develop\\error001.wav"
# Number of CHUNK-sized reads that make up RECORD_SECONDS of audio.
buffer_length=int(RATE / CHUNK * RECORD_SECONDS)
record_file='record.wav'
# Shared PyAudio instance, created at import time.
p = pyaudio.PyAudio()
def mic_audio(record_file="record.wav", input_device_index=1):
    """Record from the microphone until the ``s`` key is pressed, then save a WAV.

    Args:
        record_file: Path of the WAV file to write.
        input_device_index: PortAudio input device index. Defaults to ``1``,
            the value this function has always used.

    Returns:
        The string ``'OK'`` once the file has been written.
    """
    import keyboard

    # A PyAudio instance cannot open streams after terminate(), so every call
    # owns a private instance instead of terminating the shared module-level
    # ``p`` (which made a second recording impossible).
    audio_interface = pyaudio.PyAudio()
    frames = []
    try:
        # Read while the instance is still alive.
        sample_width = audio_interface.get_sample_size(FORMAT)
        # Open the recording stream.
        stream = audio_interface.open(
            input_device_index=input_device_index,
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK)
        print("Recording...")
        try:
            while True:
                frames.append(stream.read(CHUNK))
                if keyboard.is_pressed('s'):
                    break
        finally:
            # Release the device even if reading fails.
            stream.stop_stream()
            stream.close()
    finally:
        audio_interface.terminate()

    with wave.open(record_file, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
    return 'OK'
import edge_tts
async def tts_send(text,onmiverse=False,send_file='voice_dir/send_a2f.wav'):
    """Synthesize *text* sentence by sentence with edge-tts and save the audio.

    The text is split on ``!``, ``?``, ``。``, ``:`` and spaces; every
    non-empty sentence is synthesized concurrently and the audio is written
    to *send_file* in the original sentence order.

    Args:
        text: Text to speak. ``None`` makes the call a no-op.
        onmiverse: When truthy, read *send_file* back and push it to
            Audio2Face at ``a2f_url`` / ``Avatar_instance_A``.
        send_file: Output path for the synthesized audio.
    """
    if text is None:
        return

    sentences = [s.strip() for s in re.split(r'[!?。: ]', text) if s.strip()]
    # Sentence index -> list of audio byte chunks, filled by the coroutines.
    audio_chunks = {}

    async def speak(sentence, worker_id):
        # Synthesize one sentence and collect its audio packets.
        print(worker_id)
        audio_stream = edge_tts.Communicate(sentence, voice='zh-CN-YunxiNeural', rate='+1%', volume='+1%').stream()
        async for package in audio_stream:
            if package['type'] == 'audio':
                audio_chunks.setdefault(worker_id, []).append(package['data'])

    # Run all sentences concurrently; the index keeps them ordered afterwards.
    await asyncio.gather(*(speak(sentence, index) for index, sentence in enumerate(sentences)))

    # Join once instead of repeated ``bytes +=``, which copies the growing
    # buffer on every chunk.
    audio_data = b''.join(
        chunk
        for index in range(len(sentences))
        for chunk in audio_chunks.get(index, ())
    )
    with open(send_file, 'wb') as f:
        f.write(audio_data)

    if onmiverse:
        audio_data, samplerate = soundfile.read(send_file, dtype="float32")
        # Down-mix multi-channel audio to mono.
        if len(audio_data.shape) > 1:
            audio_data = np.average(audio_data, axis=1)
        push_audio_track_stream(a2f_url, audio_data, samplerate, Avatar_instance_A)
async def tts_a2f(text):
    """Synthesize *text* with edge-tts and push the result to Audio2Face.

    The audio is saved to ``./voice_dir/send_frame.wav`` and then streamed to
    ``a2f_url`` / ``Avatar_instance_A``.

    Returns:
        ``'Send Done!'`` on success, ``None`` when reading or pushing the
        audio fails (the failure is printed, not raised).
    """
    import edge_tts
    import soundfile as sf
    import numpy as np
    from audio2face_streaming_utils import push_audio_track_stream

    generate_wave = edge_tts.Communicate(text, voice='zh-CN-YunxiNeural', rate='-5%', volume='+1%')
    await generate_wave.save('./voice_dir/send_frame.wav')
    try:
        audio_data, samplerate = sf.read('./voice_dir/send_frame.wav', dtype="float32")
        # Down-mix multi-channel audio to mono.
        if len(audio_data.shape) > 1:
            audio_data = np.average(audio_data, axis=1)
        push_audio_track_stream(a2f_url, audio_data, samplerate, Avatar_instance_A)
        print("send done")
        return 'Send Done!'
    except Exception as e:
        # Deliberately best-effort, but show the real cause: a decode error
        # would otherwise be indistinguishable from Omniverse not running.
        print(f"检查是否开启omniverse!!!")
        print(f"tts_a2f failed: {e!r}")
def push_stream(url,player,dir="voice_dir/send_omniverse.wav"):
    """Read a synthesized audio file and push it to an Audio2Face player.

    Args:
        url: Audio2Face gRPC endpoint.
        player: Player path to stream to.
        dir: Path of the audio file to send.

    Raises:
        TimeoutError: If the file cannot be read after two attempts.
    """
    from audio2face_streaming_utils import push_audio_track_stream
    import soundfile
    import numpy as np
    import time

    max_attempts = 2
    for attempt in range(1, max_attempts + 1):
        try:
            audio_data, sr = soundfile.read(dir, dtype='float32')
            break
        except Exception as err:  # not bare: let KeyboardInterrupt/SystemExit through
            print("tts合成速度稍慢,等待....")
            print('正在重试')
            if attempt >= max_attempts:
                raise TimeoutError(f"could not read {dir!r} after {max_attempts} attempts") from err
            # Give the TTS writer time to finish; an immediate retry would
            # fail for the same reason.
            time.sleep(0.5)

    # Down-mix multi-channel audio to mono.
    if len(audio_data.shape) > 1:
        audio_data = np.average(audio_data, axis=1)
    push_audio_track_stream(url, audio_data, sr, player)
def audio_synthesis(gpt_replying_buffer,url,player):
    """Run ``process_send_stream`` for the given reply on a new thread.

    Returns immediately; the thread is started but not joined.
    """
    import threading

    worker = threading.Thread(
        target=process_send_stream,
        args=(gpt_replying_buffer, url, player),
    )
    worker.start()
def process_send_stream(gpt_replying_buffer,url,player):
    """Synthesize a reply with the ``edge-tts`` CLI and push it to Audio2Face.

    Args:
        gpt_replying_buffer: Text to speak.
        url: Audio2Face gRPC endpoint, forwarded to ``push_stream``.
        player: Player path, forwarded to ``push_stream``.
    """
    import subprocess

    media_path = "voice_dir/send_omniverse.wav"
    # Argument list without a shell: the reply text is model output and may
    # contain quotes, ``$(...)`` or backticks that a shell would interpret.
    # ``--text=<value>`` keeps text starting with "-" from being parsed as an
    # option.
    cmd = [
        'edge-tts',
        '--voice', 'zh-CN-YunxiNeural',
        f'--text={gpt_replying_buffer}',
        '--write-media', media_path,
    ]
    subprocess.run(cmd)
    time.sleep(0.5)
    push_stream(url, player, media_path)
def receive_max(q,Text):
    """Split *Text* into sentences and enqueue one TTS request per sentence.

    Sets the global ``receive_flag`` to ``True`` while producing and to
    ``False`` when every sentence has been queued. Each queue item is a
    ``(edge_tts.Communicate, True)`` tuple.
    """
    global receive_flag
    receive_flag = True

    stripped = (part.strip() for part in re.split(r'[!?。: ,]', Text))
    for sentence in [piece for piece in stripped if piece]:
        communicator = edge_tts.Communicate(
            sentence, voice='zh-CN-YunxiNeural', rate='+1%', volume='+1%')
        q.put((communicator, True))
        print('done')

    # Nothing left to synthesize: tell the other threads we are finished.
    print('语音合成线程结束......')
    receive_flag = False
###-------- Thread: collect data, relay the source buffer and send it once collected ------------###
# Consumer side of the queue filled by receive_max(): opens an insecure gRPC
# channel to a2f_url and hands the result of create_generator() to
# stub.PushAudioStream().
# NOTE(review): every `yield` in create_generator() is commented out, so it is a
# plain function that returns None when its loop ends, not a generator.
# NOTE(review): receive_max() enqueues items with the flag set to True, so the
# `if not send_flag` branch below is only taken for items that were put back
# on the queue with False.
def send_stream2(q):
global mess
global receive_flag
mess=False
with grpc.insecure_channel(a2f_url) as channel:
stub= audio2face_pb2_grpc.Audio2FaceStub(channel)
def create_generator():
global mess
while True:
if not q.empty():
# Take the audio item and its send flag from the queue
#print("buffer size :",q.qsize())
#time.sleep(2)
audio_data,send_flag = q.get()
if not send_flag:
# TODO: send the audio file out
print(f'Sending audio...')
# NOTE(review): the dequeued audio_data is replaced by the contents of this fixed file.
audio_data,sr= soundfile.read('voice_dir/send_framex.wav', dtype='float32')
# Down-mix multi-channel audio to mono.
if len(audio_data.shape) > 1:
audio_data = np.average(audio_data, axis=1)
#yield audio2face_pb2.PushAudioStreamRequest(start_marker=Avatar_instance_A)
#for i in range(len(audio_data) // sr//10 + 1):
# chunk = audio_data[i * sr//10: i * sr//10+ sr//10]
#yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes())
push_audio_track_stream(a2f_url, audio_data, sr, Avatar_instance_A)
send_flag=True
# Reset the event state
send_event.clear()
else:
# receive_flag is cleared by receive_max() once all sentences are queued.
if not receive_flag:
print("发送线程结束")
break
else:
continue
stub.PushAudioStream(create_generator())
# Starts receive_max() (producer) and send_stream2() (sender) on two threads
# sharing one Queue, then loops reading from that same queue itself.
# NOTE(review): q.get() blocks, and this loop competes with send_stream2() for
# the same items; receive_flag is only assigned inside receive_max().
def audio_chatbot(text):
q = Queue()
t1 = threading.Thread(target=receive_max,args=(q,text))
t2 = threading.Thread(target=send_stream2,args=(q,))
t1.start()
t2.start()
# t1.join()
#t2.join()
global receive_flag
while True:
send_flag=True
# Take the audio item and its send flag from the queue
audio, send_flag = q.get()
if not send_flag:
# Put the item back on the queue (sending is done by the other thread)
q.put((audio,False))
# Set the event to tell the sender thread it may send this audio
send_event.set()
if not receive_flag:
break
# Manual smoke test: run the threaded TTS pipeline on a long sample text.
if __name__ == "__main__":
text = "这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?\
当然可以。我要实现一个人工智能,这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?当然可以。我要实现一个人工智能,但是我需要很多时间和精力完成\
这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?当然可以。我要实现一个人工智能,但是我需要很多时间和精力完成"
# Start the main program
audio_chatbot(text)
# t1=time.time()
# asyncio.run(tts_send(text))
# print(time.time()-t1)
# # t1 = threading.Thread(target=send_stream)
# t1=time.time()
# #asyncio.run(tts_a2f(text))
# print(time.time()-t1)
================================================
FILE: app.py
================================================
from model_cards.autoback import AutoBackend
import argparse
import os
import platform
import sys
from pathlib import Path
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import matplotlib.pyplot as plt
from PIL import Image,ImageDraw,ImageFont
from utils.ops import (LOGGER, Profile, check_file, check_requirements, colorstr, cv2,
dilate_mask, increment_path , scale_boxes, xyxy2xywh,save_format)
from utils.plot import Annotator, save_one_box,show_box,show_mask,save_mask_data,Draw_img
from config_private import *
from llm_cards.bridge_all import predict_all,talk_all
from llm_cards.bridge_chatgpt import Talk_with_app
from llm_cards.core_functional import get_core_functions
from utils.toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith
from utils.torch_utils import select_device
from utils import VID_FORMATS,IMG_FORMATS,write_categories
import gradio as gr
import random
import json
import multiprocessing as mp
import asyncio
import concurrent.futures
from utils.colorful import *
functional = get_core_functions()
# Make the VisualGLM checkout importable.
VisualGLM_dir=f"VisualGLM_6B"
sys.path.append(VisualGLM_dir)
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
# NOTE(review): `global` at module level has no effect; these names are already module globals.
global categories
categories = {}
global category_colors
category_colors={}
# Initial list of category ids
class_ids = []
global speech_AI
# Registry of loaded speech models; None means "not loaded".
speech_AI={'asr':{'whisper':None},'tts':{'tts_VITS':None,'tts_edge': None}} ## speech
global models_config
# Registry of loaded vision/language models; None means "not loaded".
models_config = {'tag2text': None, 'ram': None,'lama': None,'sam': None,'grounded': None,'sd': None, ## cv with text
'visual_glm': None , 'trans_zh': None,'gligen': None}
# Number of fine-tuning processes spawned by train_visualGLM().
NUM_WORKERS=1
# Caption examples accumulated by save_text2img_data().
JSON_DATASETS=[]
operation_running = False
def toggle_operation(flag):
    """Wait for the ``q`` key, record from the microphone and transcribe it.

    The Whisper "small" model is loaded into ``speech_AI`` on first use.
    Recording is done by ``a2f.mic_audio`` into ``voice_dir/send_asr.wav``.

    Args:
        flag: Unused; kept for the caller's signature.

    Returns:
        A one-element list containing the recognized text.
    """
    import time
    # ``a2f`` only imports ``keyboard`` inside ``mic_audio``, so it is not a
    # module attribute there and ``from a2f import keyboard`` raises
    # ImportError; import the package directly.
    import keyboard
    import whisper
    from a2f import speech_recognition, mic_audio

    if speech_AI['asr']['whisper'] is None:
        speech_AI['asr']['whisper'] = whisper.load_model("small",
                                                         download_root="weights")
    print("asr加载完毕,开始录音!")

    # Poll for the trigger key, sleeping briefly so the wait does not pin a
    # CPU core.
    while not keyboard.is_pressed('q'):
        time.sleep(0.01)

    mic_audio('voice_dir/send_asr.wav')
    speech_text, _language = speech_recognition(
        'voice_dir/send_asr.wav', speech_AI['asr']['whisper'], False)
    print(speech_text)
    return [speech_text]
async def sadtalker_demo(checkpoint_path,config_path,source_image,
                         driven_audio,
                         preprocess_type,
                         is_still_mode,
                         enhancer,
                         batch_size,
                         size_of_image,
                         pose_style,
                         exp_weight):
    """Build a SadTalker model and run its ``test`` method in a worker thread.

    ``checkpoint_path`` and ``config_path`` configure the model (created with
    ``lazy_load=True``); the remaining arguments are forwarded positionally,
    in order, to ``SadTalker.test``.

    Returns:
        Whatever ``SadTalker.test`` returns.
    """
    sys.path.append('SadTalker')
    from SadTalker.app import SadTalker

    talker = SadTalker(checkpoint_path, config_path, lazy_load=True)
    render_args = (
        source_image,
        driven_audio,
        preprocess_type,
        is_still_mode,
        enhancer,
        batch_size,
        size_of_image,
        pose_style,
        exp_weight,
    )
    # Off-load the blocking call so the event loop stays responsive.
    return await asyncio.to_thread(talker.test, *render_args)
def train_visualGLM(name,model_size,mode,train_iters,resume_data,
                    max_source_length,max_target_length,lora_rank,layer_range_s,layer_range_e,pre_seq_len,
                    train_data,valid_data,distributed_backend,lr_decay_style,warmup,
                    checkpoint_activations,save_interval,eval_interval,save_path,
                    split,eval_iters,eval_batch_size ,zero_stage,
                    lr,batch_size,accumulation_steps,method_type):
    """Launch VisualGLM fine-tuning in ``NUM_WORKERS`` child processes and wait.

    The arguments are packed into two positional lists (model arguments and
    training options, numeric options converted with ``int``) and handed to
    ``start_finetuning_process`` together with *method_type*.

    Returns:
        The string ``'OK'`` after every child process has exited.
    """
    model_args = [
        max_source_length, max_target_length, lora_rank,
        layer_range_s, layer_range_e, pre_seq_len,
    ]
    # 21 entries; start_finetuning_process reads them by index.
    gpt_option = [
        name, int(model_size), mode, int(train_iters), resume_data,
        train_data, valid_data, distributed_backend, lr_decay_style, warmup,
        checkpoint_activations, int(save_interval), int(eval_interval), save_path,
        int(split), int(eval_iters), int(eval_batch_size), int(zero_stage),
        lr, int(batch_size), int(accumulation_steps),
    ]

    workers = []
    for _ in range(NUM_WORKERS):
        worker = mp.Process(
            target=start_finetuning_process,
            args=(gpt_option, model_args, method_type),
        )
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
    return 'OK'
# The exact parameters still need to be fixed and adjusted.
# Builds a deepspeed command line for VisualGLM fine-tuning from the two
# positional lists prepared by train_visualGLM() and runs it with os.system().
#   gpt_option  - training options, read by index 0..20
#   model_args  - six model arguments, read by index 0..5
#   method_type - 'use_qlora', 'use_lora' or 'use_ptuning'; anything else logs
#                 a message and returns without running.
def start_finetuning_process(gpt_option,model_args,method_type):
print('fine subprocess start')
script_path = os.path.abspath(__file__)
script_dir = os.path.dirname(script_path)
print(script_dir+'/'+VisualGLM_dir)
# NOTE(review): main_dir is computed but not used below.
main_dir = os.path.dirname(script_dir)
model_args = f'--max_source_length {model_args[0]} --max_target_length {model_args[1]} --lora_rank {model_args[2]} --layer_range {model_args[3]} {model_args[4]} --pre_seq_len {model_args[5]}'
# NOTE(review): VAR=value prefixes like this are POSIX-shell syntax - confirm the target platform.
options_nccl = 'NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_NET_GDR_LEVEL=2'
host_file_path = 'hostfile_single'
# Options shared by all methods. gpt_option[4] and gpt_option[10] are never
# read: --resume-dataloader and --checkpoint-activations are always passed.
gpt_option_prefix=f" \
--experiment-name finetune-{gpt_option[0]} \
--model-parallel-size {gpt_option[1]} \
--mode {gpt_option[2]} \
--train-iters {gpt_option[3]} \
--resume-dataloader \
{model_args} \
--train-data {gpt_option[5]} \
--valid-data {gpt_option[6]} \
--distributed-backend {gpt_option[7]} \
--lr-decay-style {gpt_option[8]}\
--warmup {gpt_option[9]} \
--checkpoint-activations \
--save-interval {gpt_option[11]} \
--eval-interval {gpt_option[12]} \
--save {gpt_option[13]} \
--split {gpt_option[14]}\
--eval-iters {gpt_option[15]} \
--eval-batch-size {gpt_option[16]}\
--zero-stage {gpt_option[17]} \
--lr {gpt_option[18]} \
--batch-size {gpt_option[19]} "
# Method-specific suffixes; only the qlora variant uses gpt_option[20].
lora=f" \
--skip-init \
--fp16 \
--use_lora "
qlora=f"--gradient-accumulation-steps {gpt_option[20]} \
--skip-init \
--fp16 \
--use_qlora"
ptune=f" \
--skip-init \
--fp16 \
--use_ptuning"
if method_type=='use_qlora':
gpt_options=gpt_option_prefix+qlora
elif method_type=='use_lora':
gpt_options=gpt_option_prefix+lora
elif method_type=='use_ptuning':
gpt_options=gpt_option_prefix+ptune
else:
LOGGER.info("没有选择训练方法!!!")
return
# NOTE(review): the command is a single string run through the shell with
# interpolated values; paths containing spaces or shell metacharacters will break it.
run_cmd = f'{options_nccl} deepspeed --master_port 16666 --hostfile {host_file_path} {VisualGLM_dir}/finetune_visualglm.py {gpt_options} '
os.system(run_cmd)
async def load_speech_model(asr_method,tts_method):
    """Load or release the speech models selected in the UI.

    Args:
        asr_method: ``'whisper'`` loads the Whisper "small" model into
            ``speech_AI`` (once); a falsy value releases a loaded model.
        tts_method: ``'VITS'`` is not implemented yet and only prints a
            notice; a falsy value is a no-op.

    Returns:
        A fixed status string for the UI.
    """
    global speech_AI
    if asr_method == 'whisper':
        if speech_AI['asr']['whisper'] is None:
            # Imported lazily so releasing or skipping ASR does not require
            # the package; the cached model is reused instead of being loaded
            # again on every click.
            import whisper
            speech_AI['asr']['whisper'] = whisper.load_model("small", download_root="weights")
            LOGGER.info('loads whisper')
        else:
            LOGGER.info('pass')
    elif not asr_method and speech_AI['asr']['whisper']:
        LOGGER.info('free memory')
        speech_AI['asr']['whisper'] = None
    else:
        LOGGER.info('pass')

    if tts_method == "VITS":
        print('调试中,很快更新')
    elif not tts_method:
        LOGGER.info('pass')
    return '语音识别记载完成'
def save_text2img_data(prompt,label,img_name,zh_select):
    """Append one ``{"img", "prompt", "label"}`` example to ``JSON_DATASETS``.

    When *prompt* is empty a default question about the image background is
    used: Chinese if *zh_select* is truthy, English otherwise.
    """
    global JSON_DATASETS
    if not prompt:
        if zh_select:
            prompt = "这张图片的背景里有什么内容?"
        else:
            prompt = 'What contents are present in the background of this picture?'
    JSON_DATASETS.append({
        "img": f"{img_name}",
        "prompt": prompt,
        "label": label,
    })
async def load_auto_backend_models(lama, sam, det,tag2text,ram, trans_zh, visual_glm,device=0, quant=4, bar=None):
    """Run ``load_auto_backend_model`` in a worker thread without blocking the loop.

    All arguments are forwarded unchanged to ``load_auto_backend_model``.

    Returns:
        ``'Loads Done !'`` on success, or a hint string when loading raised.
    """
    try:
        loop = asyncio.get_running_loop()
        with concurrent.futures.ThreadPoolExecutor() as pool:
            # Await the future itself: ``asyncio.wait`` only reports
            # completion and never re-raises the worker's exception, so a
            # failed load used to be reported as success.
            await loop.run_in_executor(
                pool, load_auto_backend_model,
                lama, sam, det, tag2text, ram, trans_zh, visual_glm, device, quant, bar)
        await asyncio.sleep(0.01)
    except Exception as e:
        # Format the message up front; a stray positional argument without a
        # placeholder makes the logger drop the error text.
        LOGGER.info(f"An error occurred: {e}")
        return 'windows可能会出现问题,请再次点击加载按钮,也可以检查后台'
    return 'Loads Done !'
def load_auto_backend_model(lama,sam,det,tag2text,ram,trans_zh,visual_glm,device,quant,bar):
    """
    Load or release each optional model in the global ``models_config`` registry.

    For every boolean switch: a falsy switch clears the registry slot, a truthy
    switch with an empty slot loads the model, and a truthy switch with a
    filled slot leaves it untouched. ``bar`` is accepted but not used here.
    Returns the string 'OK'.
    """
    global models_config
    registry = models_config

    # VisualGLM is handled before ``device`` is converted by select_device().
    if not visual_glm:
        LOGGER.info('no select visualGLM')
        registry['visual_glm'] = None
    elif not registry['visual_glm']:
        from VisualGLM_6B.chatglm import VisualGLM
        registry['visual_glm'] = VisualGLM(gpu_device=int(device), quant=int(quant))
        LOGGER.info(f'GPU{int(device)}———量化VisualGLM模型:int{int(quant)}')
    else:
        LOGGER.info('free or no visual_glm')

    device = select_device(device)

    if not tag2text:
        LOGGER.info('no tag2text')
        registry['tag2text'] = None
    elif not registry['tag2text']:
        registry['tag2text'] = AutoBackend("tag2text", weights=Tag2Text_Model_Path, device=device)
    else:
        LOGGER.info('free or tag2text pass')

    if not det:
        registry['grounded'] = None
    elif not registry['grounded']:
        registry['grounded'] = AutoBackend(
            "grounded-DINO",
            weights=GROUNED_MODEL_TYPE['S'],
            device=device,
            args_config='model_cards/groundingdino/config/GroundingDINO_SwinT_OGC.py')
    else:
        LOGGER.info('free or grounded pass')

    if not sam:
        registry['sam'] = None
    elif not registry['sam']:
        registry['sam'] = AutoBackend("segment-anything", weights=SAM_MODEL_TYPE['vit_h'], device=device)
    else:
        LOGGER.info("PASS SAM")

    if not ram:
        registry['ram'] = None
    elif not registry['ram']:
        LOGGER.info("ram loads")
        registry['ram'] = AutoBackend('ram', weights=Ram_Model_Path, device=device)
    else:
        LOGGER.info("PASS ram")

    if not trans_zh:
        registry['trans_zh'] = None
    elif not registry['trans_zh']:
        from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
        zh_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-zh", cache_dir='weights')
        zh_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-zh", cache_dir='weights')
        registry['trans_zh'] = pipeline("text2text-generation", model=zh_model, tokenizer=zh_tokenizer)
    else:
        LOGGER.info('zh model pass')

    if not lama:
        registry['lama'] = None
    elif not registry['lama']:
        registry['lama'] = AutoBackend(
            "lama",
            weights=None,
            args_config='model_cards/lama/configs/prediction/default.yaml',
            device=device)
    else:
        LOGGER.info('free or lama pass')

    return 'OK'
# Auto_run: run the selected vision models on one image, or on every file of a
# directory whose suffix is listed in IMG_FORMATS + VID_FORMATS, optionally writing
# txt/xml labels, masks and captions under an auto-incremented `project/name` folder.
# Models are looked up in the global `models_config` dict under the keys
# 'tag2text', 'ram', 'grounded', 'sam', 'lama' and 'trans_zh'.
# Returns the list [[img_rgb], caption, prompt, len(categories)].
# The parameters `device`, `quant`, `visualize`, `batch_process`, `record_audio`,
# `up_audio` and `process_name` are not referenced anywhere in this body.
# NOTE(review): this listing has lost its indentation, so block nesting cannot be
# read from the text; the comments below describe individual statements only.
def Auto_run(
source= 'data/images', # file/dir/URL/glob, 0 for webcam
img_input='',
input_prompt="Anything in this image",
conf_thres=0.3, # confidence threshold
iou_thres=0.5, # NMS IOU threshold
text_thres=0.2,
device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
quant=4,
save_conf=False, # save confidences in --save-txt labels
img_save=False, # do not save images/videos
visualize=False, # visualize features
project=ROOT / 'runs/detect', # save results to project/name
name='exp', # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
lama=False, # use lama models
sam=True, # use segment-anythings
det=True, # use grounded detect model with text
tag2text=False,
ram=False,
save_txt=False, # save results to *.txt
save_xml=False, # save results to *.xml
save_mask=False,
save_caption=False,
batch_process=False,
color_flag=False,
zh_select=False,
record_audio=None,
up_audio=None,
process_name=0,
):
global models_config
global category_colors
global JSON_DATASETS
cls_index = -1 # default value is -1
# An explicit image input takes precedence over the `source` path.
if img_input:
source =img_input
source = str(source)
img_paths=None
# A directory is expanded to the files it contains with a supported suffix;
# anything else is treated as a single path.
if os.path.isdir(source):
img_paths = [os.path.join(source, f) for f in os.listdir(source) if
Path(f).suffix[1:] in (IMG_FORMATS + VID_FORMATS)]
else:
img_paths = [source]
# Directories
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
# save_img = img_save and not source.endswith('.txt') # save inference images
is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
#webcam = source.isnumeric() or source.endswith('.streams') or (is_url )
# NOTE(review): `img_paths` was built before this download, so it still holds the
# URL string rather than the value returned by check_file.
if is_url and is_file:
source = check_file(source) # download
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
# Each mkdir creates the format-specific sub-folder when its flag is set and
# otherwise just (re)creates save_dir itself.
(save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
(save_dir / 'xmls' if save_xml else save_dir).mkdir(parents=True, exist_ok=True) # make dir
(save_dir / 'masks' if save_mask else save_dir).mkdir(parents=True, exist_ok=True) # make dir
(save_dir / 'captions' if save_caption else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# `p` is not used again in this function.
p = Path(str(save_dir) ) # to Path
seen=0
# load data and run inference
caption=None
for source in (img_paths):
# NOTE(review): cv2.imread's result is passed to cvtColor without a None check.
im = cv2.imread(source)
# Base name: text before the first '.' of the last '/'-separated path component.
name_p= source.split('/')[-1].split('.')[0]
img_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
preds=None
masks=[]
prompt=input_prompt
# Tag2Text: preds[0] is turned into a comma-separated tag prompt, preds[2] is the caption.
if tag2text:
LOGGER.info(f'text_prompt:{prompt}')
preds = models_config['tag2text'](im = img_rgb ,prompt=prompt,box_threshold=conf_thres,text_threshold=text_thres,iou_threshold=iou_thres)
# Currently ", " is better for detecting single tags
# while ". " is a little worse in some case
prompt=preds[0].replace(' |', ',')
caption=preds[2]
LOGGER.info(f"Caption: {caption}")
LOGGER.info(f"Tags: {prompt}")
# Optional translation of the caption through the 'trans_zh' pipeline.
if zh_select and prompt :
caption=models_config['trans_zh'](caption, max_length=1000, clean_up_tokenization_spaces=True)[0]["generated_text"]
if save_caption:
save_text2img_data(None, caption,name_p,zh_select)
#save_format(label_format="txt",save_path=f'{save_dir}/captions',img_name=name_p, results=caption)
# RAM: returns an (en_tag, zh_tag) pair; the English tags become the prompt.
if ram:
LOGGER.info(f'ram No need prompt:{prompt}')
en_tag,zh_tag = models_config['ram'](im = img_rgb,prompt=prompt,box_threshold=conf_thres,text_threshold=text_thres,iou_threshold=iou_thres)
prompt=en_tag.replace(' |', ',')
zh_tag=zh_tag.replace(' |', ', ')
#LOGGER.info(preds[1])
LOGGER.info(f"en_Tags: {prompt}")
print(f"zh_Tags : {zh_tag}")
# if zh_select and prompt :
# caption=models_config['trans_zh'](caption, max_length=1000, clean_up_tokenization_spaces=True)[0]["generated_text"]
# if save_caption:
# save_text2img_data(None, caption,name_p,zh_select)
# Grounded detector: a non-empty user prompt replaces the tags produced above.
if det:
if input_prompt:
prompt=input_prompt
# NOTE(review): `prompt` is passed as a logging format argument but the message
# contains no %s placeholder, so the value is never rendered.
LOGGER.info('your input prompt replace default:',prompt)
preds= models_config['grounded'](im = img_rgb,prompt=prompt, box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres)
# SAM is prompted with the detector output preds[0] when it is non-empty.
if sam and det :
if preds[0].numel()>0:
masks= models_config['sam'](im = img_rgb, prompt=preds[0],box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres)
if save_mask:
save_mask_data(str(save_dir)+'/masks', caption, masks, preds[0], preds[2],name_p)
# Write results
# Matplotlib rendering of boxes, masks and caption, saved as <save_dir>/<seen>.jpg.
if img_save:
seen+=1
plt.figure(figsize=(20,18))
plt.imshow(img_rgb)
if det:
for box,label in zip(preds[0],preds[2]):
show_box(box.numpy(),plt.gca(),label)
if sam :
for mask in masks:
show_mask(mask.cpu().numpy(),plt.gca(),random_color=True)
if tag2text:
plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n')
plt.axis('off')
# NOTE(review): 'bbox_iches' looks like a typo for 'bbox_inches' (the spelling used
# by the other savefig call in this function).
plt.savefig(f'{save_dir}/{seen}.jpg',bbox_iches='tight',dpi=600,pad_inches=0.0)
# NOTE(review): `masks` starts as [] and is never None, so this test reduces to
# `lama`; .detach() is then called on a plain list whenever SAM produced nothing.
if lama and masks is not None :
masks_prompts= masks.detach().cpu().numpy().astype(np.uint8) * 255
# Each mask is dilated, inpainted with LaMa, written to mask_<idx>.png, and the
# inpainted result replaces img_rgb.
for idx, mask in enumerate(masks_prompts):
sub_mask = [dilate_mask(ma, 15) for ma in mask]
img_inpainted_p= f'{save_dir}/mask_{idx}.png'
idx=idx+1
img_inpainted = models_config['lama'](
im=img_rgb, prompt=sub_mask[0])
Image.fromarray(img_inpainted.astype(np.uint8)).save(img_inpainted_p)
img_rgb=img_inpainted
# Give every known category a random RGB colour if it has none yet.
for category in categories:
if category not in category_colors:
category_colors[category] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
gn = torch.tensor(im.shape)[[1, 0, 1, 0]] # normalization gain whwh
if (color_flag or save_txt) and(det ) :
seg_mask = np.zeros_like(img_rgb) # zero array with the shape and dtype of img_rgb
category_color=[]
# NOTE(review): zip stops at the shortest input, so when `masks` is still the
# empty list no label line is produced at all.
for xyxy, conf, cls,mask in zip(preds[0],preds[1],preds[2],masks): #per im boxes
xywh = (xyxy2xywh((xyxy).view(1,4)) / gn).view(-1).tolist() # normalized xywh
# Unseen labels get the next free class index, are appended to classes_id.txt
# and receive a random colour; known labels reuse their stored index.
if cls not in categories:
categories.update({
str(cls): len(categories)})
write_categories(cls,f'{save_dir}/classes_id.txt')
cls_index = len(categories) - 1
category_colors.update({
str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))})
category_color=category_colors[str(cls)]
else:
cls_index = categories[str(cls)]
if str(cls) not in category_colors:
category_colors.update({
str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))})
category_color=category_colors[str(cls)]
line = (cls_index, xywh, conf) if save_conf else (cls_index, xywh) # label format
# Flatten the tuple's repr into a whitespace-separated label line.
line = str(line).replace('[', '').replace(']', '').replace("(",'').replace(")"," ").replace(",", " " * 2)
if save_mask:
h, w = mask.shape[-2:]
mask_color = np.array(category_color).reshape((1, 1, -1))
seg_mask = seg_mask + mask.cpu().numpy().reshape(h, w, 1) * mask_color # add
if save_txt:
save_format(label_format="txt",save_path=f'{save_dir}/labels', img_name=name_p, results=line)
# Colour-coded class mask image saved as masks/<name_p>_cls.jpg.
if save_mask:
plt.figure(figsize=(10,10))
plt.imshow(seg_mask)
#plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n')
plt.axis('off')
plt.savefig(os.path.join(f'{save_dir}/masks', f'{name_p}_cls.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0)
if save_xml:
h,w=im.shape[:2]
save_format("xml",f'{save_dir}/xmls' ,name_p, Path(source).parent, preds, h, w)
# Build the PIL image that is returned to the caller, with boxes and masks drawn on it.
if det:
img_rgb= Image.fromarray(np.uint8(img_rgb), mode='RGB')
draw_img=ImageDraw.Draw(img_rgb)
for box,label in zip(preds[0],preds[2]):
Draw_img( box, draw_img,'box',label,category_colors[str(label)] if color_flag else None)
if sam:
img_mask=Image.new('RGBA',img_rgb.size,color=(0,0,0,0) )
draw_mask=ImageDraw.Draw(img_mask)
# NOTE(review): `label` here is whatever the preceding box loop left behind, so
# every mask is looked up with the same label.
for mask in masks:
Draw_img(mask[0].cpu().numpy(),draw_mask,'mask',None,category_colors[str(label)] if color_flag else None)
img_rgb.paste(img_mask, mask=img_mask)
#img_rgb.save(f'{save_dir}/{seen}.jpg')
# Report where each enabled output format was written.
if save_txt:
#class_ids.append(cls)
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/labels")
if save_xml:
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/xmls")
# The global JSON_DATASETS object is appended as one JSON line to captions/dataset.json.
if save_caption:
with open(f'{save_dir}/captions/dataset.json', 'a',encoding='utf-8') as f:
json.dump(JSON_DATASETS,f,ensure_ascii=False)
f.write('\n')
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/captions")
if save_mask:
LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/masks")
LOGGER.info('Done...')
# NOTE(review): `img_rgb` and `prompt` are only assigned while iterating img_paths,
# so an empty directory would leave them undefined here.
return [[img_rgb],caption,prompt,len(categories)]
def visual_chat(prompt_input, temperature, top_p, image_prompt, result_text,
                record_audio, upload_audio, omniverse=False):
    """Answer a question about an image with the loaded VisualGLM model.

    The function is bound to Gradio events whose outputs are
    ``[txt, result_text]``, so every path returns a 2-tuple
    ``(textbox_value, chat_history)``.

    Args:
        prompt_input: Question text typed by the user.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling value forwarded to the model.
        image_prompt: Image forwarded to the model.
        result_text: Current chat history (list of ``(user, bot)`` pairs).
        record_audio: Unused; kept for interface compatibility.
        upload_audio: Unused; kept for interface compatibility.
        omniverse: When true, the last answer is sent to ``a2f.tts_a2f``.

    Returns:
        ``("", history)``. When no model is loaded, the history gets one extra
        turn carrying the "model not loaded" notice; when the inputs are
        incomplete, the history is returned unchanged.
    """
    global models_config
    print(f"是否连接omniverse:{omniverse}")
    visual_glm = models_config['visual_glm']
    if not visual_glm:
        # Previously the history and the message were returned in swapped
        # positions, which put the history into the textbox and a bare string
        # into the chatbot.
        notice = "没有加载部署的VisualGLM模型!!!"
        return "", list(result_text or []) + [(prompt_input, notice)]
    if not (image_prompt and prompt_input):
        # Previously this path fell through and returned None, which does not
        # match the two declared outputs.
        LOGGER.info("请检查你的输入格式和glm模型的参数配置!!!")
        return "", result_text
    __, result_text = visual_glm.request_model(prompt_input, temperature, top_p, image_prompt, result_text)
    if omniverse:
        # Imported lazily so the dependency is only needed when the feature is on.
        from a2f import tts_a2f
        asyncio.run(tts_a2f(result_text[-1][-1]))
    return "", result_text
def clear_fn_image(value):
    """Return a fresh chat history containing only the bilingual greeting.

    ``value`` is ignored; it exists because the function is bound to events
    that pass one input.
    """
    greeting = "Hi, What do you want to know ?或者你想从图像中知道什么?"
    fresh_history = [("", greeting)]
    return fresh_history
if __name__ == "__main__":
    # Start-up: read configuration, load the function/plugin registries,
    # configure file logging and prepare the working directories and lists
    # used while building the UI.
    #check_requirements(exclude=('tensorboard', 'thop'))
    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
    # AUTO_CLEAR_TXT is already unpacked above. It used to be re-read here with
    # a single-key get_conf call; this file unpacks such a call as a sequence
    # elsewhere, so the re-read could leave the flag as an always-truthy tuple.
    # If WEB_PORT is not positive, pick a free port at random.
    PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
    functional = get_core_functions()
    from themes.theme import adjust_theme, advanced_css, theme_declaration
    # Advanced function plugins.
    from llm_cards.crazy_functional import get_crazy_functions
    crazy_fns = get_crazy_functions()
    import logging, uuid
    os.makedirs("gpt_log", exist_ok=True)
    log_kwargs = dict(filename="gpt_log/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    try:
        logging.basicConfig(encoding="utf-8", **log_kwargs)
    except (TypeError, ValueError):
        # Interpreters whose basicConfig rejects the `encoding` keyword:
        # fall back to the platform default encoding.
        logging.basicConfig(**log_kwargs)
    # Disable logging output from the 'httpx' logger
    logging.getLogger("httpx").setLevel(logging.WARNING)
    print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
    # Convert markdown text when the chatbot renders messages.
    gr.Chatbot.postprocess = format_io
    # Proxy check and auto-update helpers.
    from utils.check_proxy import check_proxy, auto_update, warm_up_modules
    proxy_info = check_proxy(proxies)
    voice_dir='voice_dir'
    # exist_ok avoids the check-then-create race of the former exists()/mkdir pair.
    os.makedirs(voice_dir, exist_ok=True)
    inputxs=[]
    outputs=[]
    cancel_handles = []
# Root Gradio layout. The first column (scale=1) holds the configuration panels.
with gr.Blocks(title="Prompt-Can-Anythings",reload=True, theme=adjust_theme(), analytics_enabled=False,full_width=True,css=advanced_css) as block:
gr.HTML( f"<h1 align=\"center\"> Prompt-Can-Anythings_v1.15 (周更迭代中)</h1>")
# Per-session state carrying the API key and the selected LLM name.
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr.Row().style(equal_height=False):
with gr.Column(scale=1):
with gr.Accordion('视觉模型配置', open=False):
# Thresholds, device and quantisation level for the vision models.
with gr.TabItem('本地模型配置'):
box_threshold=gr.inputs.Number(label='Confidence Threshold', default=0.3)
iou_threshold=gr.inputs.Number(label='Iou Threshold', default=0.5)
text_threshold=gr.inputs.Number(label='Text Threshold', default=0.25)
device_input=gr.inputs.Textbox(label='device',default='0')
quant=gr.inputs.Number(label='quant levels',default=4)
with gr.TabItem('其他【不需要修改】'):
option_inputs = {
'Save Conf': gr.inputs.Checkbox(label='Save Conf',default=False),
'Save img': gr.inputs.Checkbox(label='Save img',default=False),
'Visualize': gr.inputs.Checkbox(label='Visualize',default=False),
'Project': gr.inputs.Textbox(label='Project:save dir_path',default='runs/detect'),
'Name': gr.inputs.Textbox(label='Name',default='exp'),
'Exist Ok': gr.inputs.Checkbox(label='Exist Ok',default=False)
}
# Order matters: `inputxs` is later appended to the list passed positionally to
# Auto_run, so the dict order must follow that function's parameter order.
inputxs.extend(list(option_inputs.values()))
with gr.Accordion('Method_Options:free combo', open=True):
# One checkbox per vision model that can be enabled.
methods_options={'Lama': gr.inputs.Checkbox(label='Lama model[近期更新测试中]',default=False),
'Sam': gr.inputs.Checkbox(label='Sam[当前仅支持检测器的BOX输入]',default=False),
'Det': gr.inputs.Checkbox(label='Grounded[可输入文本的检测器]',default=False),
'Tag2text': gr.inputs.Checkbox(label='Tag2text[图文理解]',default=False),
'ram': gr.inputs.Checkbox(label='ram[识别标签]',default=False)
}
visual_glm=gr.inputs.Checkbox(label='VisualGLM',default=False)
chatgpt=gr.inputs.Checkbox(label='ChatGPT(目前为网络服务自动挂载)',default=True)
# Button that (re)loads the selected models; `loads_flag` shows the result text.
loads_model_button=gr.Button('热重载模型',variant="primary")
loads_flag=gr.inputs.Textbox(label="加载模型进度")
list_methods=list(methods_options.values())
inputxs.extend(list_methods)
with gr.Accordion('format Options', open=False):
# Output-format switches, also appended to the positional input list.
save_options={
'Save txt': gr.inputs.Checkbox(label='Save txt [collect class nums]',default=False),
'Save xml': gr.inputs.Checkbox(label='Save xml',default=False),
'Save Mask': gr.inputs.Checkbox(label='Save Mask',default=False),
'Save Caption': gr.inputs.Checkbox(label='Save Caption',default=False),
'Batch Process': gr.inputs.Checkbox(label='Batch Process[暂不支持]',default=False),
'Color Flag': gr.inputs.Checkbox(label='Color Flag[标识语义]',default=False)
}
inputxs.extend(list(save_options.values()))
# Path of a local image folder; it is the first element of the Auto_run input list.
dir_inputs =gr.inputs.Textbox(label='加载本地图像文件夹路径',default='train_imgs')
# LLM settings: visible UI areas, model source, max length, quantisation and sampling.
with gr.Accordion('LLM模型配置', open=False):
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型源 [暂时仅支持chatgpt/glm2]").style(container=False)
max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength")
with gr.Row():
quant_chatglm= gr.Dropdown(MODEL_QUANTIZE,value=None,label="llm quantize[chatglm] ").style(container=False)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="nucleus sampling",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
# Separate sampling controls that are passed to visual_chat.
with gr.Accordion('VisualGLM模型配置', open=False):
visual_temperature = gr.Slider(maximum=1, value=0.8, minimum=0, label='VisualGLMTemperature')
visual_top_p = gr.Slider(maximum=1, value=0.4, minimum=0, label='VisualGLM top_P')
# Speech settings: ASR/TTS method selection and the button that loads the speech models.
with gr.Accordion('语音模型配置', open=False):
with gr.Row():
asr_select = gr.Dropdown(ASR_METHOD,value='whisper', label="语音识别方法").style(container=False)
tts_select = gr.Dropdown(TTS_METHOD,value='VITS', label="语音合成方法").style(container=False)
asr_gpt = gr.inputs.Checkbox(label='ASR gpt [无需加载按钮]',default=False).style(height=1,width=1)
asr_button = gr.Button('Loads SPEECH_AI').style(height=5,width=5)
# Spoken-dialogue switch and its trigger button (bound to talk_all further down).
with gr.Accordion('大模型对话系统配置', open=True):
with gr.Row():
chat_app = gr.inputs.Checkbox(label='start system',default=False).style(height=1,width=1)
chat_app_button = gr.Button('Speech_system').style(height=5,width=5)
# VisualGLM fine-tuning form. `visualglm_args` is passed positionally to
# train_visualGLM (with the fine-tune method appended later), so the order of the
# entries must match that function's parameters.
with gr.Accordion('ViusalGLM训练配置', open=False):
with gr.Row():
train_methods=gr.Dropdown(AVAIL_METHOD_FINETUNE,value=METHOD_FINETUNE, label="微调方法").style(container=False)
visualglm_args=[
gr.inputs.Textbox(label="Experiment_Name", default="visualglm-6b"),
gr.inputs.Number(label="Model Parallel Size", default=1),
gr.inputs.Textbox(label="mode", default='finetune'),
gr.Slider(minimum=1, maximum=3000, value=300, step=1, interactive=True, label="train-iters"),
gr.inputs.Checkbox(label="resume dataloader", default=True),
gr.Slider(minimum=16, maximum=256, value=64, step=1, interactive=True, label="max_source_length"),
gr.Slider(minimum=16, maximum=1024, value=256, step=1, interactive=True, label="max_target_length"),
gr.Slider(minimum=1, maximum=100, value=10, step=1, interactive=True, label="lora_rank"),
gr.Slider(minimum=0, maximum=256, value=0, step=1, interactive=True, label="layer_range_start"),
gr.Slider(minimum=0, maximum=20, value=14, step=1, interactive=True, label="layer_range_end"),
gr.Slider(minimum=1, maximum=60, value=4, step=1, interactive=True, label="pre_seq_len"),
gr.inputs.Textbox(label="Train Data", default="fewshot-data/dataset.json"),
gr.inputs.Textbox(label="Eval Data", default="fewshot-data/dataset.json"),
gr.inputs.Textbox(label="distributed backend", default="nccl"),
gr.inputs.Dropdown(label="lr decay style ", choices=["cosine", "linear"], default="cosine"),
gr.inputs.Number(label="warmup", default=0.02),
gr.inputs.Checkbox(label="checkpoint-activations", default=True) ,
gr.inputs.Number(label="Save Interval", default=300),
gr.inputs.Number(label="Eval Interval", default=10000),
gr.inputs.Textbox(label="Save Directory", default="./checkpoints"),
gr.inputs.Number(label="split", default=1),
gr.inputs.Number(label="Eval Iters", default=10),
gr.inputs.Number(label="Eval Batch Size", default=8),
# NOTE(review): a Textbox with an integer default, unlike the Number inputs around it.
gr.inputs.Textbox(label='Zero Stage',default=1),
gr.inputs.Number(label="lr", default=0.0001),
gr.inputs.Number(label="batch size", default=4),
gr.inputs.Number(label="gradient accumulation steps", default=4),
]
fine_tune=gr.Button('Finetune VisualGLM').style(height=5,width=5)
# SadTalker settings; these controls are the inputs of sadtalker_demo (bound to
# the 'Generate_video' button further down).
with gr.Accordion('sadtakler配置', open=False):
with gr.Tabs(elem_id="sadtalker_checkbox"):
with gr.TabItem('Settings'):
gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
with gr.Column(variant='panel'):
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
with gr.Row():
pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1) #
# Checkpoint and config locations handed to sadtalker_demo.
with gr.Row():
sadtalker_path=gr.inputs.Textbox(label="checkpoint path", default="checkpoints")
sadtalker_config=gr.inputs.Textbox(label="config path", default="SadTalker/src/config")
with gr.Row():
size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
with gr.Row():
is_still_mode = gr.Checkbox(label="Still Mode (fewer hand motion, works with preprocess `full`)")
batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
sadtalker_submit = gr.Button('Generate_video', elem_id="sadtalker_generate", variant='primary')
# Second column (scale=15): audio input, image prompt and chat controls.
with gr.Column(variant='panel',scale=15):
with gr.Tabs(elem_id="Process_audio"):
with gr.TabItem('Upload OR TTS'):
with gr.Column(variant='panel'):
# Microphone recording, delivered to handlers as a file path.
with gr.Row():
record_audio = gr.Audio(label="record your voice", source="microphone",type='filepath')
#Recording_audio=gr.Button('Recording_asr',elem_id="speech2text", variant='primary')
# Uploaded audio file plus the text box used for both ASR output and TTS input.
with gr.Row():
upload_audio = gr.Audio(label="Input audio(./wav/.mp3)", source="upload",type='filepath').style(height=20,width=120)
input_text = gr.Textbox(label="Generating audio from text", lines=2, placeholder="please enter some text here, we genreate the audio from TTS.")
with gr.Row():
asr = gr.Button('Generate text',elem_id="text_generate", variant='primary')
tts = gr.Button('Generate audio',elem_id="audio_generate", variant='primary')
# Switch that is forwarded to the chat handlers as the Omniverse Audio2Face flag.
with gr.TabItem('Omniverse App'):
with gr.Row():
omniverse_switch = gr.inputs.Checkbox(label='Omniverse A2F',default=False)
#audio_to_face=gr.Button('send a Audio to Omniverse ', variant='primary')
# t2s: text-to-speech handler for the 'Generate audio' button.
# `text` is the text to synthesise and `method` the selected TTS method name.
# For 'edge_tts' the audio is produced by a2f.tts_send2 into
# <voice_dir>/send_a2f.wav; for 'VITS' only a "not supported yet" message is printed.
# NOTE(review): indentation is lost in this listing; the final return presumably
# belongs to the function body, in which case the path is returned even when no
# audio was written - confirm against the original file.
def t2s(text,method):
from a2f import tts_send2
send_dir=f'{voice_dir}/send_a2f.wav'
if method=='VITS':
print('更新中,暂不支持')
elif method=='edge_tts' :
asyncio.run(tts_send2(text,False,send_dir))
return send_dir
def s2t(speech_file, stream_mode=False):
    """Transcribe ``speech_file`` and return the recognised text.

    The recogniser stored at ``speech_AI['asr']['whisper']`` is handed to
    ``a2f.speech_recognition``; the language it also reports is discarded.
    """
    from a2f import speech_recognition
    recogniser = speech_AI['asr']['whisper']
    recognised = speech_recognition(speech_file, recogniser, stream_mode)
    speech_text, _speech_language = recognised
    return speech_text
# Image upload tab: source image, its text prompt, and the CV task buttons.
with gr.Tabs(elem_id="上传图像"):
with gr.TabItem('Upload image'):
with gr.Row():
image_prompt = gr.Image(label="Source image", source="upload", type="filepath").style(height=200,width=180)
prompt_input=gr.inputs.Textbox(lines=2, label="prompt with image/仅与图像相关 : (Optional,注意每个功能请考虑在这个框里的TEXT提示词要不要先清空)")
# Positional argument list for Auto_run: the eight leading controls followed by
# every option checkbox collected in `inputxs`.
inputs = [dir_inputs,image_prompt,prompt_input,box_threshold,iou_threshold,text_threshold,device_input,quant]
inputs.extend(inputxs)
with gr.Row():
run_button = gr.Button('Run CV_Task',variant="primary"); run_button.style(size="sm")
clear_button= gr.Button("清除文本", variant="secondary"); clear_button.style(size="sm")
# Reset / stop / clear buttons; the clear button starts hidden.
with gr.Row():
resetBtn = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm")
stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
# Status line showing the current model and the proxy check result.
with gr.Row():
status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel")
# Chat tab: primary and secondary text inputs, submit buttons, core-function
# buttons, plugin buttons, plugin dropdown and the file upload area.
with gr.Tabs(elem_id="Chatbox"):
with gr.TabItem('对话区'):
with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary:
with gr.Row():
chat_txt=gr.Textbox(lines=3,show_label=False, placeholder="question").style(container=False)
# NOTE(review): the name `area_input_secondary` is bound again in the output
# column, so later references resolve to that second accordion.
with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
with gr.Row():
txt = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False)
with gr.Row():
run_button_chat = gr.Button('Chat_Sumbit',variant="primary")
run_button_2 = gr.Button('VisualGLM',variant="primary")
# One button per visible core function; the button object is stored back into
# the `functional` registry under the "Button" key.
with gr.Accordion("学术ChatGPT基础功能", open=False) as area_basic_fn:
with gr.Row():
for k in functional:
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
variant = functional[k]["Color"] if "Color" in functional[k] else "secondary"
functional[k]["Button"] = gr.Button(k, variant=variant)
# One button per plugin whose "AsButton" entry is not false.
with gr.Accordion("函数插件区", open=False, elem_id="plugin-panel") as area_crazy_fn:
with gr.Row():
gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)")
with gr.Row():
for k in crazy_fns:
if not crazy_fns[k].get("AsButton", True): continue
variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary"
crazy_fns[k]["Button"] = gr.Button(k, variant=variant)
crazy_fns[k]["Button"].style(size="sm")
# Dropdown listing every plugin, an optional advanced-argument box, and the
# button whose caption follows the dropdown selection.
with gr.Row():
with gr.Accordion("更多函数插件", open=False):
# update
dropdown_fn_list = crazy_fns.keys()
with gr.Row():
dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False)
with gr.Row():
plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False,
placeholder="特殊函数插件的高级参数输入区").style(container=False)
with gr.Row():
switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary")
# File upload area for files consumed by plugins.
with gr.Row():
with gr.Accordion("点击展开“文件上传区”。上传本地文件/压缩包供函数插件调用。", open=False) as area_file_up:
file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple")
# Third column (scale=20): image/video/text outputs and the chatbot.
with gr.Column(scale=20):
with gr.Accordion('输出区', open=True):
with gr.TabItem('图像输出'):
gallery = gr.Gallery(label="Generated images",show_label=False,elem_id="gallery",).style(preview=True, grid=2, object_fit="scale-down")
with gr.TabItem('视频输出'):
video_output = gr.Video(label="Generated video", format="mp4").style(width=600)
# Text outputs of Auto_run: caption, tags and class count.
with gr.TabItem('图文理解'):
with gr.Row():
output_text = gr.Textbox(label="tag2text",lines=2)
with gr.Row():
output_tag= gr.outputs.Textbox(label="Tag").style(height=1)
# Caption translation switch; it is appended to the Auto_run inputs later.
with gr.Row():
zh_select=gr.inputs.Checkbox(label='英译中 Tag2Text【选后需重载模型】',default=False).style(width=1)
with gr.Row():
# NOTE(review): 'conatiner' looks like a misspelling of 'container'.
output_classes= gr.Textbox(label="Class Numbers ",lines=1,
placeholder="generate classes numbers,color flag or save_txt must be ture/你必须启动存储txt的功能,这个是全局的").style(conatiner=False,width=1)
with gr.Row():
# NOTE(review): rebinds `area_input_secondary`, which was already assigned to the
# secondary input accordion in the chat tab.
with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary:
system_prompt = gr.Textbox(show_label=True, placeholder=f"Chat Prompt", label="下方输入对话支持图像和文本", value="AI assistant.")
#stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm")
clearBtn2 = gr.Button("清除", variant="secondary", visible=False); clearBtn2.style(size="sm")
# Chatbot seeded with a greeting, plus the per-session history state.
with gr.Row():
with gr.Column(scale=2):
result_text = gr.Chatbot(label=f'当前模型:{LLM_MODEL}', value=[("", "Hi, What do you want to know ?")]).style(height=CHATBOT_HEIGHT)
history = gr.State([])
#Recording_audio.click(fn=toggle_operation,inputs=[asr_select],outputs=[input_text]) # 将 toggle_operation 函数绑定到按钮
# 功能区显示开关与功能区的互动
def fn_area_visibility(a):
    """Translate the checked section names into per-component visibility updates.

    ``a`` is the collection of selected labels from the show/hide checkbox
    group. The result maps each affected component to a ``gr.update``; the
    secondary text box is additionally cleared when the bottom input area is
    selected.
    """
    use_bottom_input = "底部输入区" in a
    show_clear_buttons = "输入清除键" in a
    ret = {
        area_basic_fn: gr.update(visible=("基础功能区" in a)),
        area_crazy_fn: gr.update(visible=("函数插件区" in a)),
        area_input_primary: gr.update(visible=(not use_bottom_input)),
        area_input_secondary: gr.update(visible=use_bottom_input),
        clearBtn: gr.update(visible=show_clear_buttons),
        clearBtn2: gr.update(visible=show_clear_buttons),
        plugin_advanced_arg: gr.update(visible=("插件参数区" in a)),
    }
    if use_bottom_input:
        ret[txt] = gr.update(value="")
    return ret
# Toggle UI areas whenever the show/hide checkbox group changes.
checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, chat_txt,txt , clearBtn, clearBtn2, plugin_advanced_arg] )
# SadTalker: uploaded image and audio plus the settings above produce the output video.
sadtalker_submit.click(fn=sadtalker_demo,inputs=[sadtalker_path,sadtalker_config,image_prompt,upload_audio, preprocess_type,is_still_mode,enhancer,
batch_size, size_of_image, pose_style, exp_weight],outputs=[video_output])
#audio_to_face.click(fn=t2s, inputs=[result_text,input_text,gr.State(True),omniverse_switch], outputs=[upload_audio] )
# Speech: load the models, audio -> text, text -> audio.
asr_button.click(fn=load_speech_model,inputs=[asr_select,tts_select],outputs=[loads_flag])
asr.click(fn=s2t, inputs=[upload_audio], outputs=[input_text])
tts.click(fn=t2s, inputs=[input_text,tts_select], outputs=[upload_audio])
# fine tune VisualGLM
# The fine-tune method is appended as the last positional argument.
visualglm_args.append(train_methods)
fine_tune.click(fn=train_visualGLM,inputs=visualglm_args,outputs=[txt])
# visualGLM inputs
# Arguments for the model (re)load button: method checkboxes followed by the
# translation switch, VisualGLM switch, device, quant level and the status box.
cs=[]
cs.extend(list_methods)
cs.extend([zh_select, visual_glm,device_input, quant, loads_flag])
loads_model_button.click(fn=load_auto_backend_models,inputs=cs,outputs=[loads_flag])
# zh_select becomes the last positional input of Auto_run.
inputs.append(zh_select)
def on_md_dropdown_changed(k):
    """Return an update that relabels the chatbot with the selected model name ``k``."""
    new_label = "当前模型:" + k
    return {result_text: gr.update(label=new_label)}
# Relabel the chatbot when another LLM is chosen.
md_dropdown.select(on_md_dropdown_changed, [md_dropdown],[result_text])
# Outputs of Auto_run: gallery, caption text, tag text, class count.
outputs = [gallery, output_text, output_tag,output_classes]
# Shared input/output lists for every chat-style handler.
input_combo = [cookies, max_length_sl, md_dropdown,chat_txt,txt,top_p, temperature, result_text, history,system_prompt,plugin_advanced_arg,omniverse_switch,record_audio,asr_gpt,quant_chatglm,chat_app]
output_combo = [cookies, result_text, history, status]
# output_combo2=[result_text, history, status]
predict_args = dict(fn=ArgsGeneralWrapper(predict_all), inputs=input_combo, outputs=output_combo)
chat_args=dict(fn=ArgsGeneralWrapper(talk_all), inputs=input_combo, outputs=output_combo)
run_button.click(fn=Auto_run, inputs=inputs, outputs=outputs)
# Submit and reset buttons
# Every chat event handle is collected so the stop button can cancel it.
cancel_handles.append(chat_txt.submit(**predict_args))
cancel_handles.append(txt.submit(**predict_args))
cancel_handles.append(run_button_chat.click(**predict_args))
cancel_handles.append(run_button_2.click(**predict_args))
cancel_handles.append(chat_app_button.click(**chat_args))
resetBtn.click(lambda: ([], [], "已重置"), None, [result_text, history, status])
# NOTE(review): the stop button is bound again further down with the same list,
# after more handles have been appended to it.
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
clearBtn.click(lambda: ("",""), None, [chat_txt,txt])
clearBtn2.click(lambda: ("",""), None, [chat_txt,txt])
# Optionally clear both text inputs after each submission.
if AUTO_CLEAR_TXT:
run_button_chat.click(lambda: ("",""), None, [chat_txt,txt])
run_button_2.click(lambda: ("",""), None, [chat_txt,txt])
chat_txt.submit(lambda: ("",""), None, [chat_txt,txt])
txt.submit(lambda: ("",""), None, [chat_txt,txt])
# Core-function buttons: same handler as a normal chat submit, with two extra
# State inputs (True and the function name).
for k in functional:
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
dict_args=dict(fn=ArgsGeneralWrapper(predict_all), inputs=[*input_combo, gr.State(True),gr.State(k)], outputs=output_combo)
cancel_handles.append(functional[k]["Button"].click(**dict_args))
# File upload area: interaction with the chatbot once files are received
file_upload.upload(on_file_uploaded, [file_upload, result_text, chat_txt, txt, checkboxes], [result_text, chat_txt, txt])
# Function plugins - fixed button area
for k in crazy_fns:
print(f'检查插件名字{k},是否载入')
if not crazy_fns[k].get("AsButton", True): continue
# Each plugin receives the shared inputs plus the web port; on_report_generated
# runs after the plugin handler.
click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
click_handle.then(on_report_generated, [cookies, file_upload, result_text], [cookies, file_upload, result_text])
cancel_handles.append(click_handle)
# Function plugins - interaction between the dropdown menu and the follow-up button
def on_dropdown_changed(k):
    """Update the follow-up button and the advanced-argument box for plugin ``k``.

    Returns a dict mapping ``switchy_bt`` and ``plugin_advanced_arg`` to their
    ``gr.update`` values. The argument box is shown only for plugins whose
    registry entry sets ``"AdvancedArgs"``.
    """
    plugin = crazy_fns[k]
    variant = plugin.get("Color", "secondary")
    ret = {switchy_bt: gr.update(value=k, variant=variant)}
    if plugin.get("AdvancedArgs", False):  # whether to show the advanced plugin argument area
        # The fallback must be a string: it is concatenated onto the label, and
        # the former list default raised TypeError for plugins without a reminder.
        reminder = plugin.get("ArgsReminder", "没有提供高级参数功能说明")
        ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + reminder)})
    else:
        ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
    return ret
# Selecting a plugin in the dropdown updates the follow-up button and argument box.
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
# NOTE(review): this function and its binding repeat an identical definition and
# md_dropdown.select call made earlier in this script, so the handler is
# registered twice.
def on_md_dropdown_changed(k):
return {result_text: gr.update(label="当前模型:"+k)}
md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [result_text] )
# Register the callback of the follow-up button
def route(request: gr.Request, k, *args, **kwargs):
    """Dispatch a click on the follow-up button to the plugin named ``k``.

    Nothing is yielded while ``k`` is still one of the two placeholder
    captions; otherwise every value produced by the wrapped plugin function is
    forwarded.
    """
    placeholder_captions = [r"打开插件列表", r"请先从插件列表中选择"]
    if k in placeholder_captions:
        return
    plugin_handler = ArgsGeneralWrapper(crazy_fns[k]["Function"])
    yield from plugin_handler(request, *args, **kwargs)
# The follow-up button passes its own caption as the plugin name to route().
click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
click_handle.then(on_report_generated, [cookies, file_upload, result_text], [cookies, file_upload, result_text])
cancel_handles.append(click_handle)
# Register the callback of the stop button
# stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
#VisualGLM run
# NOTE(review): run_button_2 is also bound to predict_all earlier in this script,
# so one click triggers both handlers.
run_button_2.click(fn=visual_chat,inputs=[chat_txt, visual_temperature, visual_top_p, image_prompt,
result_text,record_audio,upload_audio,omniverse_switch],
outputs=[txt, result_text])
prompt_input.submit(fn=visual_chat,inputs=[chat_txt, visual_temperature, visual_top_p, image_prompt,
result_text,record_audio,upload_audio,omniverse_switch],
outputs=[txt,result_text])
#upload_audio.upload(fn=clear_fn_image, inputs=clear_button, outputs=[result_text])
# Uploading or clearing the image resets the chatbot to the greeting.
# NOTE(review): a Button component is passed as the handler input here.
image_prompt.upload(fn=clear_fn_image, inputs=clear_button, outputs=[result_text])
clear_button.click(lambda: ("","","","",""), None, [prompt_input,result_text,txt, input_text,chat_txt])
image_prompt.clear(fn=clear_fn_image, inputs=clear_button, outputs=[result_text])
# def init_cookie(cookies, chatbot):
# # Assign a unique uuid to every visiting user
# cookies.update({'uuid': uuid.uuid4()})
# return cookies
def auto_opentab_delay(port=7586):
    """Log the local URLs and open the app in a browser tab from a daemon thread.

    The thread sleeps two seconds, reads the ``DARK_MODE`` setting and opens
    either the dark-theme or the plain URL on ``port``.
    """
    import threading
    import webbrowser
    import time
    LOGGER.info(f"\n如果浏览器没有自动打开,请复制并转到以下URL:")
    LOGGER.info(f"\t(亮色主题): http://localhost:{port}")
    LOGGER.info(f"\t(暗色主题): http://localhost:{port}/?__theme=dark")

    def _open_tab():
        time.sleep(2)  # wait two seconds, then open the browser
        DARK_MODE, = get_conf('DARK_MODE')
        if DARK_MODE:
            target_url = f"http://localhost:{port}/?__theme=dark"
        else:
            target_url = f"http://localhost:{port}"
        webbrowser.open_new_tab(target_url)

    opener = threading.Thread(target=_open_tab, name="open-browser", daemon=True)
    opener.start()
    #threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start()
# Open the browser tab and start the queued Gradio server on all interfaces.
# NOTE(review): the port is hard-coded to 7901 here, while the PORT value computed
# at start-up is what gets passed to the plugins - confirm which one is intended.
auto_opentab_delay(7901)
block.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name='0.0.0.0', server_port=7901,debug=True, share=False)
================================================
FILE: audio2face_pb2.py
================================================
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: audio2face.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
# Default symbol database; the file, message and service descriptors of this module are registered with it.
_sym_db = _symbol_database.Default()
# File-level descriptor for audio2face.proto (package nvidia.audio2face, proto3).
# `serialized_pb` is the serialized form of the .proto file; it is generated
# output and must not be edited by hand.
DESCRIPTOR = _descriptor.FileDescriptor(
name="audio2face.proto",
package="nvidia.audio2face",
syntax="proto3",
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_pb=b'\n\x10\x61udio2face.proto\x12\x11nvidia.audio2face"{\n\x10PushAudioRequest\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12\x12\n\naudio_data\x18\x03 \x01(\x0c\x12(\n block_until_playback_is_finished\x18\x04 \x01(\x08"5\n\x11PushAudioResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t"\x85\x01\n\x16PushAudioStreamRequest\x12@\n\x0cstart_marker\x18\x01 \x01(\x0b\x32(.nvidia.audio2face.PushAudioRequestStartH\x00\x12\x14\n\naudio_data\x18\x02 \x01(\x0cH\x00\x42\x13\n\x11streaming_request"l\n\x15PushAudioRequestStart\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12(\n block_until_playback_is_finished\x18\x03 \x01(\x08";\n\x17PushAudioStreamResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xd4\x01\n\nAudio2Face\x12X\n\tPushAudio\x12#.nvidia.audio2face.PushAudioRequest\x1a$.nvidia.audio2face.PushAudioResponse"\x00\x12l\n\x0fPushAudioStream\x12).nvidia.audio2face.PushAudioStreamRequest\x1a*.nvidia.audio2face.PushAudioStreamResponse"\x00(\x01\x62\x06proto3',
)
# Descriptor for the message nvidia.audio2face.PushAudioRequest.
# Fields (name = field number): instance_name = 1, samplerate = 2,
# audio_data = 3, block_until_playback_is_finished = 4.
# Generated code: regenerate from audio2face.proto instead of editing.
_PUSHAUDIOREQUEST = _descriptor.Descriptor(
name="PushAudioRequest",
full_name="nvidia.audio2face.PushAudioRequest",
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name="instance_name",
full_name="nvidia.audio2face.PushAudioRequest.instance_name",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"".decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="samplerate",
full_name="nvidia.audio2face.PushAudioRequest.samplerate",
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="audio_data",
full_name="nvidia.audio2face.PushAudioRequest.audio_data",
index=2,
number=3,
type=12,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"",
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="block_until_playback_is_finished",
full_name="nvidia.audio2face.PushAudioRequest.block_until_playback_is_finished",
index=3,
number=4,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=39,
serialized_end=162,
)
# Descriptor for the message nvidia.audio2face.PushAudioResponse.
# Fields (name = field number): success = 1, message = 2.
# Generated code: regenerate from audio2face.proto instead of editing.
_PUSHAUDIORESPONSE = _descriptor.Descriptor(
name="PushAudioResponse",
full_name="nvidia.audio2face.PushAudioResponse",
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name="success",
full_name="nvidia.audio2face.PushAudioResponse.success",
index=0,
number=1,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="message",
full_name="nvidia.audio2face.PushAudioResponse.message",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"".decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=164,
serialized_end=217,
)
# Descriptor for the message nvidia.audio2face.PushAudioStreamRequest.
# Fields (name = field number): start_marker = 1, audio_data = 2.
# Declares the oneof "streaming_request" with an empty field list here; both
# fields are attached to it, and start_marker's message type is set, by the
# module-level statements that follow the descriptor definitions.
# Generated code: regenerate from audio2face.proto instead of editing.
_PUSHAUDIOSTREAMREQUEST = _descriptor.Descriptor(
name="PushAudioStreamRequest",
full_name="nvidia.audio2face.PushAudioStreamRequest",
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name="start_marker",
full_name="nvidia.audio2face.PushAudioStreamRequest.start_marker",
index=0,
number=1,
type=11,
cpp_type=10,
label=1,
has_default_value=False,
default_value=None,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="audio_data",
full_name="nvidia.audio2face.PushAudioStreamRequest.audio_data",
index=1,
number=2,
type=12,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"",
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[
_descriptor.OneofDescriptor(
name="streaming_request",
full_name="nvidia.audio2face.PushAudioStreamRequest.streaming_request",
index=0,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[],
)
],
serialized_start=220,
serialized_end=353,
)
# Descriptor for the message nvidia.audio2face.PushAudioRequestStart.
# Fields (name = field number): instance_name = 1, samplerate = 2,
# block_until_playback_is_finished = 3.
# Generated code: regenerate from audio2face.proto instead of editing.
_PUSHAUDIOREQUESTSTART = _descriptor.Descriptor(
name="PushAudioRequestStart",
full_name="nvidia.audio2face.PushAudioRequestStart",
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name="instance_name",
full_name="nvidia.audio2face.PushAudioRequestStart.instance_name",
index=0,
number=1,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"".decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="samplerate",
full_name="nvidia.audio2face.PushAudioRequestStart.samplerate",
index=1,
number=2,
type=5,
cpp_type=1,
label=1,
has_default_value=False,
default_value=0,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="block_until_playback_is_finished",
full_name="nvidia.audio2face.PushAudioRequestStart.block_until_playback_is_finished",
index=2,
number=3,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=355,
serialized_end=463,
)
# Descriptor for the message nvidia.audio2face.PushAudioStreamResponse.
# Fields (name = field number): success = 1, message = 2.
# Generated code: regenerate from audio2face.proto instead of editing.
_PUSHAUDIOSTREAMRESPONSE = _descriptor.Descriptor(
name="PushAudioStreamResponse",
full_name="nvidia.audio2face.PushAudioStreamResponse",
filename=None,
file=DESCRIPTOR,
containing_type=None,
create_key=_descriptor._internal_create_key,
fields=[
_descriptor.FieldDescriptor(
name="success",
full_name="nvidia.audio2face.PushAudioStreamResponse.success",
index=0,
number=1,
type=8,
cpp_type=7,
label=1,
has_default_value=False,
default_value=False,
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
_descriptor.FieldDescriptor(
name="message",
full_name="nvidia.audio2face.PushAudioStreamResponse.message",
index=1,
number=2,
type=9,
cpp_type=9,
label=1,
has_default_value=False,
default_value=b"".decode("utf-8"),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
serialized_options=None,
file=DESCRIPTOR,
create_key=_descriptor._internal_create_key,
),
],
extensions=[],
nested_types=[],
enum_types=[],
serialized_options=None,
is_extendable=False,
syntax="proto3",
extension_ranges=[],
oneofs=[],
serialized_start=465,
serialized_end=524,
)
# Resolve cross-references that could not be expressed while the descriptors
# were being constructed: start_marker is a PushAudioRequestStart message.
_PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"].message_type = _PUSHAUDIOREQUESTSTART
# Attach start_marker to the "streaming_request" oneof (both directions).
_PUSHAUDIOSTREAMREQUEST.oneofs_by_name["streaming_request"].fields.append(
_PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"]
)
_PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"].containing_oneof = _PUSHAUDIOSTREAMREQUEST.oneofs_by_name[
"streaming_request"
]
# Attach audio_data to the same oneof (both directions).
_PUSHAUDIOSTREAMREQUEST.oneofs_by_name["streaming_request"].fields.append(
_PUSHAUDIOSTREAMREQUEST.fields_by_name["audio_data"]
)
_PUSHAUDIOSTREAMREQUEST.fields_by_name["audio_data"].containing_oneof = _PUSHAUDIOSTREAMREQUEST.oneofs_by_name[
"streaming_request"
]
# Expose every message descriptor on the file descriptor by name, then
# register the file descriptor with the symbol database.
DESCRIPTOR.message_types_by_name["PushAudioRequest"] = _PUSHAUDIOREQUEST
DESCRIPTOR.message_types_by_name["PushAudioResponse"] = _PUSHAUDIORESPONSE
DESCRIPTOR.message_types_by_name["PushAudioStreamRequest"] = _PUSHAUDIOSTREAMREQUEST
DESCRIPTOR.message_types_by_name["PushAudioRequestStart"] = _PUSHAUDIOREQUESTSTART
DESCRIPTOR.message_types_by_name["PushAudioStreamResponse"] = _PUSHAUDIOSTREAMRESPONSE
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
# Concrete message classes. Each one is built from its descriptor via
# GeneratedProtocolMessageType and then registered with the symbol database.
# These are the classes client code instantiates (for example
# audio2face_pb2.PushAudioRequest()).
PushAudioRequest = _reflection.GeneratedProtocolMessageType(
"PushAudioRequest",
(_message.Message,),
{
"DESCRIPTOR": _PUSHAUDIOREQUEST,
"__module__": "audio2face_pb2"
# @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioRequest)
},
)
_sym_db.RegisterMessage(PushAudioRequest)
PushAudioResponse = _reflection.GeneratedProtocolMessageType(
"PushAudioResponse",
(_message.Message,),
{
"DESCRIPTOR": _PUSHAUDIORESPONSE,
"__module__": "audio2face_pb2"
# @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioResponse)
},
)
_sym_db.RegisterMessage(PushAudioResponse)
PushAudioStreamRequest = _reflection.GeneratedProtocolMessageType(
"PushAudioStreamRequest",
(_message.Message,),
{
"DESCRIPTOR": _PUSHAUDIOSTREAMREQUEST,
"__module__": "audio2face_pb2"
# @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioStreamRequest)
},
)
_sym_db.RegisterMessage(PushAudioStreamRequest)
PushAudioRequestStart = _reflection.GeneratedProtocolMessageType(
"PushAudioRequestStart",
(_message.Message,),
{
"DESCRIPTOR": _PUSHAUDIOREQUESTSTART,
"__module__": "audio2face_pb2"
# @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioRequestStart)
},
)
_sym_db.RegisterMessage(PushAudioRequestStart)
PushAudioStreamResponse = _reflection.GeneratedProtocolMessageType(
"PushAudioStreamResponse",
(_message.Message,),
{
"DESCRIPTOR": _PUSHAUDIOSTREAMRESPONSE,
"__module__": "audio2face_pb2"
# @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioStreamResponse)
},
)
_sym_db.RegisterMessage(PushAudioStreamResponse)
# Descriptor for the service nvidia.audio2face.Audio2Face with two methods:
#   PushAudio:       PushAudioRequest       -> PushAudioResponse
#   PushAudioStream: PushAudioStreamRequest -> PushAudioStreamResponse
# Generated code: regenerate from audio2face.proto instead of editing.
_AUDIO2FACE = _descriptor.ServiceDescriptor(
name="Audio2Face",
full_name="nvidia.audio2face.Audio2Face",
file=DESCRIPTOR,
index=0,
serialized_options=None,
create_key=_descriptor._internal_create_key,
serialized_start=527,
serialized_end=739,
methods=[
_descriptor.MethodDescriptor(
name="PushAudio",
full_name="nvidia.audio2face.Audio2Face.PushAudio",
index=0,
containing_service=None,
input_type=_PUSHAUDIOREQUEST,
output_type=_PUSHAUDIORESPONSE,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
_descriptor.MethodDescriptor(
name="PushAudioStream",
full_name="nvidia.audio2face.Audio2Face.PushAudioStream",
index=1,
containing_service=None,
input_type=_PUSHAUDIOSTREAMREQUEST,
output_type=_PUSHAUDIOSTREAMRESPONSE,
serialized_options=None,
create_key=_descriptor._internal_create_key,
),
],
)
# Register the service descriptor and expose it on the file descriptor by name.
_sym_db.RegisterServiceDescriptor(_AUDIO2FACE)
DESCRIPTOR.services_by_name["Audio2Face"] = _AUDIO2FACE
# @@protoc_insertion_point(module_scope)
================================================
FILE: audio2face_pb2_grpc.py
================================================
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import audio2face_pb2 as audio2face__pb2
class Audio2FaceStub(object):
    """Client stub for the ``nvidia.audio2face.Audio2Face`` gRPC service.

    After construction the instance exposes one callable per RPC:

    * ``PushAudio`` -- unary request, unary response.
    * ``PushAudioStream`` -- stream of requests, unary response.
    """

    def __init__(self, channel):
        """Bind both RPCs of the service to *channel*.

        Args:
            channel: A grpc.Channel.
        """
        pb = audio2face__pb2

        # Single request message in, single response message out.
        push_audio = channel.unary_unary(
            "/nvidia.audio2face.Audio2Face/PushAudio",
            request_serializer=pb.PushAudioRequest.SerializeToString,
            response_deserializer=pb.PushAudioResponse.FromString,
        )
        # Iterator of request messages in, single response message out.
        push_audio_stream = channel.stream_unary(
            "/nvidia.audio2face.Audio2Face/PushAudioStream",
            request_serializer=pb.PushAudioStreamRequest.SerializeToString,
            response_deserializer=pb.PushAudioStreamResponse.FromString,
        )

        self.PushAudio = push_audio
        self.PushAudioStream = push_audio_stream
class Audio2FaceServicer(object):
    """Server-side base class for the ``nvidia.audio2face.Audio2Face`` service.

    Both handlers report ``UNIMPLEMENTED`` on the call context and then raise
    ``NotImplementedError``.
    """

    def PushAudio(self, request, context):
        """Handle a unary ``PushAudio`` call; unimplemented in this base class."""
        detail = "Method not implemented!"
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(detail)
        raise NotImplementedError(detail)

    def PushAudioStream(self, request_iterator, context):
        """Handle a streaming ``PushAudioStream`` call; unimplemented in this base class."""
        detail = "Method not implemented!"
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(detail)
        raise NotImplementedError(detail)
def add_Audio2FaceServicer_to_server(servicer, server):
    """Register *servicer*'s RPC handlers on *server*.

    Args:
        servicer: Object providing ``PushAudio`` and ``PushAudioStream``.
        server: Server object exposing ``add_generic_rpc_handlers``.
    """
    pb = audio2face__pb2

    # Unary request -> unary response.
    push_audio_handler = grpc.unary_unary_rpc_method_handler(
        servicer.PushAudio,
        request_deserializer=pb.PushAudioRequest.FromString,
        response_serializer=pb.PushAudioResponse.SerializeToString,
    )
    # Request stream -> unary response.
    push_audio_stream_handler = grpc.stream_unary_rpc_method_handler(
        servicer.PushAudioStream,
        request_deserializer=pb.PushAudioStreamRequest.FromString,
        response_serializer=pb.PushAudioStreamResponse.SerializeToString,
    )

    rpc_method_handlers = {
        "PushAudio": push_audio_handler,
        "PushAudioStream": push_audio_stream_handler,
    }
    generic_handler = grpc.method_handlers_generic_handler(
        "nvidia.audio2face.Audio2Face", rpc_method_handlers
    )
    server.add_generic_rpc_handlers((generic_handler,))
# This class is part of an EXPERIMENTAL API.
class Audio2Face(object):
    """Static wrappers that call the Audio2Face RPCs without a stub object.

    Each method forwards its arguments, in the positional order used below,
    to the matching ``grpc.experimental`` helper and returns its result.
    """

    @staticmethod
    def PushAudio(
        request,
        target,
        options=(),
        channel_credentials=None,
        call_credentials=None,
        insecure=False,
        compression=None,
        wait_for_ready=None,
        timeout=None,
        metadata=None,
    ):
        """Invoke ``PushAudio`` on *target* via ``grpc.experimental.unary_unary``."""
        method = "/nvidia.audio2face.Audio2Face/PushAudio"
        serialize_request = audio2face__pb2.PushAudioRequest.SerializeToString
        parse_response = audio2face__pb2.PushAudioResponse.FromString
        # Note: the helper takes `insecure` before `call_credentials`.
        return grpc.experimental.unary_unary(
            request,
            target,
            method,
            serialize_request,
            parse_response,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

    @staticmethod
    def PushAudioStream(
        request_iterator,
        target,
        options=(),
        channel_credentials=None,
        call_credentials=None,
        insecure=False,
        compression=None,
        wait_for_ready=None,
        timeout=None,
        metadata=None,
    ):
        """Invoke ``PushAudioStream`` on *target* via ``grpc.experimental.stream_unary``."""
        method = "/nvidia.audio2face.Audio2Face/PushAudioStream"
        serialize_request = audio2face__pb2.PushAudioStreamRequest.SerializeToString
        parse_response = audio2face__pb2.PushAudioStreamResponse.FromString
        # Note: the helper takes `insecure` before `call_credentials`.
        return grpc.experimental.stream_unary(
            request_iterator,
            target,
            method,
            serialize_request,
            parse_response,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )
================================================
FILE: audio2face_streaming_utils.py
================================================
"""
This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests.
There are two options:
* Send the whole track at once using PushAudioRequest()
* Send the audio chunks sequentially in a stream using PushAudioStreamRequest()
For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file.
But in a real application such a stream of chunks may be acquired from some other streaming source:
* streaming audio via internet, streaming Text-To-Speech, etc
gRPC protocol details can be found in audio2face.proto
"""
import sys
import grpc
import time
import numpy as np
import soundfile
import audio2face_pb2_grpc
import audio2face_pb2
def push_audio_track(url, audio_data, samplerate, instance_name):
    """Push a whole audio track to Audio2Face with a single PushAudio() call.

    Progress and the outcome reported by the server are printed to stdout.

    Args:
        url: Server address passed to ``grpc.insecure_channel``.
        audio_data: Samples of the whole track; converted with
            ``.astype(np.float32).tobytes()``, so each sample is sent as
            4 bytes (float32).
        samplerate: Sampling rate of the audio data.
        instance_name: Prim path of the Audio2Face Streaming Audio Player on
            the stage, where to push the audio data.
    """
    # If True, the gRPC request blocks until playback of the pushed track is finished.
    block_until_playback_is_finished = True  # ADJUST

    with grpc.insecure_channel(url) as channel:
        stub = audio2face_pb2_grpc.Audio2FaceStub(channel)
        request = audio2face_pb2.PushAudioRequest(
            audio_data=audio_data.astype(np.float32).tobytes(),
            samplerate=samplerate,
            instance_name=instance_name,
            block_until_playback_is_finished=block_until_playback_is_finished,
        )
        print("Sending audio data...")
        response = stub.PushAudio(request)
        outcome = "SUCCESS" if response.success else f"ERROR: {response.message}"
        print(outcome)
    print("Closed channel")
def push_audio_track_stream(url, audio_data, samplerate, instance_name):
    """Push an audio track to Audio2Face chunk by chunk via PushAudioStream().

    The function emulates a stream of chunks by splitting the input track.
    In a real application such a stream may be acquired from some other
    streaming source.

    The first message carries only the ``start_marker`` field with the meta
    information (no audio data):
      * samplerate: sampling rate of the audio data
      * instance_name: prim path of the Audio2Face Streaming Audio Player on
        the stage, where to push the audio data
      * block_until_playback_is_finished: if True, the gRPC request blocks
        until playback of the pushed track is finished (after the last message)
    Every following message carries an ``audio_data`` field holding one chunk,
    where each sample is encoded as 4 bytes (float32).

    All messages are produced by a Python generator passed to PushAudioStream().
    Progress and the outcome reported by the server are printed to stdout.

    Args:
        url: Server address passed to ``grpc.insecure_channel``.
        audio_data: Sliceable sample array supporting ``len()`` and whose
            slices provide ``.astype`` (for example a NumPy array).
        samplerate: Sampling rate of the audio data.
        instance_name: Prim path of the target player on the stage.
    """
    # One tenth of a second of samples per message. Clamped to at least one
    # sample so very low sample rates cannot produce a zero-sized chunk.
    chunk_size = max(1, samplerate // 10)  # ADJUST
    block_until_playback_is_finished = True  # ADJUST

    with grpc.insecure_channel(url) as channel:
        stub = audio2face_pb2_grpc.Audio2FaceStub(channel)

        def make_generator():
            start_marker = audio2face_pb2.PushAudioRequestStart(
                samplerate=samplerate,
                instance_name=instance_name,
                block_until_playback_is_finished=block_until_playback_is_finished,
            )
            # At first, we send a message with start_marker
            yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker)
            # Then we send messages with audio_data. Stepping by chunk_size
            # covers every sample exactly once and never yields an empty
            # trailing chunk when the length is a multiple of chunk_size.
            for start in range(0, len(audio_data), chunk_size):
                chunk = audio_data[start:start + chunk_size]
                yield audio2face_pb2.PushAudioStreamRequest(
                    audio_data=chunk.astype(np.float32).tobytes()
                )

        request_generator = make_generator()
        print("Sending audio data...")
        response = stub.PushAudioStream(request_generator)
        if response.success:
            print("SUCCESS")
        else:
            print(f"ERROR: {response.message}")
    print("Channel closed")
def push_stream(url, audio_data, samplerate, instance_name):
    """Push an audio track to Audio2Face chunk by chunk and report success.

    The function emulates a stream of chunks by splitting the input track.
    In a real application such a stream may be acquired from some other
    streaming source.

    The first message carries only the ``start_marker`` field with the meta
    information (no audio data):
      * samplerate: sampling rate of the audio data
      * instance_name: prim path of the Audio2Face Streaming Audio Player on
        the stage, where to push the audio data
      * block_until_playback_is_finished: if True, the gRPC request blocks
        until playback of the pushed track is finished (after the last message)
    Every following message carries an ``audio_data`` field holding one chunk,
    where each sample is encoded as 4 bytes (float32).

    All messages are produced by a Python generator passed to PushAudioStream().

    Args:
        url: Server address passed to ``grpc.insecure_channel``.
        audio_data: Sliceable sample array supporting ``len()`` and whose
            slices provide ``.astype`` (for example a NumPy array).
        samplerate: Sampling rate of the audio data.
        instance_name: Prim path of the target player on the stage.

    Returns:
        True when the server reports success, False otherwise.
    """
    print(len(audio_data))
    # One tenth of a second of samples per message. Clamped to at least one
    # sample so very low sample rates cannot produce a zero-sized chunk.
    chunk_size = max(1, samplerate // 10)  # ADJUST
    block_until_playback_is_finished = True  # ADJUST
    print(type(audio_data))

    with grpc.insecure_channel(url) as channel:
        print("Channel creadted")
        stub = audio2face_pb2_grpc.Audio2FaceStub(channel)

        def make_generator():
            start_marker = audio2face_pb2.PushAudioRequestStart(
                samplerate=samplerate,
                instance_name=instance_name,
                block_until_playback_is_finished=block_until_playback_is_finished,
            )
            # At first, we send a message with start_marker
            yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker)
            # Then we send messages with audio_data. Stepping by chunk_size
            # covers every sample exactly once and never yields an empty
            # trailing chunk when the length is a multiple of chunk_size.
            for start in range(0, len(audio_data), chunk_size):
                chunk = audio_data[start:start + chunk_size]
                yield audio2face_pb2.PushAudioStreamRequest(
                    audio_data=chunk.astype(np.float32).tobytes()
                )

        request_generator = make_generator()
        print("Sending audio data...")
        response = stub.PushAudioStream(request_generator)
        if response.success:
            print("SUCCESS")
            return True
        print(f"ERROR: {response.message}")
        # Explicit falsy result so callers can test the outcome directly.
        return False
================================================
FILE: audio_segment.py
================================================
import os
import gradio as gr
from pydub import AudioSegment
# function to crop audio according to the given start and end time
def crop_audio(file_path, start_time, end_time):
    """Save the ``[start_time:end_time]`` slice of an audio file as a WAV file.

    The slice is written to the ``cropped_audio`` directory (relative to the
    current working directory, created if needed) as
    ``<stem>_<start_time // 1000>_<end_time // 1000>.wav``.

    Args:
        file_path: Path of the audio file to read.
        start_time: Start of the slice, used directly as the slice index on
            the loaded ``AudioSegment``.
        end_time: End of the slice, used the same way.

    Returns:
        Path of the exported WAV file.
    """
    audio = AudioSegment.from_file(file_path)
    cropped_audio = audio[start_time:end_time]
    filename = os.path.splitext(os.path.basename(file_path))[0]
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("cropped_audio", exist_ok=True)
    cropped_file_path = os.path.join(
        "cropped_audio", f"{filename}_{start_time//1000}_{end_time//1000}.wav"
    )
    cropped_audio.export(cropped_file_path, format="wav")
    return cropped_file_path
# function to split audio file into segments
def split_audio_file(file_path, output_path, segment_time=3000):
    """Split an audio file into consecutive WAV segments.

    Each segment is exported to *output_path* as
    ``<stem>_<start // 1000>_<end // 1000>.wav``. The last segment may be
    shorter than *segment_time*; no empty segment is written when the audio
    length is an exact multiple of *segment_time* or the audio is empty.

    Args:
        file_path: Path of the audio file to read.
        output_path: Directory receiving the segments (created if missing).
        segment_time: Segment length in milliseconds (pydub slices and
            ``len()`` are millisecond-based). Must be positive.

    Returns:
        *output_path*, unchanged.

    Raises:
        ValueError: If *segment_time* is not positive.
    """
    if segment_time <= 0:
        raise ValueError("segment_time must be a positive number of milliseconds")

    audio = AudioSegment.from_file(file_path)
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    os.makedirs(output_path, exist_ok=True)

    total_ms = len(audio)  # duration of the loaded audio in milliseconds
    start_time = 0
    # Advance window by window until the end of the audio is reached.
    while start_time < total_ms:
        end_time = start_time + segment_time
        segment_file_path = os.path.join(
            output_path, f"{file_name}_{start_time//1000}_{end_time//1000}.wav"
        )
        audio[start_time:end_time].export(segment_file_path, format="wav")
        start_time = end_time
    return output_path
# main function
def audio_processing(file_path, output_path, label):
    """Split an audio file, crop every segment and append labels to a text file.

    Steps:
      1. Split *file_path* into segments stored under *output_path*.
      2. Walk *output_path* and crop the first 1000 ms of every ``.wav`` file
         found (a fixed example range, not a user selection).
      3. Append one ``<cropped path>\\t<label>_<index>`` line per cropped file
         to ``file_labels.txt`` in the current working directory.

    Args:
        file_path: Audio file to process.
        output_path: Directory receiving the split segments (created if missing).
        label: Label prefix; the index of the cropped file is appended to it.

    Returns:
        The completion message, which is also printed.
    """
    # Split the audio file into segments.
    os.makedirs(output_path, exist_ok=True)
    split_audio_file(file_path, output_path)

    # Crop each segment; only a fixed 1 s example range is cut here.
    cropped_files_paths = []
    for root, _dirs, files in os.walk(output_path):
        for file_name in files:
            if file_name.endswith('.wav'):
                segment_path = os.path.abspath(os.path.join(root, file_name))
                cropped_files_paths.append(crop_audio(segment_path, 0, 1000))

    # Append path/label pairs; the context manager closes the file even if a
    # write fails, and UTF-8 keeps non-ASCII labels intact on every platform.
    with open('file_labels.txt', 'a', encoding='utf-8') as txt_file:
        for index, cropped_file_path in enumerate(cropped_files_paths):
            segment_label = label + '_' + str(index)
            txt_file.write(f"{cropped_file_path}\t{segment_label}\n")

    message = "处理完成!"
    print(message)
    # Returned so that a text output bound to this function has something to show.
    return message
# Build the Gradio interface: it takes an audio file, an output folder path
# and a label, passes them to audio_processing and shows a text output.
iface = gr.Interface(
fn=audio_processing,
inputs=[gr.inputs.File(label="上传音频文件"),
gr.inputs.Textbox(label="输出文件夹路径"),
gr.inputs.Textbox(label="标签")],
outputs="text",
title="音频处理工具",
description="通过鼠标点击音频的任意区间保存片段")
# Start the web UI as soon as this module is executed.
iface.launch()
================================================
FILE: auto_label_demo.py
================================================
from model_cards.autoback import AutoBackend
import argparse
import os
import platform
import sys
from pathlib import Path
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import matplotlib.pyplot as plt
from PIL import Image
import random
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0] # root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from utils.ops import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
dilate_mask, increment_path, non_max_suppression ,print_args, scale_boxes, xyxy2xywh,save_format)
from utils.plot import Annotator, save_one_box,show_box,show_mask,save_mask_data
from utils.torch_utils import select_device
from config_private import SAM_MODEL_TYPE,GROUNED_MODEL_TYPE,Tag2Text_Model_Path,GLIGEN_META_LIST
from utils import VID_FORMATS,IMG_FORMATS,write_categories
import json
import xml.etree.cElementTree as ET
from tqdm import tqdm
# Known categories: maps a category name (str) to its integer id; filled by Auto_run.
global categories
categories = {}
# Maps a category name to a random (R, G, B) tuple; filled by Auto_run.
global category_colors
category_colors={}
# Initial list of class ids (only referenced from commented-out code in this part of the file).
class_ids = []
# Loaded model per backend name; entries stay None until load_auto_backend_models fills them.
# NOTE(review): the key is spelled 'gilgen' here while load_auto_backend_models
# writes 'gligen' -- confirm which spelling is intended.
models_config = {'tag2text': None, 'lama': None,'sam': None,'grounded': None,'sd': None,'visual_glm': None,'trans_zh': None,'gilgen':None}
# Records appended by save_text2img_data and dumped to dataset.json by Auto_run.
JSON_DATASETS=[]
def save_text2img_data(output_dir, prompt,label,img_name):
    """Append one image/prompt/label record to the module-level JSON_DATASETS list.

    Args:
        output_dir: Not used by this function.
        prompt: Prompt text; a default question is substituted when it is falsy.
        label: Label stored with the record as-is.
        img_name: Image name; stored in its string form.
    """
    global JSON_DATASETS
    # Fall back to the default question when no prompt was supplied.
    effective_prompt = prompt if prompt else "这张图片的背景里有什么内容?"
    record = dict(img=f"{img_name}", prompt=effective_prompt, label=label)
    JSON_DATASETS.append(record)
def load_auto_backend_models(opt):
    """Load every model enabled on *opt* into the module-level ``models_config``.

    Reads the flags ``tag2text``, ``det``, ``sam``, ``lama`` and ``gligen``
    from *opt*; each enabled flag creates one ``AutoBackend`` and stores it
    under the matching key. ``opt.device`` selects the device and ``opt.half``
    is forwarded as ``fp16`` where the backend is given that option.
    """
    # Resolve the target device once; it is shared by the backends below.
    device = select_device(opt.device)

    if opt.tag2text:
        models_config['tag2text'] = AutoBackend(
            "tag2text",
            weights=Tag2Text_Model_Path,
            device=device,
            fp16=opt.half,
        )

    if opt.det:
        models_config['grounded'] = AutoBackend(
            "grounded-DINO",
            weights=GROUNED_MODEL_TYPE['S'],
            device=device,
            args_config= 'model_cards/groundingdino/config/GroundingDINO_SwinT_OGC.py',
            fp16=opt.half,
        )

    if opt.sam:
        models_config['sam'] = AutoBackend(
            "segment-anything",
            weights=SAM_MODEL_TYPE['vit_h'],
            device=device,
            fp16=opt.half,
        )

    if opt.lama:
        models_config['lama'] = AutoBackend(
            "lama",
            weights=None,
            args_config='model_cards/lama/configs/prediction/default.yaml',
            device=device,
        )

    if opt.gligen:
        models_config['gligen'] = AutoBackend("gligen", weights=GLIGEN_META_LIST[0])

    print('【loads models done】')
def Auto_run(weights=ROOT / '',  # model.pt path(s)
             source='data/images',  # file/dir/URL/glob, 0 for webcam
             input_prompt="Anything in this image",
             data=ROOT / 'data/',  # dataset.yaml path
             imgsz=(1920, 1080),  # inference size (height, width)
             conf_thres=0.25,  # confidence threshold
             iou_thres=0.45,  # NMS IOU threshold
             text_thres=0.3,
             max_det=1000,  # maximum detections per image
             device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
             view_img=False,  # show results
             save_txt=False,  # save results to *.txt
             save_xml=False,  # save results to *.xml
             save_conf=False,  # save confidences in --save-txt labels
             save_crop=False,  # save cropped prediction boxes
             nosave=False,  # do not save images/videos
             classes=None,  # filter by class: --class 0, or --class 0 2 3
             zh_select=False,
             agnostic_nms=False,  # class-agnostic NMS
             augment=False,  # augmented inference
             visualize=False,  # visualize features
             update=False,  # update all models
             project=ROOT / 'runs/detect',  # save results to project/name
             name='exp',  # save results to project/name
             exist_ok=False,  # existing project/name ok, do not increment
             line_thickness=3,  # bounding box thickness (pixels)
             hide_labels=False,  # hide labels
             hide_conf=False,  # hide confidences
             half=False,  # use FP16 half-precision inference
             trace=False,
             lama=False,  # use lama models
             sam=True,  # use segment-anythings
             det=True,  # use grounded detect model with text
             tag2text=True,
             save_mask=False,
             save_caption=False,
             batch_process=False,
             color_flag=False,
             process_name=0,
             gligen=False,
             ):
    """Run the enabled models over every image of *source* and save the results.

    For each image the stages below run in order, each only when its flag is set:

      1. ``tag2text``: produce tags (reused as the detection prompt) and a
         caption; optionally translate (``zh_select``) and save the caption.
      2. ``det``: text-prompted detection with the 'grounded' model.
         A non-empty *input_prompt* overrides the generated tags.
      3. ``sam`` (with ``det``): masks for the detected boxes; optionally
         stored with ``save_mask``.
      4. Unless ``nosave``: a figure with boxes and masks saved as
         ``<save_dir>/<n>.png``.
      5. ``lama``: inpaint each mask in turn and save ``mask_<idx>.png``.
      6. ``color_flag`` / ``save_txt``: per-box text labels and, with
         ``save_mask``, a colour-coded class mask image.
      7. ``save_xml``: XML annotations.

    The models are read from the module-level ``models_config``; the
    module-level ``categories`` and ``category_colors`` are updated as new
    class names appear.

    Args:
        source: Image file or directory of images/videos to process.
        input_prompt: Text prompt for tagging/detection.
        conf_thres, text_thres, iou_thres: Thresholds forwarded to the models
            as ``box_threshold``, ``text_threshold`` and ``iou_threshold``.
        save_txt, save_xml, save_conf, save_mask, save_caption, nosave,
        color_flag: Output switches described above.
        zh_select: Translate with the 'trans_zh' model when tagging.
        project, name, exist_ok: Control the output directory.
        lama, sam, det, tag2text: Enable the corresponding stage.
        process_name: Identifier written to the log only.
        weights, data, imgsz, max_det, device, view_img, save_crop, classes,
        agnostic_nms, augment, visualize, update, line_thickness,
        hide_labels, hide_conf, half, trace, batch_process, gligen:
            Accepted so a parsed option namespace can be splatted into this
            function; they are not read in the body.

    Returns:
        False when *source* is neither an existing directory nor an existing
        file; otherwise None.
    """
    global models_config
    global category_colors
    global JSON_DATASETS
    LOGGER.info(f'当前的进程ID:{process_name},加载的模型列表:{models_config.keys()}')
    cls_index = -1  # default category id until a detection assigns one
    source = str(source)
    print(f'input:{source}')

    # Collect the inputs: every supported file of a directory, or the single file.
    img_paths = None
    if os.path.isdir(source):
        img_paths = [os.path.join(source, f) for f in os.listdir(source)
                     if Path(f).suffix[1:] in (IMG_FORMATS + VID_FORMATS)]
    elif os.path.isfile(source):
        img_paths = [source]
    else:
        return False

    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    save_img = not nosave and not source.endswith('.txt')  # save inference images
    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)
    (save_dir / 'xmls' if save_xml else save_dir).mkdir(parents=True, exist_ok=True)
    (save_dir / 'masks' if save_mask else save_dir).mkdir(parents=True, exist_ok=True)
    (save_dir / 'captions' if save_caption else save_dir).mkdir(parents=True, exist_ok=True)

    seen = 0
    caption = None
    # Load data and run inference image by image.
    for source in tqdm(img_paths, desc="Processing"):
        im = cv2.imread(source)
        if im is None:
            # imread returns None for files it cannot decode; skip them
            # instead of failing in cvtColor below.
            LOGGER.info(f'Skipping unreadable image: {source}')
            continue
        # basename() handles the platform's path separator; the name is cut at
        # the first dot exactly as before.
        name_p = os.path.basename(source).split('.')[0]
        img_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        preds = None
        masks = []
        prompt = input_prompt

        if tag2text:
            preds = models_config['tag2text'](im=img_rgb, prompt=prompt, box_threshold=conf_thres,
                                              text_threshold=text_thres, iou_threshold=iou_thres)
            # Currently ", " is better for detecting single tags
            # while ". " is a little worse in some case
            prompt = preds[0].replace(' |', ',')
            caption = preds[2]
            print(f"Caption: {caption}")
            print(f"Tags: {prompt}")
            if zh_select:
                caption = models_config['trans_zh'](prompt, max_length=1000,
                                                    clean_up_tokenization_spaces=True)[0]["generated_text"]
            if save_caption:
                save_format(label_format="txt", save_path=f'{save_dir}/captions', img_name=name_p, results=caption)

        if det:
            if input_prompt:
                prompt = input_prompt
            print('grouned start input prompt:', prompt)
            preds = models_config['grounded'](im=img_rgb, prompt=prompt, box_threshold=conf_thres,
                                              text_threshold=text_thres, iou_threshold=iou_thres)

        if sam and det:
            if preds[0].numel() > 0:
                print('sam start input prompt:', preds[0])
                masks = models_config['sam'](im=img_rgb, prompt=preds[0], box_threshold=conf_thres,
                                             text_threshold=text_thres, iou_threshold=iou_thres)
                if save_mask:
                    save_mask_data(str(save_dir) + '/masks', caption, masks, preds[0], preds[2], name_p)

        # Write results
        if save_img:
            seen += 1
            plt.figure(figsize=(10, 10))
            plt.imshow(img_rgb)
            if det:
                for box, label in zip(preds[0], preds[2]):
                    show_box(box.numpy(), plt.gca(), label)
                for mask in masks:
                    show_mask(mask.cpu().numpy(), plt.gca(), random_color=True)
            if tag2text:
                plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n')
            plt.axis('off')
            # `bbox_inches` was misspelled, so the tight bounding box was never applied.
            plt.savefig(f'{save_dir}/{seen}.png', bbox_inches='tight', dpi=300, pad_inches=0.0)
            # Release the figure; otherwise one open figure accumulates per image.
            plt.close()

        # `masks` is an empty list when nothing was segmented, so test its
        # length rather than comparing against None.
        if lama and len(masks) > 0:
            masks_prompts = masks.detach().cpu().numpy().astype(np.uint8) * 255
            for idx, mask in enumerate(masks_prompts):
                sub_mask = [dilate_mask(ma, 15) for ma in mask]
                img_inpainted_p = f'{save_dir}/mask_{idx}.png'
                img_inpainted = models_config['lama'](im=img_rgb, prompt=sub_mask[0])
                Image.fromarray(img_inpainted.astype(np.uint8)).save(img_inpainted_p)
                # Each mask is inpainted on top of the previous result.
                img_rgb = img_inpainted

        # Make sure every known category has a colour.
        for category in categories:
            if category not in category_colors:
                category_colors[category] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        gn = torch.tensor(im.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        if color_flag or save_txt:
            seg_mask = np.zeros_like(img_rgb)
            category_color = []
            for xyxy, conf, cls, mask in zip(preds[0], preds[1], preds[2], masks):  # per-image boxes
                xywh = (xyxy2xywh((xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                # Categories are keyed by str(cls); use the same key for the
                # membership test so a category never gets a second id.
                cls_key = str(cls)
                if cls_key not in categories:
                    categories[cls_key] = len(categories)
                    write_categories(cls, f'{save_dir}/classes_id.txt')
                if cls_key not in category_colors:
                    category_colors[cls_key] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
                cls_index = categories[cls_key]
                category_color = category_colors[cls_key]

                line = (cls_index, xywh, conf) if save_conf else (cls_index, xywh)  # label format
                line = str(line).replace('[', '').replace(']', '').replace("(", '').replace(")", " ").replace(",", " " * 2)
                if save_mask:
                    h, w = mask.shape[-2:]
                    mask_color = np.array(category_color).reshape((1, 1, -1))
                    seg_mask = seg_mask + mask.cpu().numpy().reshape(h, w, 1) * mask_color  # add
                if save_txt:
                    save_format(label_format="txt", save_path=f'{save_dir}/labels', img_name=name_p, results=line)

            if color_flag and save_mask:
                plt.figure(figsize=(10, 10))
                plt.imshow(seg_mask)
                # caption stays None when tagging is disabled; avoid str + None.
                plt.title('Captioning: ' + (caption or '') + '\n' + 'Tagging:' + prompt + '\n')
                plt.axis('off')
                plt.savefig(os.path.join(f'{save_dir}/masks', f'{name_p}_cls.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0)
                plt.close()

        if save_xml:
            h, w = im.shape[:2]
            save_format("xml", f'{save_dir}/xmls', name_p, Path(source).parent,
                        preds, h, w)

    if save_txt:
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/labels")
    if save_xml:
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/xmls")
    if save_caption:
        with open(f'{save_dir}/dataset.json', 'a', encoding='utf-8') as f:
            json.dump(JSON_DATASETS, f, ensure_ascii=False)
            f.write('\n')
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/captions")
    if save_mask:
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/masks")
def run_do(shared_args, process_name=0):
    """Run the auto-labelling pipeline once for a single worker.

    Args:
        shared_args: Object whose attributes (read through ``vars``) are
            forwarded to ``Auto_run`` as keyword arguments.
        process_name: Worker identifier, forwarded to ``Auto_run`` as the
            ``process_name`` keyword.
    """
    run_kwargs = vars(shared_args)
    Auto_run(**run_kwargs, process_name=process_name)
def parse_opt():
    """Parse the command-line options of the auto-labelling demo.

    The parsed options are printed with ``print_args`` before being returned.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    # Inputs and inference settings.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'your model path', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'train_imgs', help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--input_prompt', type=str, default='', help='provide prompt words')
    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    # The help text used to repeat 'confidence threshold' from --conf-thres.
    parser.add_argument('--text-thres', type=float, default=0.3, help='text threshold')
    parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')

    # Output switches.
    parser.add_argument('--view-img', action='store_true', help='show results')
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-xml', action='store_true', help='save results to *.xml')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--zh_select', action='store_true', default=False)
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--trace', action='store_true', help='trace model')

    # Model selection.
    parser.add_argument('--lama', default=False, action='store_true', help='lama model')
    parser.add_argument('--sam', default=False, action='store_true', help='seg model')
    parser.add_argument('--det', default=False, action='store_true', help='det model')
    # NOTE(review): default=True combined with store_true means this option is
    # always True and cannot be switched off from the command line.
    parser.add_argument('--tag2text', default=True, action='store_true', help='tag2text model ')
    parser.add_argument('--save-mask', default=False, action='store_true', help='mask save json')
    # NOTE(review): same as --tag2text, always True.
    parser.add_argument('--save-caption', default=True, action='store_true', help='caption ')
    # Help text typo fixed ('therads').
    parser.add_argument('--batch-process', action='store_true', help='threads process file')
    parser.add_argument('--color-flag', action='store_true', help='class-color ')
    # The help text used to be a copy of the --color-flag one.
    parser.add_argument('--gligen', action='store_true', help='gligen model')

    opt = parser.parse_args()
    print_args(vars(opt))
    return opt
import threading
import concurrent.futures
def main(opt):
    """Load the configured models and run auto-labelling on ``opt.source``.

    When ``opt.batch_process`` is set and ``opt.source`` is a directory, every
    file in it whose suffix is listed in ``IMG_FORMATS + VID_FORMATS`` is
    handed to ``run_do`` on a thread pool, one task per file. Otherwise
    ``Auto_run`` is called once with the parsed options.

    Args:
        opt: Parsed options, as returned by ``parse_opt``.
    """
    check_requirements(exclude=('tensorboard', 'thop'))
    load_auto_backend_models(opt)
    LOGGER.info(f"模型加载成功{models_config.keys()}")

    if not (opt.batch_process and os.path.isdir(opt.source)):
        Auto_run(**vars(opt))
        return

    # List the directory entries. Iterating ``opt.source`` itself would walk
    # over the characters of the path, so no file would ever be processed.
    supported_suffixes = IMG_FORMATS + VID_FORMATS
    media_files = [
        file_name for file_name in sorted(os.listdir(opt.source))
        if Path(file_name).suffix[1:] in supported_suffixes
    ]
    if not media_files:
        LOGGER.info(f"Error: Input directory {opt.source} contains no supported image/video files.")
        return

    # NOTE(review): this assumes Auto_run may be called concurrently with the
    # already loaded models and accepts a single file as ``source`` - confirm.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = []
        for worker_id, file_name in enumerate(media_files):
            # Each task gets its own copy of the options so that only its
            # ``source`` differs and ``opt`` itself is left untouched.
            file_opt = argparse.Namespace(**vars(opt))
            file_opt.source = os.path.join(opt.source, file_name)
            futures.append(executor.submit(run_do, file_opt, worker_id))
        # Collect the results so that an exception raised in a worker is
        # re-raised here instead of being dropped.
        for future in concurrent.futures.as_completed(futures):
            future.result()
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the pipeline.
    opt = parse_opt()
    main(opt)
================================================
FILE: batch_clean_gpu.txt
================================================
sudo fuser -v /dev/nvidia* |awk '{for(i=1;i<=NF;i++)print "kill -9 " $i;}' | sudo sh
================================================
FILE: crazy_functions/Langchain知识库.py
================================================
from utils.toolbox import CatchException, update_ui, ProxyNetworkActivate
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
@CatchException
def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Build (or extend) a knowledge archive from the files found via ``txt``.

    Args:
        txt: Text typed into the input box, e.g. a path containing the files
            to process; passed to ``get_files_from_everything``.
        llm_kwargs: GPT model parameters such as temperature and top_p; not
            used by this plugin.
        plugin_kwargs: Plugin parameters. A non-empty ``advanced_arg`` is used
            as the archive id, otherwise ``'default'``.
        chatbot: Handle of the chat display, used to show messages to the user.
        history: Chat history; it is cleared at the start of the call.
        system_prompt: Silent system prompt for GPT; not used by this plugin.
        web_port: Port the application is running on; not used by this plugin.

    Yields:
        Whatever ``update_ui`` yields, to refresh the interface.
    """
    history = []    # clear the history to avoid overflowing the input
    chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # resolve deps
    try:
        from zh_langchain import construct_vector_store  # dependency probe only
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
        from .crazy_utils import knowledge_archive_interface
    except Exception:
        chatbot.append(
            ["依赖不足",
             "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1', 'pypinyin'])
        # The names from the failed import above are still unbound, so the
        # import has to be repeated; carrying on without it would end in a
        # NameError further down.
        try:
            from zh_langchain import construct_vector_store  # dependency probe only
            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
            from .crazy_utils import knowledge_archive_interface
        except Exception:
            chatbot.append(["依赖不足", "自动安装后仍然无法导入依赖, 请手动安装后重试。"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
            return

    # < -------------------- read the parameters --------------- >
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    kai_id = plugin_kwargs.get("advanced_arg", 'default')

    # < -------------------- collect the files --------------- >
    file_manifest = []
    spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
    for sp in spl:
        _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
        file_manifest += file_manifest_tmp

    if not file_manifest:
        chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # < ------------------- warm up the text embedding model --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    print('Checking Text2vec ...')
    with ProxyNetworkActivate():    # temporarily activate the proxy network
        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

    # < ------------------- build the knowledge archive --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    print('Establishing knowledge archive ...')
    with ProxyNetworkActivate():    # temporarily activate the proxy network
        kai = knowledge_archive_interface()
        kai.feed_archive(file_manifest=file_manifest, id=kai_id)
    kai_files = kai.get_loaded_file()
    kai_files = '<br/>'.join(kai_files)

    # A disabled variant used to store the archive id in chatbot._cookies and
    # lock the chat onto the answering plugin; it is intentionally not active.
    chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
    """Answer ``txt`` with the help of a previously built knowledge archive.

    The archive id is taken from ``chatbot._cookies['langchain_plugin_embedding']``
    when that cookie exists, otherwise from a non-empty
    ``plugin_kwargs['advanced_arg']``, otherwise ``'default'``.

    Args:
        txt: The user's question.
        llm_kwargs: GPT model parameters, passed on to the GPT request.
        plugin_kwargs: Plugin parameters (see above for ``advanced_arg``).
        chatbot: Handle of the chat display, used to show messages to the user.
        history: Chat history; the prompt and the answer are appended to it.
        system_prompt: System prompt passed on to the GPT request.
        web_port: Port the application is running on; not used by this plugin.

    Yields:
        Whatever ``update_ui`` and the GPT request helper yield.
    """
    # resolve deps
    try:
        from zh_langchain import construct_vector_store  # dependency probe only
        from langchain.embeddings.huggingface import HuggingFaceEmbeddings  # dependency probe only
        from .crazy_utils import knowledge_archive_interface
    except Exception:
        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        from .crazy_utils import try_install_deps
        try_install_deps(['zh_langchain==0.2.1'])
        # ``knowledge_archive_interface`` is still unbound after the failed
        # import, so try again and stop cleanly if that fails as well.
        try:
            from zh_langchain import construct_vector_store  # dependency probe only
            from langchain.embeddings.huggingface import HuggingFaceEmbeddings  # dependency probe only
            from .crazy_utils import knowledge_archive_interface
        except Exception:
            chatbot.append(["依赖不足", "自动安装后仍然无法导入依赖, 请手动安装后重试。"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
            return

    # < ------------------- query the archive --------------- >
    kai = knowledge_archive_interface()

    if 'langchain_plugin_embedding' in chatbot._cookies:
        kai_id = chatbot._cookies['langchain_plugin_embedding']
    else:
        if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
            plugin_kwargs.pop("advanced_arg")
        kai_id = plugin_kwargs.get("advanced_arg", 'default')
    resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)

    chatbot.append((txt, '[Local Message] ' + prompt))
    # The GPT request takes a while, so refresh the UI before starting it.
    yield from update_ui(chatbot=chatbot, history=history)
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=system_prompt
    )
    history.extend((prompt, gpt_say))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
================================================
FILE: crazy_functions/Latex全文润色.py
================================================
from utils.toolbox import update_ui, trimmed_format_exc
from utils.toolbox import CatchException, report_execption, write_results_to_file, zip_folder
class PaperFileGroup():
    """A group of LaTeX files together with their token-limited fragments.

    ``file_paths`` / ``file_contents`` hold the input files. ``run_file_split``
    fills the ``sp_file_*`` lists with fragments, ``sp_file_result`` receives
    one processed text per fragment, and ``merge_result`` joins them back into
    ``file_result`` (one entry per input file).
    """

    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []
        # Initialised here so that merge_result() / write_result() do not fail
        # with AttributeError when called before a caller assigned them.
        self.sp_file_result = []
        self.file_result = []

        # count_token
        from llm_cards.bridge_all import model_info
        enc = model_info["gpt-3.5-turbo"]['tokenizer']

        def get_token_num(txt):
            return len(enc.encode(txt, disallowed_special=()))
        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """Split over-long file contents into fragments.

        A file whose token count is below ``max_token_limit`` is kept as a
        single fragment tagged with its path. Any other file is broken down
        with ``breakdown_txt_to_satisfy_token_limit_for_pdf`` and each fragment
        is tagged ``<path>.part-<j>.tex``.
        """
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")

        print('Segmentation: done')

    def merge_result(self):
        """Concatenate the fragment results back into one text per file."""
        self.file_result = ["" for _ in range(len(self.file_paths))]
        for r, k in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[k] += r

    def write_result(self):
        """Write each merged result to ``<path>.polish.tex``.

        Returns:
            list: the paths of the written files.
        """
        manifest = []
        for path, res in zip(self.file_paths, self.file_result):
            with open(path + '.polish.tex', 'w', encoding='utf8') as f:
                manifest.append(path + '.polish.tex')
                f.write(res)
        return manifest

    def zip_result(self):
        """Zip the folder of the first file into ``./gpt_log/`` with a timestamped name."""
        import os
        import time
        folder = os.path.dirname(self.file_paths[0])
        t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        zip_folder(folder, './gpt_log/', f'{t}-polished.zip')
def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'):
    """Polish or proofread a set of LaTeX files with GPT.

    The files are read, their LaTeX comments removed, split into fragments,
    sent to GPT in parallel and the answers are merged, written next to the
    inputs, zipped and summarised in a report file.

    Args:
        file_manifest: Paths of the ``.tex`` files to process.
        project_folder: Project folder; not used inside this function.
        llm_kwargs: GPT model parameters, passed on to the GPT request.
        plugin_kwargs: Plugin parameters; not used inside this function.
        chatbot: Handle of the chat display.
        history: Chat history; replaced by the GPT response collection.
        system_prompt: Not used inside this function (a fixed one is sent).
        language: ``'en'`` or ``'zh'``; selects the prompt language.
        mode: ``'polish'`` selects the polishing prompt, anything else the
            proofreading prompt.

    Yields:
        Whatever the GPT request helper and ``update_ui`` yield.

    Raises:
        ValueError: if ``language`` is neither ``'en'`` nor ``'zh'``.
    """
    import re
    import time
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # <-------- read the LaTeX files and strip all comments ---------->
    pfg = PaperFileGroup()
    # An unescaped '%' starts a LaTeX comment that runs to the end of the line.
    comment_pattern = re.compile(r'(?<!\\)%.*')
    for fp in file_manifest:
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        clean_tex_content = comment_pattern.sub('', file_content)
        pfg.file_paths.append(fp)
        pfg.file_contents.append(clean_tex_content)

    # <-------- split over-long LaTeX files ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # <-------- build the prompts ---------->
    # Raw strings keep '\section' / '\cite' literal without relying on
    # invalid escape sequences, which newer Python versions warn about.
    if language == 'en':
        if mode == 'polish':
            inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " +
                            r"improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" +
                            f"\n\n{frag}" for frag in pfg.sp_file_contents]
        else:
            inputs_array = [r"Below is a section from an academic paper, proofread this section." +
                            r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                            r"Answer me only with the revised text:" +
                            f"\n\n{frag}" for frag in pfg.sp_file_contents]
        inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif language == 'zh':
        if mode == 'polish':
            inputs_array = [r"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
                            f"\n\n{frag}" for frag in pfg.sp_file_contents]
        else:
            inputs_array = [r"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" +
                            f"\n\n{frag}" for frag in pfg.sp_file_contents]
        inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["你是一位专业的中文学术论文作家。" for _ in range(n_split)]
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f"Unsupported language: {language!r} (expected 'en' or 'zh')")

    # <-------- multi-threaded GPT requests ---------->
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5,  # limit on the number of parallel tasks
        scroller_max_len=80
    )

    # <-------- reassemble the fragments into complete tex files and zip them ---------->
    try:
        # The collection alternates (input, answer); keep the answers only.
        pfg.sp_file_result = list(gpt_response_collection[1::2])
        pfg.merge_result()
        pfg.write_result()
        pfg.zip_result()
    except Exception:
        # Best effort: a failure here must not prevent the report below.
        print(trimmed_format_exc())

    # <-------- write the report and finish ---------->
    create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + "-chatgpt.polish.md"
    res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
    history = gpt_response_collection
    chatbot.append((f"{fp}完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
@CatchException
def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Polish every ``.tex`` file of a LaTeX project, using English prompts.

    Args:
        txt: Path of the project folder, as typed into the input box.
        llm_kwargs: GPT model parameters, passed on to ``多文件润色``.
        plugin_kwargs: Plugin parameters, passed on to ``多文件润色``.
        chatbot: Handle of the chat display, used to show messages to the user.
        history: Chat history; it is cleared before the files are processed.
        system_prompt: System prompt, passed on to ``多文件润色``.
        web_port: Port the application is running on; not used by this plugin.

    Yields:
        Whatever ``update_ui`` and ``多文件润色`` yield.
    """
    # Basic information: what the plugin does and who contributed it.
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the dependency; if it is missing, tell the user how to install it.
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
    # and SystemExit are no longer swallowed.
    try:
        import tiktoken  # dependency probe only
    except Exception:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []    # clear the history to avoid overflowing the input
    import glob
    import os
    if not os.path.exists(txt):
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    file_manifest = glob.glob(f'{project_folder}/**/*.tex', recursive=True)
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
@CatchException
def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖,如果缺少依赖,则给出安装建议
try:
import tiktoken
except:
report_execption(chatbot, history,
a=f"解析项目: {txt}",
b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, h
gitextract__iw3ng36/
├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── README_zh.md
├── a2f.py
├── app.py
├── audio2face_pb2.py
├── audio2face_pb2_grpc.py
├── audio2face_streaming_utils.py
├── audio_segment.py
├── auto_label_demo.py
├── batch_clean_gpu.txt
├── crazy_functions/
│ ├── Langchain知识库.py
│ ├── Latex全文润色.py
│ ├── Latex全文翻译.py
│ ├── Latex输出PDF结果.py
│ ├── __init__.py
│ ├── chatglm微调工具.py
│ ├── crazy_functions_test.py
│ ├── crazy_utils.py
│ ├── latex_fns/
│ │ ├── latex_actions.py
│ │ └── latex_toolbox.py
│ ├── live_audio/
│ │ ├── aliyunASR.py
│ │ └── audio_io.py
│ ├── test_project/
│ │ ├── cpp/
│ │ │ ├── cppipc/
│ │ │ │ ├── buffer.cpp
│ │ │ │ ├── ipc.cpp
│ │ │ │ ├── policy.h
│ │ │ │ ├── pool_alloc.cpp
│ │ │ │ ├── prod_cons.h
│ │ │ │ ├── queue.h
│ │ │ │ ├── shm.cpp
│ │ │ │ ├── waiter.h
│ │ │ │ └── 来源
│ │ │ ├── libJPG/
│ │ │ │ ├── jpgd.cpp
│ │ │ │ ├── jpgd.h
│ │ │ │ ├── jpge.cpp
│ │ │ │ ├── jpge.h
│ │ │ │ └── 来源
│ │ │ └── longcode/
│ │ │ ├── jpgd.cpp
│ │ │ ├── jpge.cpp
│ │ │ └── prod_cons.h
│ │ ├── latex/
│ │ │ └── attention/
│ │ │ ├── background.tex
│ │ │ ├── introduction.tex
│ │ │ ├── model_architecture.tex
│ │ │ ├── parameter_attention.tex
│ │ │ └── 来源
│ │ ├── python/
│ │ │ └── dqn/
│ │ │ ├── __init__.py
│ │ │ ├── dqn.py
│ │ │ ├── policies.py
│ │ │ └── 来源
│ │ └── 其他测试
│ ├── 下载arxiv论文翻译摘要.py
│ ├── 交互功能函数模板.py
│ ├── 代码重写为全英文_多线程.py
│ ├── 图片生成.py
│ ├── 对话历史存档.py
│ ├── 总结word文档.py
│ ├── 总结音视频.py
│ ├── 批量Markdown翻译.py
│ ├── 批量总结PDF文档.py
│ ├── 批量总结PDF文档pdfminer.py
│ ├── 批量翻译PDF文档_多线程.py
│ ├── 数学动画生成manim.py
│ ├── 理解PDF文档内容.py
│ ├── 生成函数注释.py
│ ├── 联网的ChatGPT.py
│ ├── 联网的ChatGPT_bing版.py
│ ├── 虚空终端.py
│ ├── 解析JupyterNotebook.py
│ ├── 解析项目源代码.py
│ ├── 询问多个大语言模型.py
│ ├── 语音助手.py
│ ├── 读文章写摘要.py
│ ├── 谷歌检索小助手.py
│ ├── 辅助回答.py
│ └── 高级功能函数模板.py
├── gradio_demo.py
├── llm_cards/
│ ├── bridge_all.py
│ ├── bridge_chatglm.py
│ ├── bridge_chatgpt.py
│ ├── bridge_stackclaude.py
│ ├── core_functional.py
│ ├── crazy_functional.py
│ ├── requirements_chatglm.txt
│ └── requirements_slackclaude.txt
├── model_cards/
│ ├── Tag2Text/
│ │ ├── MANIFEST.in
│ │ ├── batch_inference.py
│ │ ├── datasets/
│ │ │ ├── openimages_common_214/
│ │ │ │ ├── imgs/
│ │ │ │ │ └── .gitkeep
│ │ │ │ ├── openimages_common_214_ram_annots.txt
│ │ │ │ ├── openimages_common_214_ram_taglist.txt
│ │ │ │ ├── openimages_common_214_tag2text_idannots.txt
│ │ │ │ └── openimages_common_214_tag2text_tagidlist.txt
│ │ │ └── openimages_rare_200/
│ │ │ ├── imgs/
│ │ │ │ └── .gitkeep
│ │ │ ├── openimages_rare_200_ram_annots.txt
│ │ │ └── openimages_rare_200_ram_taglist.txt
│ │ ├── inference_ram.py
│ │ ├── inference_ram_openset.py
│ │ ├── inference_tag2text.py
│ │ ├── ram/
│ │ │ ├── __init__.py
│ │ │ ├── configs/
│ │ │ │ ├── med_config.json
│ │ │ │ ├── q2l_config.json
│ │ │ │ └── swin/
│ │ │ │ ├── config_swinB_384.json
│ │ │ │ └── config_swinL_384.json
│ │ │ ├── data/
│ │ │ │ ├── ram_tag_list.txt
│ │ │ │ ├── ram_tag_list_chinese.txt
│ │ │ │ ├── ram_tag_list_threshold.txt
│ │ │ │ └── tag_list.txt
│ │ │ ├── inference.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bert.py
│ │ │ │ ├── ram.py
│ │ │ │ ├── swin_transformer.py
│ │ │ │ ├── tag2text.py
│ │ │ │ ├── utils.py
│ │ │ │ └── vit.py
│ │ │ ├── transform.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── metrics.py
│ │ │ └── openset_utils.py
│ │ ├── requirements_groundingDINO.txt
│ │ ├── setup.cfg
│ │ └── setup.py
│ ├── autoback.py
│ ├── groundingdino/
│ │ ├── __init__.py
│ │ ├── config/
│ │ │ ├── GroundingDINO_SwinB.cfg.py
│ │ │ └── GroundingDINO_SwinT_OGC.py
│ │ ├── datasets/
│ │ │ ├── __init__.py
│ │ │ └── transforms.py
│ │ ├── models/
│ │ │ ├── GroundingDINO/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── backbone/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── backbone.py
│ │ │ │ │ ├── position_encoding.py
│ │ │ │ │ └── swin_transformer.py
│ │ │ │ ├── bertwarper.py
│ │ │ │ ├── csrc/
│ │ │ │ │ ├── MsDeformAttn/
│ │ │ │ │ │ ├── ms_deform_attn.h
│ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp
│ │ │ │ │ │ ├── ms_deform_attn_cpu.h
│ │ │ │ │ │ ├── ms_deform_attn_cuda.cu
│ │ │ │ │ │ ├── ms_deform_attn_cuda.h
│ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh
│ │ │ │ │ ├── cuda_version.cu
│ │ │ │ │ └── vision.cpp
│ │ │ │ ├── fuse_modules.py
│ │ │ │ ├── groundingdino.py
│ │ │ │ ├── ms_deform_attn.py
│ │ │ │ ├── transformer.py
│ │ │ │ ├── transformer_vanilla.py
│ │ │ │ └── utils.py
│ │ │ ├── __init__.py
│ │ │ └── registry.py
│ │ ├── util/
│ │ │ ├── __init__.py
│ │ │ ├── box_ops.py
│ │ │ ├── get_tokenlizer.py
│ │ │ ├── inference.py
│ │ │ ├── logger.py
│ │ │ ├── misc.py
│ │ │ ├── slconfig.py
│ │ │ ├── slio.py
│ │ │ ├── time_counter.py
│ │ │ ├── utils.py
│ │ │ ├── visualizer.py
│ │ │ └── vl_utils.py
│ │ └── version.py
│ ├── lama/
│ │ ├── .gitignore
│ │ ├── LICENSE
│ │ ├── README.md
│ │ ├── bin/
│ │ │ ├── analyze_errors.py
│ │ │ ├── blur_predicts.py
│ │ │ ├── calc_dataset_stats.py
│ │ │ ├── debug/
│ │ │ │ └── analyze_overlapping_masks.sh
│ │ │ ├── evaluate_predicts.py
│ │ │ ├── evaluator_example.py
│ │ │ ├── extract_masks.py
│ │ │ ├── filter_sharded_dataset.py
│ │ │ ├── gen_debug_mask_dataset.py
│ │ │ ├── gen_mask_dataset.py
│ │ │ ├── gen_mask_dataset_hydra.py
│ │ │ ├── gen_outpainting_dataset.py
│ │ │ ├── make_checkpoint.py
│ │ │ ├── mask_example.py
│ │ │ ├── paper_runfiles/
│ │ │ │ ├── blur_tests.sh
│ │ │ │ ├── env.sh
│ │ │ │ ├── find_best_checkpoint.py
│ │ │ │ ├── generate_test_celeba-hq.sh
│ │ │ │ ├── generate_test_ffhq.sh
│ │ │ │ ├── generate_test_paris.sh
│ │ │ │ ├── generate_test_paris_256.sh
│ │ │ │ ├── generate_val_test.sh
│ │ │ │ ├── predict_inner_features.sh
│ │ │ │ └── update_test_data_stats.sh
│ │ │ ├── predict.py
│ │ │ ├── predict_inner_features.py
│ │ │ ├── report_from_tb.py
│ │ │ ├── sample_from_dataset.py
│ │ │ ├── side_by_side.py
│ │ │ ├── split_tar.py
│ │ │ ├── to_jit.py
│ │ │ └── train.py
│ │ ├── colab/
│ │ │ └── LaMa_inpainting.ipynb
│ │ ├── conda_env.yml
│ │ ├── configs/
│ │ │ ├── analyze_mask_errors.yaml
│ │ │ ├── data_gen/
│ │ │ │ ├── random_medium_256.yaml
│ │ │ │ ├── random_medium_512.yaml
│ │ │ │ ├── random_thick_256.yaml
│ │ │ │ ├── random_thick_512.yaml
│ │ │ │ ├── random_thin_256.yaml
│ │ │ │ └── random_thin_512.yaml
│ │ │ ├── debug_mask_gen.yaml
│ │ │ ├── eval1.yaml
│ │ │ ├── eval2.yaml
│ │ │ ├── eval2_cpu.yaml
│ │ │ ├── eval2_gpu.yaml
│ │ │ ├── eval2_jpg.yaml
│ │ │ ├── eval2_segm.yaml
│ │ │ ├── eval2_segm_test.yaml
│ │ │ ├── eval2_test.yaml
│ │ │ ├── places2-categories_157.txt
│ │ │ ├── prediction/
│ │ │ │ └── default.yaml
│ │ │ ├── test_large_30k.lst
│ │ │ └── training/
│ │ │ ├── ablv2_work.yaml
│ │ │ ├── ablv2_work_ffc075.yaml
│ │ │ ├── ablv2_work_md.yaml
│ │ │ ├── ablv2_work_no_fm.yaml
│ │ │ ├── ablv2_work_no_segmpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csdilirpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csdilirpl_celeba_csdilirpl1_new.yaml
│ │ │ ├── ablv2_work_no_segmpl_csirpl.yaml
│ │ │ ├── ablv2_work_no_segmpl_csirpl_celeba_csirpl03_new.yaml
│ │ │ ├── ablv2_work_no_segmpl_vgg.yaml
│ │ │ ├── ablv2_work_no_segmpl_vgg_celeba_l2_vgg003_new.yaml
│ │ │ ├── ablv2_work_nodil_segmpl.yaml
│ │ │ ├── ablv2_work_small_holes.yaml
│ │ │ ├── big-lama-celeba.yaml
│ │ │ ├── big-lama-regular-celeba.yaml
│ │ │ ├── big-lama-regular.yaml
│ │ │ ├── big-lama.yaml
│ │ │ ├── data/
│ │ │ │ ├── abl-02-thin-bb.yaml
│ │ │ │ ├── abl-04-256-mh-dist-celeba.yaml
│ │ │ │ ├── abl-04-256-mh-dist-web.yaml
│ │ │ │ └── abl-04-256-mh-dist.yaml
│ │ │ ├── discriminator/
│ │ │ │ └── pix2pixhd_nlayer.yaml
│ │ │ ├── evaluator/
│ │ │ │ └── default_inpainted.yaml
│ │ │ ├── generator/
│ │ │ │ ├── ffc_resnet_075.yaml
│ │ │ │ ├── pix2pixhd_global.yaml
│ │ │ │ ├── pix2pixhd_global_sigmoid.yaml
│ │ │ │ └── pix2pixhd_multidilated_catin_4dil_9b.yaml
│ │ │ ├── hydra/
│ │ │ │ ├── no_time.yaml
│ │ │ │ └── overrides.yaml
│ │ │ ├── lama-fourier-celeba.yaml
│ │ │ ├── lama-fourier.yaml
│ │ │ ├── lama-regular-celeba.yaml
│ │ │ ├── lama-regular.yaml
│ │ │ ├── lama_small_train_masks.yaml
│ │ │ ├── location/
│ │ │ │ ├── celeba_example.yaml
│ │ │ │ ├── docker.yaml
│ │ │ │ └── places_example.yaml
│ │ │ ├── optimizers/
│ │ │ │ └── default_optimizers.yaml
│ │ │ ├── trainer/
│ │ │ │ ├── any_gpu_large_ssim_ddp_final.yaml
│ │ │ │ ├── any_gpu_large_ssim_ddp_final_benchmark.yaml
│ │ │ │ └── any_gpu_large_ssim_ddp_final_celeba.yaml
│ │ │ └── visualizer/
│ │ │ └── directory.yaml
│ │ ├── docker/
│ │ │ ├── 1_generate_masks_from_raw_images.sh
│ │ │ ├── 2_predict.sh
│ │ │ ├── 3_evaluate.sh
│ │ │ ├── Dockerfile
│ │ │ ├── Dockerfile-cuda111
│ │ │ ├── build-cuda111.sh
│ │ │ ├── build.sh
│ │ │ └── entrypoint.sh
│ │ ├── fetch_data/
│ │ │ ├── celebahq_dataset_prepare.sh
│ │ │ ├── celebahq_gen_masks.sh
│ │ │ ├── eval_sampler.py
│ │ │ ├── places_challenge_train_download.sh
│ │ │ ├── places_standard_evaluation_prepare_data.sh
│ │ │ ├── places_standard_test_val_gen_masks.sh
│ │ │ ├── places_standard_test_val_prepare.sh
│ │ │ ├── places_standard_test_val_sample.sh
│ │ │ ├── places_standard_train_prepare.sh
│ │ │ ├── sampler.py
│ │ │ ├── train_shuffled.flist
│ │ │ └── val_shuffled.flist
│ │ ├── models/
│ │ │ └── ade20k/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── color150.mat
│ │ │ ├── mobilenet.py
│ │ │ ├── object150_info.csv
│ │ │ ├── resnet.py
│ │ │ ├── segm_lib/
│ │ │ │ ├── nn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── modules/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── batchnorm.py
│ │ │ │ │ │ ├── comm.py
│ │ │ │ │ │ ├── replicate.py
│ │ │ │ │ │ ├── tests/
│ │ │ │ │ │ │ ├── test_numeric_batchnorm.py
│ │ │ │ │ │ │ └── test_sync_batchnorm.py
│ │ │ │ │ │ └── unittest.py
│ │ │ │ │ └── parallel/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── data_parallel.py
│ │ │ │ └── utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── dataloader.py
│ │ │ │ │ ├── dataset.py
│ │ │ │ │ ├── distributed.py
│ │ │ │ │ └── sampler.py
│ │ │ │ └── th.py
│ │ │ └── utils.py
│ │ ├── requirements.txt
│ │ └── saicinpainting/
│ │ ├── __init__.py
│ │ ├── evaluation/
│ │ │ ├── __init__.py
│ │ │ ├── data.py
│ │ │ ├── evaluator.py
│ │ │ ├── losses/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_loss.py
│ │ │ │ ├── fid/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fid_score.py
│ │ │ │ │ └── inception.py
│ │ │ │ ├── lpips.py
│ │ │ │ └── ssim.py
│ │ │ ├── masks/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── countless/
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── countless2d.py
│ │ │ │ │ ├── countless3d.py
│ │ │ │ │ ├── requirements.txt
│ │ │ │ │ └── test.py
│ │ │ │ └── mask.py
│ │ │ ├── refinement.py
│ │ │ ├── utils.py
│ │ │ └── vis.py
│ │ ├── training/
│ │ │ ├── __init__.py
│ │ │ ├── data/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aug.py
│ │ │ │ ├── datasets.py
│ │ │ │ └── masks.py
│ │ │ ├── losses/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adversarial.py
│ │ │ │ ├── constants.py
│ │ │ │ ├── distance_weighting.py
│ │ │ │ ├── feature_matching.py
│ │ │ │ ├── perceptual.py
│ │ │ │ ├── segmentation.py
│ │ │ │ └── style_loss.py
│ │ │ ├── modules/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── depthwise_sep_conv.py
│ │ │ │ ├── fake_fakes.py
│ │ │ │ ├── ffc.py
│ │ │ │ ├── multidilated_conv.py
│ │ │ │ ├── multiscale.py
│ │ │ │ ├── pix2pixhd.py
│ │ │ │ ├── spatial_transform.py
│ │ │ │ └── squeeze_excitation.py
│ │ │ ├── trainers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── default.py
│ │ │ └── visualizers/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── colors.py
│ │ │ ├── directory.py
│ │ │ └── noop.py
│ │ └── utils.py
│ ├── requirements.txt
│ ├── segment_anything/
│ │ ├── __init__.py
│ │ ├── automatic_mask_generator.py
│ │ ├── build_sam.py
│ │ ├── modeling/
│ │ │ ├── __init__.py
│ │ │ ├── common.py
│ │ │ ├── image_encoder.py
│ │ │ ├── mask_decoder.py
│ │ │ ├── prompt_encoder.py
│ │ │ ├── sam.py
│ │ │ └── transformer.py
│ │ ├── predictor.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── amg.py
│ │ ├── onnx.py
│ │ └── transforms.py
│ └── setup.py
├── requirements.txt
├── requirements_llm_extra.txt
├── themes/
│ ├── common.js
│ ├── default.css
│ ├── default.py
│ ├── green.css
│ ├── green.py
│ └── theme.py
└── utils/
├── AudioRecorder.py
├── AudioTrans.py
├── __init__.py
├── audio.py
├── check_proxy.py
├── colorful.py
├── conf.py
├── dataloads.py
├── downloads.py
├── ops.py
├── plot.py
├── text2speech.py
├── textsplitter/
│ ├── __init__.py
│ ├── ali_text_splitter.py
│ ├── chinese_text_splitter.py
│ └── zh_title_enhance.py
├── toolbox.py
├── torch_utils.py
└── video.py
Showing preview only (208K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2527 symbols across 220 files)
FILE: a2f.py
function get_part_wav (line 31) | def get_part_wav(sound, start_time, end_time, part_wav_path):
function crop_wav (line 41) | def crop_wav(path, crop_len):
function process_chunk (line 62) | def process_chunk(model, chunk, detect_language):
function speech_recognition (line 78) | def speech_recognition(inputs, model,stream_model=False,detect_language=...
function mic_audio (line 130) | def mic_audio(record_file="record.wav"):
function tts_send (line 163) | async def tts_send(text,onmiverse=False,send_file='voice_dir/send_a2f.wa...
function tts_a2f (line 209) | async def tts_a2f(text):
function push_stream (line 228) | def push_stream(url,player,dir="voice_dir/send_omniverse.wav"):
function audio_synthesis (line 246) | def audio_synthesis(gpt_replying_buffer,url,player):
function process_send_stream (line 250) | def process_send_stream(gpt_replying_buffer,url,player):
function receive_max (line 258) | def receive_max(q,Text):
function send_stream2 (line 281) | def send_stream2(q):
function audio_chatbot (line 319) | def audio_chatbot(text):
FILE: app.py
function toggle_operation (line 59) | def toggle_operation(flag):
function sadtalker_demo (line 80) | async def sadtalker_demo(checkpoint_path,config_path,source_image,
function train_visualGLM (line 104) | def train_visualGLM(name,model_size,mode,train_iters,resume_data,
function start_finetuning_process (line 128) | def start_finetuning_process(gpt_option,model_args,method_type):
function load_speech_model (line 186) | async def load_speech_model(asr_method,tts_method):
function save_text2img_data (line 209) | def save_text2img_data(prompt,label,img_name,zh_select):
function load_auto_backend_models (line 222) | async def load_auto_backend_models(lama, sam, det,tag2text,ram, trans_zh...
function load_auto_backend_model (line 234) | def load_auto_backend_model(lama,sam,det,tag2text,ram,trans_zh,visual_gl...
function Auto_run (line 304) | def Auto_run(
function visual_chat (line 517) | def visual_chat(prompt_input, temperature, top_p, image_prompt, result_t...
function clear_fn_image (line 534) | def clear_fn_image(value):
function t2s (line 725) | def t2s(text,method):
function s2t (line 734) | def s2t(speech_file,stream_mode=False):
function fn_area_visibility (line 830) | def fn_area_visibility(a):
function on_md_dropdown_changed (line 859) | def on_md_dropdown_changed(k):
function on_dropdown_changed (line 900) | def on_dropdown_changed(k):
function on_md_dropdown_changed (line 909) | def on_md_dropdown_changed(k):
function route (line 913) | def route(request: gr.Request, k, *args, **kwargs):
function auto_opentab_delay (line 937) | def auto_opentab_delay(port=7586):
FILE: audio2face_pb2_grpc.py
class Audio2FaceStub (line 8) | class Audio2FaceStub(object):
method __init__ (line 11) | def __init__(self, channel):
class Audio2FaceServicer (line 29) | class Audio2FaceServicer(object):
method PushAudio (line 32) | def PushAudio(self, request, context):
method PushAudioStream (line 38) | def PushAudioStream(self, request_iterator, context):
function add_Audio2FaceServicer_to_server (line 45) | def add_Audio2FaceServicer_to_server(servicer, server):
class Audio2Face (line 63) | class Audio2Face(object):
method PushAudio (line 67) | def PushAudio(
method PushAudioStream (line 96) | def PushAudioStream(
FILE: audio2face_streaming_utils.py
function push_audio_track (line 22) | def push_audio_track(url, audio_data, samplerate, instance_name):
function push_audio_track_stream (line 50) | def push_audio_track_stream(url, audio_data, samplerate, instance_name):
function push_stream (line 95) | def push_stream(url, audio_data, samplerate, instance_name):
FILE: audio_segment.py
function crop_audio (line 8) | def crop_audio(file_path, start_time, end_time):
function split_audio_file (line 19) | def split_audio_file(file_path, output_path, segment_time=3000):
function audio_processing (line 37) | def audio_processing(file_path, output_path, label):
FILE: auto_label_demo.py
function save_text2img_data (line 45) | def save_text2img_data(output_dir, prompt,label,img_name):
function load_auto_backend_models (line 55) | def load_auto_backend_models(opt):
function Auto_run (line 74) | def Auto_run(weights=ROOT / '', # model.pt path(s)
function run_do (line 262) | def run_do(shared_args,process_name=0):
function parse_opt (line 266) | def parse_opt():
function main (line 314) | def main(opt):
FILE: crazy_functions/Langchain知识库.py
function 知识库问答 (line 7) | def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_promp...
function 读取知识库作答 (line 77) | def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro...
FILE: crazy_functions/Latex全文润色.py
class PaperFileGroup (line 5) | class PaperFileGroup():
method __init__ (line 6) | def __init__(self):
method run_file_split (line 19) | def run_file_split(self, max_token_limit=1900):
method merge_result (line 37) | def merge_result(self):
method write_result (line 42) | def write_result(self):
method zip_result (line 50) | def zip_result(self):
function 多文件润色 (line 57) | def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chat...
function Latex英文润色 (line 136) | def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
function Latex中文润色 (line 174) | def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
function Latex英文纠错 (line 210) | def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
FILE: crazy_functions/Latex全文翻译.py
class PaperFileGroup (line 5) | class PaperFileGroup():
method __init__ (line 6) | def __init__(self):
method run_file_split (line 19) | def run_file_split(self, max_token_limit=1900):
function 多文件翻译 (line 38) | def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chat...
function Latex英译中 (line 108) | def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
function Latex中译英 (line 145) | def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
FILE: crazy_functions/Latex输出PDF结果.py
function switch_prompt (line 10) | def switch_prompt(pfg, mode, more_requirement):
function desend_to_extracted_folder_if_exist (line 38) | def desend_to_extracted_folder_if_exist(project_folder):
function move_project (line 53) | def move_project(project_folder, arxiv_id=None):
function arxiv_download (line 82) | def arxiv_download(chatbot, history, txt):
function Latex英文纠错加PDF对比 (line 145) | def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, sy...
function Latex翻译中文并重新编译PDF (line 221) | def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, ...
FILE: crazy_functions/chatglm微调工具.py
function fetch_items (line 5) | def fetch_items(list_of_items, batch_size):
function string_to_options (line 9) | def string_to_options(arguments):
function 微调数据集生成 (line 35) | def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro...
function 启动微调 (line 83) | def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt...
FILE: crazy_functions/crazy_functions_test.py
function validate_path (line 9) | def validate_path():
function silence_stdout (line 43) | def silence_stdout(func):
class CLI_Printer (line 56) | class CLI_Printer():
method __init__ (line 57) | def __init__(self) -> None:
method print (line 60) | def print(self, buf):
function test_解析一个Python项目 (line 78) | def test_解析一个Python项目():
function test_解析一个Cpp项目 (line 84) | def test_解析一个Cpp项目():
function test_Latex英文润色 (line 90) | def test_Latex英文润色():
function test_Markdown中译英 (line 96) | def test_Markdown中译英():
function test_批量翻译PDF文档 (line 102) | def test_批量翻译PDF文档():
function test_谷歌检索小助手 (line 108) | def test_谷歌检索小助手():
function test_总结word文档 (line 114) | def test_总结word文档():
function test_下载arxiv论文并翻译摘要 (line 120) | def test_下载arxiv论文并翻译摘要():
function test_联网回答问题 (line 126) | def test_联网回答问题():
function test_解析ipynb文件 (line 139) | def test_解析ipynb文件():
function test_数学动画生成manim (line 146) | def test_数学动画生成manim():
function test_Markdown多语言 (line 154) | def test_Markdown多语言():
function test_Langchain知识库 (line 163) | def test_Langchain知识库():
function test_Langchain知识库读取 (line 176) | def test_Langchain知识库读取():
function test_Latex (line 182) | def test_Latex():
function test_chatglm_finetune (line 217) | def test_chatglm_finetune():
FILE: crazy_functions/crazy_utils.py
function input_clipping (line 4) | def input_clipping(inputs, history, max_token_limit):
function request_gpt_model_in_new_thread_with_ui_alive (line 38) | def request_gpt_model_in_new_thread_with_ui_alive(
function can_multi_process (line 133) | def can_multi_process(llm):
function request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency (line 139) | def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_effici...
function breakdown_txt_to_satisfy_token_limit (line 306) | def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
function force_breakdown (line 336) | def force_breakdown(txt, limit, get_token_fn):
function breakdown_txt_to_satisfy_token_limit_for_pdf (line 345) | def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
function read_and_clean_pdf_text (line 396) | def read_and_clean_pdf_text(fp):
function get_files_from_everything (line 574) | def get_files_from_everything(txt, type): # type='.md'
function Singleton (line 619) | def Singleton(cls):
class knowledge_archive_interface (line 631) | class knowledge_archive_interface():
method __init__ (line 632) | def __init__(self) -> None:
method get_chinese_text2vec (line 639) | def get_chinese_text2vec(self):
method feed_archive (line 651) | def feed_archive(self, file_manifest, id="default"):
method get_current_archive_id (line 667) | def get_current_archive_id(self):
method get_loaded_file (line 670) | def get_loaded_file(self):
method answer_with_archive_by_id (line 673) | def answer_with_archive_by_id(self, txt, id):
function try_install_deps (line 702) | def try_install_deps(deps):
class construct_html (line 715) | class construct_html():
method __init__ (line 716) | def __init__(self) -> None:
method add_row (line 744) | def add_row(self, a, b):
method save_file (line 757) | def save_file(self, file_name):
FILE: crazy_functions/latex_fns/latex_actions.py
function split_subprocess (line 15) | def split_subprocess(txt, project_folder, return_dict, opts):
class LatexPaperSplit (line 80) | class LatexPaperSplit():
method __init__ (line 86) | def __init__(self) -> None:
method merge_result (line 95) | def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surg...
method split (line 135) | def split(self, txt, project_folder, opts):
class LatexPaperFileGroup (line 156) | class LatexPaperFileGroup():
method __init__ (line 160) | def __init__(self):
method run_file_split (line 173) | def run_file_split(self, max_token_limit=1900):
method merge_result (line 191) | def merge_result(self):
method write_result (line 196) | def write_result(self):
function Latex精细分解与转化 (line 205) | def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwarg...
function remove_buggy_lines (line 299) | def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_f...
function 编译Latex (line 326) | def 编译Latex(chatbot, history, main_file_original, main_file_modified, wo...
function write_html (line 422) | def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
FILE: crazy_functions/latex_fns/latex_toolbox.py
class LinkedListNode (line 9) | class LinkedListNode():
method __init__ (line 13) | def __init__(self, string, preserve=True) -> None:
function convert_to_linklist (line 21) | def convert_to_linklist(text, mask):
function post_process (line 34) | def post_process(root):
function set_forbidden_text (line 127) | def set_forbidden_text(text, mask, pattern, flags=0):
function reverse_forbidden_text (line 140) | def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=...
function set_forbidden_text_careful_brace (line 158) | def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
function reverse_forbidden_text_careful_brace (line 178) | def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, f...
function set_forbidden_text_begin_end (line 201) | def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_l...
function find_main_tex_file (line 230) | def find_main_tex_file(file_manifest, mode):
function rm_comments (line 268) | def rm_comments(main_file):
function find_tex_file_ignore_case (line 281) | def find_tex_file_ignore_case(fp):
function merge_tex_files_ (line 293) | def merge_tex_files_(project_foler, main_file, mode):
function merge_tex_files (line 310) | def merge_tex_files(project_foler, main_file, mode):
function mod_inbraket (line 346) | def mod_inbraket(match):
function fix_content (line 359) | def fix_content(final_tex, node_string):
function compile_latex_with_timeout (line 407) | def compile_latex_with_timeout(command, cwd, timeout=60):
function merge_pdfs (line 421) | def merge_pdfs(pdf1_path, pdf2_path, output_path):
FILE: crazy_functions/live_audio/aliyunASR.py
class AliyunASR (line 4) | class AliyunASR():
method test_on_sentence_begin (line 6) | def test_on_sentence_begin(self, message, *args):
method test_on_sentence_end (line 10) | def test_on_sentence_end(self, message, *args):
method test_on_start (line 17) | def test_on_start(self, message, *args):
method test_on_error (line 21) | def test_on_error(self, message, *args):
method test_on_close (line 25) | def test_on_close(self, *args):
method test_on_result_chg (line 29) | def test_on_result_chg(self, message, *args):
method test_on_completed (line 35) | def test_on_completed(self, message, *args):
method audio_convertion_thread (line 40) | def audio_convertion_thread(self, uuid):
method get_token (line 97) | def get_token(self):
FILE: crazy_functions/live_audio/audio_io.py
function Singleton (line 4) | def Singleton(cls):
class RealtimeAudioDistribution (line 16) | class RealtimeAudioDistribution():
method __init__ (line 17) | def __init__(self) -> None:
method clean_up (line 22) | def clean_up(self):
method feed (line 25) | def feed(self, uuid, audio):
method read (line 35) | def read(self, uuid):
function change_sample_rate (line 43) | def change_sample_rate(audio, old_sr, new_sr):
FILE: crazy_functions/test_project/cpp/cppipc/buffer.cpp
type ipc (line 6) | namespace ipc {
class buffer::buffer_ (line 16) | class buffer::buffer_ : public pimpl<buffer_> {
method buffer_ (line 23) | buffer_(void* p, std::size_t s, buffer::destructor_t d, void* a)
function buffer (line 66) | buffer& buffer::operator=(buffer rhs) {
FILE: crazy_functions/test_project/cpp/cppipc/ipc.cpp
type msg_t (line 37) | struct msg_t
method msg_t (line 51) | msg_t() = default;
method msg_t (line 52) | msg_t(msg_id_t cc_id, msg_id_t id, std::int32_t remain, void const * d...
type msg_t<0, AlignSize> (line 40) | struct msg_t<0, AlignSize> {
type msg_t (line 48) | struct msg_t : msg_t<0, AlignSize> {
method msg_t (line 51) | msg_t() = default;
method msg_t (line 52) | msg_t(msg_id_t cc_id, msg_id_t id, std::int32_t remain, void const * d...
function make_cache (line 66) | ipc::buff_t make_cache(T& data, std::size_t size) {
type cache_t (line 72) | struct cache_t {
method cache_t (line 76) | cache_t(std::size_t f, ipc::buff_t && b)
method append (line 80) | void append(void const * data, std::size_t size) {
function cc_acc (line 88) | auto cc_acc() {
function IPC_CONSTEXPR_ (line 93) | IPC_CONSTEXPR_ std::size_t align_chunk_size(std::size_t size) noexcept {
function IPC_CONSTEXPR_ (line 97) | IPC_CONSTEXPR_ std::size_t calc_chunk_size(std::size_t size) noexcept {
type chunk_t (line 102) | struct chunk_t {
type chunk_info_t (line 113) | struct chunk_info_t {
function chunk_t (line 125) | chunk_t *at(std::size_t chunk_size, ipc::storage_id_t id) noexcept {
class chunk_handle_t (line 132) | class chunk_handle_t {
method chunk_info_t (line 136) | chunk_info_t *get_info(std::size_t chunk_size) {
function chunk_info_t (line 155) | chunk_info_t *chunk_storage_info(std::size_t chunk_size) {
function acquire_storage (line 171) | std::pair<ipc::storage_id_t, void*> acquire_storage(std::size_t size, ip...
function release_storage (line 199) | void release_storage(ipc::storage_id_t id, std::size_t size) {
function sub_rc (line 213) | bool sub_rc(ipc::wr<Rp, Rc, ipc::trans::unicast>,
function sub_rc (line 219) | bool sub_rc(ipc::wr<Rp, Rc, ipc::trans::broadcast>,
function recycle_storage (line 232) | void recycle_storage(ipc::storage_id_t id, std::size_t size, ipc::circ::...
function clear_message (line 253) | bool clear_message(void* p) {
type conn_info_head (line 268) | struct conn_info_head {
method conn_info_head (line 275) | conn_info_head(char const * name)
method quit_waiting (line 284) | void quit_waiting() {
method acc (line 290) | auto acc() {
function wait_for (line 301) | bool wait_for(W& waiter, F&& pred, std::uint64_t tm) {
type queue_generator (line 318) | struct queue_generator {
type conn_info_t (line 322) | struct conn_info_t : conn_info_head {
method conn_info_t (line 325) | conn_info_t(char const * name)
method disconnect_receiver (line 332) | void disconnect_receiver() {
type detail_impl (line 343) | struct detail_impl {
method conn_info_t (line 350) | constexpr static conn_info_t* info_of(ipc::handle_t h) noexcept {
method queue_t (line 354) | constexpr static queue_t* queue_of(ipc::handle_t h) noexcept {
method disconnect (line 360) | static void disconnect(ipc::handle_t h) {
method reconnect (line 370) | static bool reconnect(ipc::handle_t * ph, bool start_to_recv) {
method connect (line 392) | static bool connect(ipc::handle_t * ph, char const * name, bool start_...
method destroy (line 400) | static void destroy(ipc::handle_t h) {
method recv_count (line 405) | static std::size_t recv_count(ipc::handle_t h) noexcept {
method wait_for_recv (line 413) | static bool wait_for_recv(ipc::handle_t h, std::size_t r_count, std::u...
method send (line 424) | static bool send(F&& gen_push, ipc::handle_t h, void const * data, std...
method send (line 486) | static bool send(ipc::handle_t h, void const * data, std::size_t size,...
method try_send (line 507) | static bool try_send(ipc::handle_t h, void const * data, std::size_t s...
method recv (line 523) | static ipc::buff_t recv(ipc::handle_t h, std::uint64_t tm) {
method try_recv (line 620) | static ipc::buff_t try_recv(ipc::handle_t h) {
type ipc (line 631) | namespace ipc {
function buff_t (line 681) | buff_t chan_impl<Flag>::recv(ipc::handle_t h, std::uint64_t tm) {
function buff_t (line 691) | buff_t chan_impl<Flag>::try_recv(ipc::handle_t h) {
type chan_impl<ipc::wr<relat::single, relat::single, trans::unicast >> (line 695) | struct chan_impl<ipc::wr<relat::single, relat::single, trans::unicast >>
type chan_impl<ipc::wr<relat::single, relat::multi , trans::broadcast>> (line 698) | struct chan_impl<ipc::wr<relat::single, relat::multi , trans::broadcast>>
type chan_impl<ipc::wr<relat::multi , relat::multi , trans::broadcast>> (line 699) | struct chan_impl<ipc::wr<relat::multi , relat::multi , trans::broadcast>>
FILE: crazy_functions/test_project/cpp/cppipc/policy.h
function namespace (line 10) | namespace ipc {
FILE: crazy_functions/test_project/cpp/cppipc/pool_alloc.cpp
type ipc (line 5) | namespace ipc {
type mem (line 6) | namespace mem {
FILE: crazy_functions/test_project/cpp/cppipc/prod_cons.h
function namespace (line 16) | namespace ipc {
type elem_t (line 112) | struct elem_t {
type rc_t (line 200) | enum : rc_t {
type elem_t (line 206) | struct elem_t {
function rc_t (line 212) | alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer
type rc_t (line 298) | enum : rc_t {
type elem_t (line 307) | struct elem_t {
function std (line 314) | alignas(cache_line_size) std::atomic<rc_t> epoch_ { 0 };
FILE: crazy_functions/test_project/cpp/cppipc/queue.h
function namespace (line 22) | namespace ipc {
function pop (line 206) | bool pop(T& item) {
FILE: crazy_functions/test_project/cpp/cppipc/shm.cpp
type ipc (line 10) | namespace ipc {
type shm (line 11) | namespace shm {
class handle::handle_ (line 13) | class handle::handle_ : public pimpl<handle_> {
function handle (line 45) | handle& handle::operator=(handle rhs) {
function id_t (line 93) | id_t handle::detach() {
FILE: crazy_functions/test_project/cpp/cppipc/waiter.h
function IPC_UNUSED_ (line 56) | IPC_UNUSED_ std::lock_guard<ipc::sync::mutex> guard {lock_};
function notify (line 66) | bool notify() noexcept {
FILE: crazy_functions/test_project/cpp/libJPG/jpgd.cpp
type jpgd (line 34) | namespace jpgd {
function jpgd_free (line 37) | static inline void jpgd_free(void *p) { FMemory::Free(p); }
type ERGBFormatJPG (line 42) | enum ERGBFormatJPG
type JPEG_MARKER (line 55) | enum JPEG_MARKER
type JPEG_SUBSAMPLING (line 64) | enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JP...
type Row (line 92) | struct Row
method idct (line 94) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Row<0> (line 137) | struct Row<0>
method idct (line 139) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Row<1> (line 148) | struct Row<1>
method idct (line 150) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Col (line 167) | struct Col
method idct (line 169) | static void idct(uint8* pDst_ptr, const int* pTemp)
type Col<1> (line 228) | struct Col<1>
method idct (line 230) | static void idct(uint8* pDst_ptr, const int* pTemp)
function idct (line 259) | void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max...
function idct_4x4 (line 328) | void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
function uint (line 351) | inline uint jpeg_decoder::get_char()
function uint (line 378) | inline uint jpeg_decoder::get_char(bool *pPadding_flag)
function uint8 (line 411) | inline uint8 jpeg_decoder::get_octet()
function uint (line 442) | inline uint jpeg_decoder::get_bits(int num_bits)
function uint (line 470) | inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
function uint8 (line 586) | inline uint8 jpeg_decoder::clamp(int i)
type DCT_Upsample (line 594) | namespace DCT_Upsample
type Matrix44 (line 596) | struct Matrix44
method rows (line 603) | inline int rows() const { return NUM_ROWS; }
method cols (line 604) | inline int cols() const { return NUM_COLS; }
method Element_Type (line 606) | inline const Element_Type & at(int r, int c) const { return v[r][c...
method Element_Type (line 607) | inline Element_Type & at(int r, int c) { return v[r][c...
method Matrix44 (line 609) | inline Matrix44() { }
method Matrix44 (line 611) | inline Matrix44& operator += (const Matrix44& a)
method Matrix44 (line 623) | inline Matrix44& operator -= (const Matrix44& a)
method Matrix44 (line 635) | inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
method Matrix44 (line 648) | inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
method add_and_store (line 661) | static inline void add_and_store(jpgd_block_t* pDst, const Matrix4...
method sub_and_store (line 672) | static inline void sub_and_store(jpgd_block_t* pDst, const Matrix4...
type P_Q (line 696) | struct P_Q
method calc (line 698) | static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
type R_S (line 775) | struct R_S
method calc (line 777) | static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
function dequantize_ac (line 1775) | static inline int dequantize_ac(int c, int q) { c *= q; return c; }
function jpgd_block_t (line 2683) | inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int b...
FILE: crazy_functions/test_project/cpp/libJPG/jpgd.h
function namespace (line 10) | namespace jpgd
FILE: crazy_functions/test_project/cpp/libJPG/jpge.cpp
type jpge (line 22) | namespace jpge {
function jpge_free (line 25) | static inline void jpge_free(void *p) { FMemory::Free(p);; }
function clear_obj (line 60) | inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
function uint8 (line 63) | static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { ...
function RGB_to_YCC (line 65) | static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGB_to_Y (line 76) | static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGBA_to_YCC (line 82) | static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGBA_to_Y (line 93) | static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
function Y_to_YCC (line 99) | static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
function DCT2D (line 125) | static void DCT2D(int32 *p)
type sym_freq (line 144) | struct sym_freq { uint m_key, m_sym_index; }
function sym_freq (line 147) | static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms...
function calculate_minimum_redundancy (line 167) | static void calculate_minimum_redundancy(sym_freq *A, int n)
function huffman_enforce_max_code_size (line 189) | static void huffman_enforce_max_code_size(int *pNum_codes, int code_li...
class cfile_stream (line 904) | class cfile_stream : public output_stream
method cfile_stream (line 913) | cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
method open (line 920) | bool open(const char *pFilename)
method close (line 935) | bool close()
method put_buf (line 948) | virtual bool put_buf(const void* pBuf, int64_t len)
method uint (line 954) | uint get_size() const
function compress_image_to_jpeg_file (line 961) | bool compress_image_to_jpeg_file(const char *pFilename, int64_t width,...
class memory_stream (line 989) | class memory_stream : public output_stream
method memory_stream (line 998) | memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast<ui...
method put_buf (line 1002) | virtual bool put_buf(const void* pBuf, int64_t len)
method get_size (line 1012) | uint64_t get_size() const
function compress_image_to_jpeg_file_in_memory (line 1018) | bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf...
FILE: crazy_functions/test_project/cpp/libJPG/jpge.h
function namespace (line 10) | namespace jpge
function class (line 62) | class output_stream
function class (line 71) | class jpeg_encoder
FILE: crazy_functions/test_project/cpp/longcode/jpgd.cpp
type jpgd (line 34) | namespace jpgd {
function jpgd_free (line 37) | static inline void jpgd_free(void *p) { FMemory::Free(p); }
type ERGBFormatJPG (line 42) | enum ERGBFormatJPG
type JPEG_MARKER (line 55) | enum JPEG_MARKER
type JPEG_SUBSAMPLING (line 64) | enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JP...
type Row (line 92) | struct Row
method idct (line 94) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Row<0> (line 137) | struct Row<0>
method idct (line 139) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Row<1> (line 148) | struct Row<1>
method idct (line 150) | static void idct(int* pTemp, const jpgd_block_t* pSrc)
type Col (line 167) | struct Col
method idct (line 169) | static void idct(uint8* pDst_ptr, const int* pTemp)
type Col<1> (line 228) | struct Col<1>
method idct (line 230) | static void idct(uint8* pDst_ptr, const int* pTemp)
function idct (line 259) | void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max...
function idct_4x4 (line 328) | void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
function uint (line 351) | inline uint jpeg_decoder::get_char()
function uint (line 378) | inline uint jpeg_decoder::get_char(bool *pPadding_flag)
function uint8 (line 411) | inline uint8 jpeg_decoder::get_octet()
function uint (line 442) | inline uint jpeg_decoder::get_bits(int num_bits)
function uint (line 470) | inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
function uint8 (line 586) | inline uint8 jpeg_decoder::clamp(int i)
type DCT_Upsample (line 594) | namespace DCT_Upsample
type Matrix44 (line 596) | struct Matrix44
method rows (line 603) | inline int rows() const { return NUM_ROWS; }
method cols (line 604) | inline int cols() const { return NUM_COLS; }
method Element_Type (line 606) | inline const Element_Type & at(int r, int c) const { return v[r][c...
method Element_Type (line 607) | inline Element_Type & at(int r, int c) { return v[r][c...
method Matrix44 (line 609) | inline Matrix44() { }
method Matrix44 (line 611) | inline Matrix44& operator += (const Matrix44& a)
method Matrix44 (line 623) | inline Matrix44& operator -= (const Matrix44& a)
method Matrix44 (line 635) | inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
method Matrix44 (line 648) | inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
method add_and_store (line 661) | static inline void add_and_store(jpgd_block_t* pDst, const Matrix4...
method sub_and_store (line 672) | static inline void sub_and_store(jpgd_block_t* pDst, const Matrix4...
type P_Q (line 696) | struct P_Q
method calc (line 698) | static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
type R_S (line 775) | struct R_S
method calc (line 777) | static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
function dequantize_ac (line 1775) | static inline int dequantize_ac(int c, int q) { c *= q; return c; }
function jpgd_block_t (line 2683) | inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int b...
FILE: crazy_functions/test_project/cpp/longcode/jpge.cpp
type jpge (line 22) | namespace jpge {
function jpge_free (line 25) | static inline void jpge_free(void *p) { FMemory::Free(p);; }
function clear_obj (line 60) | inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
function uint8 (line 63) | static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { ...
function RGB_to_YCC (line 65) | static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGB_to_Y (line 76) | static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGBA_to_YCC (line 82) | static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels)
function RGBA_to_Y (line 93) | static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels)
function Y_to_YCC (line 99) | static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
function DCT2D (line 125) | static void DCT2D(int32 *p)
type sym_freq (line 144) | struct sym_freq { uint m_key, m_sym_index; }
function sym_freq (line 147) | static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms...
function calculate_minimum_redundancy (line 167) | static void calculate_minimum_redundancy(sym_freq *A, int n)
function huffman_enforce_max_code_size (line 189) | static void huffman_enforce_max_code_size(int *pNum_codes, int code_li...
class cfile_stream (line 904) | class cfile_stream : public output_stream
method cfile_stream (line 913) | cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
method open (line 920) | bool open(const char *pFilename)
method close (line 935) | bool close()
method put_buf (line 948) | virtual bool put_buf(const void* pBuf, int64_t len)
method uint (line 954) | uint get_size() const
function compress_image_to_jpeg_file (line 961) | bool compress_image_to_jpeg_file(const char *pFilename, int64_t width,...
class memory_stream (line 989) | class memory_stream : public output_stream
method memory_stream (line 998) | memory_stream(void *pBuf, uint64_t buf_size) : m_pBuf(static_cast<ui...
method put_buf (line 1002) | virtual bool put_buf(const void* pBuf, int64_t len)
method get_size (line 1012) | uint64_t get_size() const
function compress_image_to_jpeg_file_in_memory (line 1018) | bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int64_t &buf...
FILE: crazy_functions/test_project/cpp/longcode/prod_cons.h
function namespace (line 16) | namespace ipc {
type elem_t (line 112) | struct elem_t {
type rc_t (line 200) | enum : rc_t {
type elem_t (line 206) | struct elem_t {
function rc_t (line 212) | alignas(cache_line_size) rc_t epoch_ { 0 }; // only one writer
type rc_t (line 298) | enum : rc_t {
type elem_t (line 307) | struct elem_t {
function std (line 314) | alignas(cache_line_size) std::atomic<rc_t> epoch_ { 0 };
FILE: crazy_functions/test_project/python/dqn/dqn.py
class DQN (line 16) | class DQN(OffPolicyAlgorithm):
method __init__ (line 58) | def __init__(
method _setup_model (line 123) | def _setup_model(self) -> None:
method _create_aliases (line 130) | def _create_aliases(self) -> None:
method _on_step (line 134) | def _on_step(self) -> None:
method train (line 145) | def train(self, gradient_steps: int, batch_size: int = 100) -> None:
method predict (line 187) | def predict(
method learn (line 214) | def learn(
method _excluded_save_params (line 239) | def _excluded_save_params(self) -> List[str]:
method _get_torch_save_params (line 242) | def _get_torch_save_params(self) -> Tuple[List[str], List[str]]:
FILE: crazy_functions/test_project/python/dqn/policies.py
class QNetwork (line 12) | class QNetwork(BasePolicy):
method __init__ (line 24) | def __init__(
method forward (line 53) | def forward(self, obs: th.Tensor) -> th.Tensor:
method _predict (line 62) | def _predict(self, observation: th.Tensor, deterministic: bool = True)...
method _get_constructor_parameters (line 68) | def _get_constructor_parameters(self) -> Dict[str, Any]:
class DQNPolicy (line 82) | class DQNPolicy(BasePolicy):
method __init__ (line 102) | def __init__(
method _build (line 145) | def _build(self, lr_schedule: Schedule) -> None:
method make_q_net (line 160) | def make_q_net(self) -> QNetwork:
method forward (line 165) | def forward(self, obs: th.Tensor, deterministic: bool = True) -> th.Te...
method _predict (line 168) | def _predict(self, obs: th.Tensor, deterministic: bool = True) -> th.T...
method _get_constructor_parameters (line 171) | def _get_constructor_parameters(self) -> Dict[str, Any]:
class CnnPolicy (line 191) | class CnnPolicy(DQNPolicy):
method __init__ (line 209) | def __init__(
FILE: crazy_functions/下载arxiv论文翻译摘要.py
function download_arxiv_ (line 5) | def download_arxiv_(url_pdf):
function get_name (line 67) | def get_name(_url_):
function 下载arxiv论文并翻译摘要 (line 135) | def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys...
FILE: crazy_functions/交互功能函数模板.py
function 交互功能模板函数 (line 6) | def 交互功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
function get_image_page_by_keyword (line 52) | def get_image_page_by_keyword(keyword):
FILE: crazy_functions/代码重写为全英文_多线程.py
function extract_code_block_carefully (line 7) | def extract_code_block_carefully(txt):
function break_txt_into_half_at_some_linebreak (line 17) | def break_txt_into_half_at_some_linebreak(txt):
function 全项目切换英文 (line 26) | def 全项目切换英文(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prompt...
FILE: crazy_functions/图片生成.py
function gen_image (line 6) | def gen_image(llm_kwargs, prompt, resolution="256x256"):
function 图片生成 (line 47) | def 图片生成(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro...
FILE: crazy_functions/对话历史存档.py
function write_chat_to_file (line 5) | def write_chat_to_file(chatbot, history=None, file_name=None):
function gen_file_preview (line 35) | def gen_file_preview(file_name):
function read_file_to_chat (line 50) | def read_file_to_chat(chatbot, history, file_name):
function 对话历史存档 (line 71) | def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom...
function hide_cwd (line 86) | def hide_cwd(str):
function 载入对话历史存档 (line 93) | def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
function 删除所有本地对话历史记录 (line 123) | def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, syste...
FILE: crazy_functions/总结word文档.py
function 解析docx (line 7) | def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, cha...
function 总结word文档 (line 84) | def 总结word文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
FILE: crazy_functions/总结音视频.py
function split_audio_file (line 4) | def split_audio_file(filename, split_duration=1000):
function AnalyAudio (line 40) | def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
function 总结音视频 (line 133) | def 总结音视频(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_promp...
FILE: crazy_functions/批量Markdown翻译.py
class PaperFileGroup (line 7) | class PaperFileGroup():
method __init__ (line 8) | def __init__(self):
method run_file_split (line 21) | def run_file_split(self, max_token_limit=1900):
method merge_result (line 39) | def merge_result(self):
method write_result (line 44) | def write_result(self, language):
function 多文件翻译 (line 53) | def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chat...
function get_files_from_everything (line 115) | def get_files_from_everything(txt, preference=''):
function Markdown英译中 (line 154) | def Markdown英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system...
function Markdown中译英 (line 194) | def Markdown中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system...
function Markdown翻译指定语言 (line 227) | def Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys...
FILE: crazy_functions/批量总结PDF文档.py
function 解析PDF (line 9) | def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chat...
function 批量总结PDF文档 (line 108) | def 批量总结PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
FILE: crazy_functions/批量总结PDF文档pdfminer.py
function readPdf (line 7) | def readPdf(pdfPath):
function 解析Paper (line 65) | def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch...
function 批量总结PDF文档pdfminer (line 125) | def 批量总结PDF文档pdfminer(txt, llm_kwargs, plugin_kwargs, chatbot, history, ...
FILE: crazy_functions/批量翻译PDF文档_多线程.py
function 批量翻译PDF文档 (line 9) | def 批量翻译PDF文档(txt, llm_kwargs, plugin_kwargs, chatbot, history, sys_prom...
function 解析PDF (line 59) | def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chat...
class construct_html (line 162) | class construct_html():
method __init__ (line 163) | def __init__(self) -> None:
method add_row (line 191) | def add_row(self, a, b):
method save_file (line 204) | def save_file(self, file_name):
FILE: crazy_functions/数学动画生成manim.py
function inspect_dependency (line 5) | def inspect_dependency(chatbot, history):
function eval_manim (line 15) | def eval_manim(code):
function get_code_block (line 42) | def get_code_block(reply):
function 动画生成 (line 51) | def 动画生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt...
function examples_of_manim (line 100) | def examples_of_manim():
FILE: crazy_functions/理解PDF文档内容.py
function 解析PDF (line 8) | def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system...
function 理解PDF文档内容标准文件输入 (line 71) | def 理解PDF文档内容标准文件输入(txt, llm_kwargs, plugin_kwargs, chatbot, history, sy...
FILE: crazy_functions/生成函数注释.py
function 生成函数注释 (line 6) | def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, cha...
function 批量生成函数注释 (line 37) | def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
FILE: crazy_functions/联网的ChatGPT.py
function google (line 7) | def google(query, proxies):
function scrape_text (line 30) | def scrape_text(url, proxies) -> str:
function 连接网络回答问题 (line 58) | def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
FILE: crazy_functions/联网的ChatGPT_bing版.py
function bing_search (line 8) | def bing_search(query, proxies=None):
function scrape_text (line 30) | def scrape_text(url, proxies) -> str:
function 连接bing搜索回答问题 (line 58) | def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, syste...
FILE: crazy_functions/虚空终端.py
function get_fn_lib (line 58) | def get_fn_lib():
function inspect_dependency (line 67) | def inspect_dependency(chatbot, history):
function eval_code (line 70) | def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_...
function get_code_block (line 90) | def get_code_block(reply):
function 终端 (line 99) | def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, ...
FILE: crazy_functions/解析JupyterNotebook.py
class PaperFileGroup (line 6) | class PaperFileGroup():
method __init__ (line 7) | def __init__(self):
method run_file_split (line 21) | def run_file_split(self, max_token_limit=1900):
function parseNotebook (line 42) | def parseNotebook(filename, enable_markdown=1):
function ipynb解释 (line 67) | def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch...
function 解析ipynb文件 (line 118) | def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
FILE: crazy_functions/解析项目源代码.py
function 解析源代码新 (line 5) | def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, cha...
function 解析项目本身 (line 106) | def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom...
function 解析一个Python项目 (line 120) | def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syste...
function 解析一个C项目的头文件 (line 139) | def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system...
function 解析一个C项目 (line 159) | def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro...
function 解析一个Java项目 (line 181) | def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_...
function 解析一个前端项目 (line 203) | def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
function 解析一个Golang项目 (line 232) | def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syste...
function 解析一个Rust项目 (line 253) | def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_...
function 解析一个Lua项目 (line 273) | def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
function 解析一个CSharp项目 (line 295) | def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syste...
function 解析任意code项目 (line 315) | def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_...
FILE: crazy_functions/询问多个大语言模型.py
function 同时问询 (line 5) | def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt...
function 同时问询_指定模型 (line 34) | def 同时问询_指定模型(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p...
FILE: crazy_functions/语音助手.py
class WatchDog (line 10) | class WatchDog():
method __init__ (line 11) | def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
method watch (line 19) | def watch(self):
method begin_watch (line 28) | def begin_watch(self):
method feed (line 34) | def feed(self):
function chatbot2history (line 37) | def chatbot2history(chatbot):
class AsyncGptTask (line 45) | class AsyncGptTask():
method __init__ (line 46) | def __init__(self) -> None:
method gpt_thread_worker (line 50) | def gpt_thread_worker(self, i_say, llm_kwargs, history, sys_prompt, ob...
method add_async_gpt_task (line 61) | def add_async_gpt_task(self, i_say, chatbot_index, llm_kwargs, history...
method update_chatbot (line 69) | def update_chatbot(self, chatbot):
class InterviewAssistant (line 79) | class InterviewAssistant(AliyunASR):
method __init__ (line 80) | def __init__(self):
method __del__ (line 90) | def __del__(self):
method init (line 96) | def init(self, chatbot):
method no_audio_for_a_while (line 111) | def no_audio_for_a_while(self):
method begin (line 117) | def begin(self, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
function 语音助手 (line 172) | def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt...
FILE: crazy_functions/读文章写摘要.py
function 解析Paper (line 7) | def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, ch...
function 读文章写摘要 (line 50) | def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prom...
FILE: crazy_functions/谷歌检索小助手.py
function get_meta_information (line 5) | def get_meta_information(url, chatbot, history):
function 谷歌检索小助手 (line 67) | def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro...
FILE: crazy_functions/辅助回答.py
function 猜你想问 (line 11) | def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt...
FILE: crazy_functions/高级功能函数模板.py
function 高阶功能模板函数 (line 5) | def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pr...
FILE: gradio_demo.py
function auto_opentab_delay (line 41) | def auto_opentab_delay(port=7585):
function load_auto_backend_models (line 50) | def load_auto_backend_models(lama,sam,det,tag2text,device):
function Auto_run (line 85) | def Auto_run(
function main (line 284) | def main(args):
FILE: llm_cards/bridge_all.py
class LazyloadTiktoken (line 28) | class LazyloadTiktoken(object):
method __init__ (line 29) | def __init__(self, model):
method get_encoder (line 34) | def get_encoder(model):
method encode (line 40) | def encode(self, *args, **kwargs):
method decode (line 44) | def decode(self, *args, **kwargs):
function LLM_CATCH_EXCEPTION (line 235) | def LLM_CATCH_EXCEPTION(f):
function predict_no_ui_long_connection (line 249) | def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_promp...
function predict_all (line 326) | def predict_all(inputs, llm_kwargs, *args, **kwargs):
function talk_all (line 340) | def talk_all(inputs, llm_kwargs, *args, **kwargs):
FILE: llm_cards/bridge_chatglm.py
class GetGLMHandle (line 13) | class GetGLMHandle(Process):
method __init__ (line 14) | def __init__(self,quantize=None):
method check_dependency (line 27) | def check_dependency(self):
method ready (line 36) | def ready(self):
method run (line 39) | def run(self):
method stream_chat (line 95) | def stream_chat(self, **kwargs):
function predict_no_ui_long_connection (line 111) | def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_pr...
function asr_with_gpt (line 143) | def asr_with_gpt(transcriber,text_queue, llm_kwargs, plugin_kwargs, chat...
function send_ui (line 171) | async def send_ui(text_queue,chatbot, history):
function Talk_with_app (line 187) | def Talk_with_app(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[]...
function predict (line 239) | def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], syst...
FILE: llm_cards/bridge_chatgpt.py
function get_full_error (line 31) | def get_full_error(chunk, stream_response):
function predict_no_ui_long_connection (line 43) | def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_pr...
function asr_with_gpt (line 111) | def asr_with_gpt(transcriber,text_queue, llm_kwargs, plugin_kwargs, chat...
function Talk_with_app (line 138) | def Talk_with_app(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[]...
function predict (line 191) | def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], syst...
function generate_payload (line 338) | def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
FILE: llm_cards/bridge_stackclaude.py
class SlackClient (line 23) | class SlackClient(AsyncWebClient):
method open_channel (line 38) | async def open_channel(self):
method chat (line 42) | async def chat(self, text):
method get_slack_messages (line 49) | async def get_slack_messages(self):
method get_reply (line 59) | async def get_reply(self):
class ClaudeHandle (line 82) | class ClaudeHandle(Process):
method __init__ (line 83) | def __init__(self):
method check_dependency (line 95) | def check_dependency(self):
method ready (line 105) | def ready(self):
method async_run (line 108) | async def async_run(self):
method run (line 140) | def run(self):
method stream_chat (line 177) | def stream_chat(self, **kwargs):
function predict_no_ui_long_connection (line 204) | def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_pr...
function predict (line 234) | def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], syst...
FILE: llm_cards/core_functional.py
function get_core_functions (line 9) | def get_core_functions():
function handle_core_functionality (line 81) | def handle_core_functionality(additional_fn, inputs, history, chatbot):
FILE: llm_cards/crazy_functional.py
function get_crazy_functions (line 4) | def get_crazy_functions():
FILE: model_cards/Tag2Text/batch_inference.py
function parse_args (line 20) | def parse_args():
function load_dataset (line 94) | def load_dataset(
function get_class_idxs (line 151) | def get_class_idxs(
function load_thresholds (line 170) | def load_thresholds(
function gen_pred_file (line 201) | def gen_pred_file(
function load_ram (line 217) | def load_ram(
function load_tag2text (line 236) | def load_tag2text(
function forward_ram (line 250) | def forward_ram(model: Module, imgs: Tensor) -> Tensor:
function forward_tag2text (line 267) | def forward_tag2text(
function print_write (line 287) | def print_write(f: TextIO, s: str):
FILE: model_cards/Tag2Text/ram/inference.py
function inference_tag2text (line 8) | def inference_tag2text(image, model, input_tag="None"):
function inference_ram (line 33) | def inference_ram(image, model):
function inference_ram_openset (line 42) | def inference_ram_openset(image, model):
FILE: model_cards/Tag2Text/ram/models/bert.py
class BertEmbeddings_nopos (line 52) | class BertEmbeddings_nopos(nn.Module):
method __init__ (line 55) | def __init__(self, config):
method forward (line 71) | def forward(
class BertEmbeddings (line 100) | class BertEmbeddings(nn.Module):
method __init__ (line 103) | def __init__(self, config):
method forward (line 119) | def forward(
class BertSelfAttention (line 146) | class BertSelfAttention(nn.Module):
method __init__ (line 147) | def __init__(self, config, is_cross_attention):
method save_attn_gradients (line 175) | def save_attn_gradients(self, attn_gradients):
method get_attn_gradients (line 178) | def get_attn_gradients(self):
method save_attention_map (line 181) | def save_attention_map(self, attention_map):
method get_attention_map (line 184) | def get_attention_map(self):
method transpose_for_scores (line 187) | def transpose_for_scores(self, x):
method forward (line 192) | def forward(
class BertSelfOutput (line 284) | class BertSelfOutput(nn.Module):
method __init__ (line 285) | def __init__(self, config):
method forward (line 291) | def forward(self, hidden_states, input_tensor):
class BertAttention (line 298) | class BertAttention(nn.Module):
method __init__ (line 299) | def __init__(self, config, is_cross_attention=False):
method prune_heads (line 305) | def prune_heads(self, heads):
method forward (line 323) | def forward(
class BertIntermediate (line 347) | class BertIntermediate(nn.Module):
method __init__ (line 348) | def __init__(self, config):
method forward (line 356) | def forward(self, hidden_states):
class BertOutput (line 362) | class BertOutput(nn.Module):
method __init__ (line 363) | def __init__(self, config):
method forward (line 369) | def forward(self, hidden_states, input_tensor):
class BertLayer (line 376) | class BertLayer(nn.Module):
method __init__ (line 377) | def __init__(self, config, layer_num):
method forward (line 389) | def forward(
method feed_forward_chunk (line 455) | def feed_forward_chunk(self, attention_output):
class BertEncoder (line 461) | class BertEncoder(nn.Module):
method __init__ (line 462) | def __init__(self, config):
method forward (line 468) | def forward(
class BertPooler (line 561) | class BertPooler(nn.Module):
method __init__ (line 562) | def __init__(self, config):
method forward (line 567) | def forward(self, hidden_states):
class BertPredictionHeadTransform (line 576) | class BertPredictionHeadTransform(nn.Module):
method __init__ (line 577) | def __init__(self, config):
method forward (line 586) | def forward(self, hidden_states):
class BertLMPredictionHead (line 593) | class BertLMPredictionHead(nn.Module):
method __init__ (line 594) | def __init__(self, config):
method forward (line 607) | def forward(self, hidden_states):
class BertOnlyMLMHead (line 613) | class BertOnlyMLMHead(nn.Module):
method __init__ (line 614) | def __init__(self, config):
method forward (line 618) | def forward(self, sequence_output):
class BertPreTrainedModel (line 623) | class BertPreTrainedModel(PreTrainedModel):
method _init_weights (line 633) | def _init_weights(self, module):
class BertModel (line 646) | class BertModel(BertPreTrainedModel):
method __init__ (line 656) | def __init__(self, config, add_pooling_layer=True):
method get_input_embeddings (line 669) | def get_input_embeddings(self):
method set_input_embeddings (line 672) | def set_input_embeddings(self, value):
method _prune_heads (line 675) | def _prune_heads(self, heads_to_prune):
method get_extended_attention_mask (line 684) | def get_extended_attention_mask(self, attention_mask: Tensor, input_sh...
method forward (line 745) | def forward(
class BertLMHeadModel (line 885) | class BertLMHeadModel(BertPreTrainedModel):
method __init__ (line 890) | def __init__(self, config):
method get_output_embeddings (line 898) | def get_output_embeddings(self):
method set_output_embeddings (line 901) | def set_output_embeddings(self, new_embeddings):
method forward (line 904) | def forward(
method prepare_inputs_for_generation (line 1010) | def prepare_inputs_for_generation(self, input_ids, past=None, attentio...
method _reorder_cache (line 1029) | def _reorder_cache(self, past, beam_idx):
FILE: model_cards/Tag2Text/ram/models/ram.py
class RAM (line 20) | class RAM(nn.Module):
method __init__ (line 21) | def __init__(self,
method load_tag_list (line 158) | def load_tag_list(self, tag_list_file):
method del_selfattention (line 165) | def del_selfattention(self):
method generate_tag (line 170) | def generate_tag(self,
method generate_tag_openset (line 217) | def generate_tag_openset(self,
function ram (line 262) | def ram(pretrained='', **kwargs):
FILE: model_cards/Tag2Text/ram/models/swin_transformer.py
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 18) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 27) | def forward(self, x):
function window_partition (line 36) | def window_partition(x, window_size):
function window_reverse (line 51) | def window_reverse(windows, window_size, H, W):
class WindowAttention (line 68) | class WindowAttention(nn.Module):
method __init__ (line 82) | def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scal...
method forward (line 116) | def forward(self, x, mask=None):
method extra_repr (line 149) | def extra_repr(self) -> str:
method flops (line 152) | def flops(self, N):
class SwinTransformerBlock (line 166) | class SwinTransformerBlock(nn.Module):
method __init__ (line 185) | def __init__(self, dim, input_resolution, num_heads, window_size=7, sh...
method forward (line 236) | def forward(self, x):
method extra_repr (line 275) | def extra_repr(self) -> str:
method flops (line 279) | def flops(self):
class PatchMerging (line 294) | class PatchMerging(nn.Module):
method __init__ (line 303) | def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
method forward (line 310) | def forward(self, x):
method extra_repr (line 333) | def extra_repr(self) -> str:
method flops (line 336) | def flops(self):
class BasicLayer (line 343) | class BasicLayer(nn.Module):
method __init__ (line 363) | def __init__(self, dim, input_resolution, depth, num_heads, window_size,
method forward (line 391) | def forward(self, x):
method extra_repr (line 401) | def extra_repr(self) -> str:
method flops (line 404) | def flops(self):
class PatchEmbed (line 413) | class PatchEmbed(nn.Module):
method __init__ (line 424) | def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=9...
method forward (line 443) | def forward(self, x):
method flops (line 453) | def flops(self):
class SwinTransformer (line 461) | class SwinTransformer(nn.Module):
method __init__ (line 487) | def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes...
method _init_weights (line 545) | def _init_weights(self, m):
method no_weight_decay (line 555) | def no_weight_decay(self):
method no_weight_decay_keywords (line 559) | def no_weight_decay_keywords(self):
method forward (line 562) | def forward(self, x, idx_to_group_img=None, image_atts=None, **kwargs):
method flops (line 586) | def flops(self):
function interpolate_relative_pos_embed (line 596) | def interpolate_relative_pos_embed(rel_pos_bias, dst_num_pos, param_name...
FILE: model_cards/Tag2Text/ram/models/tag2text.py
class Tag2Text (line 19) | class Tag2Text(nn.Module):
method __init__ (line 21) | def __init__(self,
method load_tag_list (line 128) | def load_tag_list(self, tag_list_file):
method del_selfattention (line 135) | def del_selfattention(self):
method forward (line 141) | def forward(self, image, caption, tag):
method generate (line 231) | def generate(self,
function tag2text (line 360) | def tag2text(pretrained='', **kwargs):
FILE: model_cards/Tag2Text/ram/models/utils.py
function read_json (line 16) | def read_json(rpath):
function tie_encoder_decoder_weights (line 21) | def tie_encoder_decoder_weights(encoder: nn.Module, decoder: nn.Module,
class GroupWiseLinear (line 99) | class GroupWiseLinear(nn.Module):
method __init__ (line 103) | def __init__(self, num_class, hidden_dim, bias=True):
method reset_parameters (line 114) | def reset_parameters(self):
method forward (line 122) | def forward(self, x):
function init_tokenizer (line 130) | def init_tokenizer():
function create_vit (line 138) | def create_vit(vit,
function is_url (line 170) | def is_url(url_or_filename):
function load_checkpoint (line 175) | def load_checkpoint(model, url_or_filename):
function load_checkpoint_swinbase (line 203) | def load_checkpoint_swinbase(model, url_or_filename, kwargs):
function load_checkpoint_swinlarge (line 241) | def load_checkpoint_swinlarge(model, url_or_filename, kwargs):
class AsymmetricLoss (line 281) | class AsymmetricLoss(nn.Module):
method __init__ (line 282) | def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disa...
method forward (line 291) | def forward(self, x, y):
FILE: model_cards/Tag2Text/ram/models/vit.py
class Mlp (line 23) | class Mlp(nn.Module):
method __init__ (line 26) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 35) | def forward(self, x):
class Attention (line 44) | class Attention(nn.Module):
method __init__ (line 45) | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, at...
method save_attn_gradients (line 58) | def save_attn_gradients(self, attn_gradients):
method get_attn_gradients (line 61) | def get_attn_gradients(self):
method save_attention_map (line 64) | def save_attention_map(self, attention_map):
method get_attention_map (line 67) | def get_attention_map(self):
method forward (line 70) | def forward(self, x, register_hook=False):
class Block (line 89) | class Block(nn.Module):
method __init__ (line 91) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc...
method forward (line 107) | def forward(self, x, register_hook=False):
class VisionTransformer (line 113) | class VisionTransformer(nn.Module):
method __init__ (line 118) | def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classe...
method _init_weights (line 167) | def _init_weights(self, m):
method no_weight_decay (line 177) | def no_weight_decay(self):
method forward (line 180) | def forward(self, x, register_blk=-1):
method load_pretrained (line 197) | def load_pretrained(self, checkpoint_path, prefix=''):
function _load_weights (line 202) | def _load_weights(model: VisionTransformer, checkpoint_path: str, prefix...
function interpolate_pos_embed (line 281) | def interpolate_pos_embed(pos_embed_checkpoint, visual_encoder):
FILE: model_cards/Tag2Text/ram/transform.py
function get_transform (line 4) | def get_transform(image_size=384):
FILE: model_cards/Tag2Text/ram/utils/metrics.py
function get_mAP (line 7) | def get_mAP(
function _average_precision (line 41) | def _average_precision(output: ndarray, target: ndarray) -> float:
function get_PR (line 61) | def get_PR(
FILE: model_cards/Tag2Text/ram/utils/openset_utils.py
function article (line 9) | def article(name):
function processed_name (line 13) | def processed_name(name, rm_dot=False):
function build_openset_label_embedding (line 293) | def build_openset_label_embedding(categories=None):
FILE: model_cards/autoback.py
function preprocess_image (line 48) | def preprocess_image(img):
class Ensemble (line 67) | class Ensemble(nn.ModuleList):
method __init__ (line 69) | def __init__(self):
method forward (line 72) | def forward(self, x, augment=False, profile=False, visualize=False):
function torch_safe_load (line 77) | def torch_safe_load(weight):
function is_similar_string (line 102) | def is_similar_string(string):
function attempt_load (line 109) | def attempt_load(weights, device=None):
class AutoBackend (line 124) | class AutoBackend(nn.Module):
method __init__ (line 126) | def __init__(self, methods: str ,weights: None , device=torch.device('...
method forward (line 253) | def forward(self, im, augment=False, visualize=False,prompt= None ,box...
method gligen_inference (line 278) | def gligen_inference(config=None, starting_noise=None,negative_prompt=...
method grounded_inference (line 281) | def grounded_inference(self,im,caption,box_threshold,text_threshold,io...
method sam_inference (line 322) | def sam_inference(self,im,prompt):
method tag2text_inference (line 334) | def tag2text_inference(self,im,prompt):
method ram_inference (line 341) | def ram_inference(self,im):
method lama_inference (line 347) | def lama_inference(self,im,mask) :
method _model_type (line 376) | def _model_type(p='path/to/model.pt'):
FILE: model_cards/groundingdino/datasets/transforms.py
function crop (line 17) | def crop(image, target, region):
function hflip (line 68) | def hflip(image, target):
function resize (line 87) | def resize(image, target, size, max_size=None):
function pad (line 149) | def pad(image, target, padding):
class ResizeDebug (line 162) | class ResizeDebug(object):
method __init__ (line 163) | def __init__(self, size):
method __call__ (line 166) | def __call__(self, img, target):
class RandomCrop (line 170) | class RandomCrop(object):
method __init__ (line 171) | def __init__(self, size):
method __call__ (line 174) | def __call__(self, img, target):
class RandomSizeCrop (line 179) | class RandomSizeCrop(object):
method __init__ (line 180) | def __init__(self, min_size: int, max_size: int, respect_boxes: bool =...
method __call__ (line 187) | def __call__(self, img: PIL.Image.Image, target: dict):
class CenterCrop (line 204) | class CenterCrop(object):
method __init__ (line 205) | def __init__(self, size):
method __call__ (line 208) | def __call__(self, img, target):
class RandomHorizontalFlip (line 216) | class RandomHorizontalFlip(object):
method __init__ (line 217) | def __init__(self, p=0.5):
method __call__ (line 220) | def __call__(self, img, target):
class RandomResize (line 226) | class RandomResize(object):
method __init__ (line 227) | def __init__(self, sizes, max_size=None):
method __call__ (line 232) | def __call__(self, img, target=None):
class RandomPad (line 237) | class RandomPad(object):
method __init__ (line 238) | def __init__(self, max_pad):
method __call__ (line 241) | def __call__(self, img, target):
class RandomSelect (line 247) | class RandomSelect(object):
method __init__ (line 253) | def __init__(self, transforms1, transforms2, p=0.5):
method __call__ (line 258) | def __call__(self, img, target):
class ToTensor (line 264) | class ToTensor(object):
method __call__ (line 265) | def __call__(self, img, target):
class RandomErasing (line 269) | class RandomErasing(object):
method __init__ (line 270) | def __init__(self, *args, **kwargs):
method __call__ (line 273) | def __call__(self, img, target):
class Normalize (line 277) | class Normalize(object):
method __init__ (line 278) | def __init__(self, mean, std):
method __call__ (line 282) | def __call__(self, image, target=None):
class Compose (line 296) | class Compose(object):
method __init__ (line 297) | def __init__(self, transforms):
method __call__ (line 300) | def __call__(self, image, target):
method __repr__ (line 305) | def __repr__(self):
FILE: model_cards/groundingdino/models/GroundingDINO/backbone/backbone.py
class FrozenBatchNorm2d (line 33) | class FrozenBatchNorm2d(torch.nn.Module):
method __init__ (line 42) | def __init__(self, n):
method _load_from_state_dict (line 49) | def _load_from_state_dict(
method forward (line 60) | def forward(self, x):
class BackboneBase (line 73) | class BackboneBase(nn.Module):
method __init__ (line 74) | def __init__(
method forward (line 107) | def forward(self, tensor_list: NestedTensor):
class Backbone (line 119) | class Backbone(BackboneBase):
method __init__ (line 122) | def __init__(
class Joiner (line 146) | class Joiner(nn.Sequential):
method __init__ (line 147) | def __init__(self, backbone, position_embedding):
method forward (line 150) | def forward(self, tensor_list: NestedTensor):
function build_backbone (line 162) | def build_backbone(args):
FILE: model_cards/groundingdino/models/GroundingDINO/backbone/position_encoding.py
class PositionEmbeddingSine (line 30) | class PositionEmbeddingSine(nn.Module):
method __init__ (line 36) | def __init__(self, num_pos_feats=64, temperature=10000, normalize=Fals...
method forward (line 47) | def forward(self, tensor_list: NestedTensor):
class PositionEmbeddingSineHW (line 78) | class PositionEmbeddingSineHW(nn.Module):
method __init__ (line 84) | def __init__(
method forward (line 98) | def forward(self, tensor_list: NestedTensor):
class PositionEmbeddingLearned (line 134) | class PositionEmbeddingLearned(nn.Module):
method __init__ (line 139) | def __init__(self, num_pos_feats=256):
method reset_parameters (line 145) | def reset_parameters(self):
method forward (line 149) | def forward(self, tensor_list: NestedTensor):
function build_position_encoding (line 171) | def build_position_encoding(args):
FILE: model_cards/groundingdino/models/GroundingDINO/backbone/swin_transformer.py
class Mlp (line 24) | class Mlp(nn.Module):
method __init__ (line 27) | def __init__(
method forward (line 38) | def forward(self, x):
function window_partition (line 47) | def window_partition(x, window_size):
function window_reverse (line 61) | def window_reverse(windows, window_size, H, W):
class WindowAttention (line 77) | class WindowAttention(nn.Module):
method __init__ (line 90) | def __init__(
method forward (line 134) | def forward(self, x, mask=None):
class SwinTransformerBlock (line 177) | class SwinTransformerBlock(nn.Module):
method __init__ (line 194) | def __init__(
method forward (line 238) | def forward(self, x, mask_matrix):
class PatchMerging (line 301) | class PatchMerging(nn.Module):
method __init__ (line 308) | def __init__(self, dim, norm_layer=nn.LayerNorm):
method forward (line 314) | def forward(self, x, H, W):
class BasicLayer (line 343) | class BasicLayer(nn.Module):
method __init__ (line 361) | def __init__(
method forward (line 409) | def forward(self, x, H, W):
class PatchEmbed (line 459) | class PatchEmbed(nn.Module):
method __init__ (line 468) | def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=...
method forward (line 482) | def forward(self, x):
class SwinTransformer (line 501) | class SwinTransformer(nn.Module):
method __init__ (line 530) | def __init__(
method _freeze_stages (line 636) | def _freeze_stages(self):
method forward_raw (line 678) | def forward_raw(self, x):
method forward (line 712) | def forward(self, tensor_list: NestedTensor):
method train (line 756) | def train(self, mode=True):
function build_swin_transformer (line 762) | def build_swin_transformer(modelname, pretrain_img_size, **kw):
FILE: model_cards/groundingdino/models/GroundingDINO/bertwarper.py
class BertModelWarper (line 17) | class BertModelWarper(nn.Module):
method __init__ (line 18) | def __init__(self, bert_model):
method forward (line 31) | def forward(
class TextEncoderShell (line 169) | class TextEncoderShell(nn.Module):
method __init__ (line 170) | def __init__(self, text_encoder):
method forward (line 175) | def forward(self, **kw):
function generate_masks_with_special_tokens (line 180) | def generate_masks_with_special_tokens(tokenized, special_tokens_list, t...
function generate_masks_with_special_tokens_and_transfer_map (line 224) | def generate_masks_with_special_tokens_and_transfer_map(tokenized, speci...
FILE: model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h
function namespace (line 19) | namespace groundingdino {
FILE: model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp
type groundingdino (line 16) | namespace groundingdino {
function ms_deform_attn_cpu_forward (line 18) | at::Tensor
function ms_deform_attn_cpu_backward (line 30) | std::vector<at::Tensor>
FILE: model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h
function namespace (line 14) | namespace groundingdino {
FILE: model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h
function namespace (line 14) | namespace groundingdino {
FILE: model_cards/groundingdino/models/GroundingDINO/csrc/vision.cpp
type groundingdino (line 5) | namespace groundingdino {
function get_cuda_version (line 11) | std::string get_cuda_version() {
function get_compiler_version (line 32) | std::string get_compiler_version() {
function PYBIND11_MODULE (line 53) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: model_cards/groundingdino/models/GroundingDINO/fuse_modules.py
class FeatureResizer (line 14) | class FeatureResizer(nn.Module):
method __init__ (line 20) | def __init__(self, input_feat_size, output_feat_size, dropout, do_ln=T...
method forward (line 28) | def forward(self, encoder_features):
function l1norm (line 36) | def l1norm(X, dim, eps=1e-8):
function l2norm (line 43) | def l2norm(X, dim, eps=1e-8):
function func_attention (line 50) | def func_attention(query, context, smooth=1, raw_feature_norm="softmax",...
class BiMultiHeadAttention (line 99) | class BiMultiHeadAttention(nn.Module):
method __init__ (line 100) | def __init__(self, v_dim, l_dim, embed_dim, num_heads, dropout=0.1, cf...
method _shape (line 129) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
method _reset_parameters (line 132) | def _reset_parameters(self):
method forward (line 146) | def forward(self, v, l, attention_mask_v=None, attention_mask_l=None):
class BiAttentionBlock (line 252) | class BiAttentionBlock(nn.Module):
method __init__ (line 253) | def __init__(
method forward (line 286) | def forward(self, v, l, attention_mask_v=None, attention_mask_l=None):
FILE: model_cards/groundingdino/models/GroundingDINO/groundingdino.py
class GroundingDINO (line 52) | class GroundingDINO(nn.Module):
method __init__ (line 55) | def __init__(
method _reset_parameters (line 204) | def _reset_parameters(self):
method init_ref_points (line 210) | def init_ref_points(self, use_num_queries):
method forward (line 213) | def forward(self, samples: NestedTensor, targets: List = None, **kw):
method _set_aux_loss (line 353) | def _set_aux_loss(self, outputs_class, outputs_coord):
function build_groundingdino (line 364) | def build_groundingdino(args):
FILE: model_cards/groundingdino/models/GroundingDINO/ms_deform_attn.py
function _is_power_of_2 (line 35) | def _is_power_of_2(n):
class MultiScaleDeformableAttnFunction (line 41) | class MultiScaleDeformableAttnFunction(Function):
method forward (line 43) | def forward(
method backward (line 72) | def backward(ctx, grad_output):
function multi_scale_deformable_attn_pytorch (line 93) | def multi_scale_deformable_attn_pytorch(
class MultiScaleDeformableAttention (line 136) | class MultiScaleDeformableAttention(nn.Module):
method __init__ (line 154) | def __init__(
method _reset_parameters (line 194) | def _reset_parameters(self):
method init_weights (line 197) | def init_weights(self):
method freeze_sampling_offsets (line 222) | def freeze_sampling_offsets(self):
method freeze_attention_weights (line 227) | def freeze_attention_weights(self):
method forward (line 232) | def forward(
function create_dummy_class (line 362) | def create_dummy_class(klass, dependency, message=""):
function create_dummy_func (line 391) | def create_dummy_func(func, dependency, message=""):
FILE: model_cards/groundingdino/models/GroundingDINO/transformer.py
class Transformer (line 40) | class Transformer(nn.Module):
method __init__ (line 41) | def __init__(
method _reset_parameters (line 189) | def _reset_parameters(self):
method get_valid_ratio (line 199) | def get_valid_ratio(self, mask):
method init_ref_points (line 208) | def init_ref_points(self, use_num_queries):
method forward (line 211) | def forward(self, srcs, masks, refpoint_embed, pos_embeds, tgt, attn_m...
class TransformerEncoder (line 406) | class TransformerEncoder(nn.Module):
method __init__ (line 407) | def __init__(
method get_reference_points (line 466) | def get_reference_points(spatial_shapes, valid_ratios, device):
method forward (line 482) | def forward(
class TransformerDecoder (line 598) | class TransformerDecoder(nn.Module):
method __init__ (line 599) | def __init__(
method forward (line 633) | def forward(
class DeformableTransformerEncoderLayer (line 738) | class DeformableTransformerEncoderLayer(nn.Module):
method __init__ (line 739) | def __init__(
method with_pos_embed (line 771) | def with_pos_embed(tensor, pos):
method forward_ffn (line 774) | def forward_ffn(self, src):
method forward (line 780) | def forward(
class DeformableTransformerDecoderLayer (line 802) | class DeformableTransformerDecoderLayer(nn.Module):
method __init__ (line 803) | def __init__(
method rm_self_attn_modules (line 852) | def rm_self_attn_modules(self):
method with_pos_embed (line 858) | def with_pos_embed(tensor, pos):
method forward_ffn (line 861) | def forward_ffn(self, tgt):
method forward (line 868) | def forward(
function build_transformer (line 930) | def build_transformer(args):
FILE: model_cards/groundingdino/models/GroundingDINO/transformer_vanilla.py
class TextTransformer (line 33) | class TextTransformer(nn.Module):
method __init__ (line 34) | def __init__(self, num_layers, d_model=256, nheads=8, dim_feedforward=...
method forward (line 47) | def forward(self, memory_text: torch.Tensor, text_attention_mask: torc...
class TransformerEncoderLayer (line 72) | class TransformerEncoderLayer(nn.Module):
method __init__ (line 73) | def __init__(
method with_pos_embed (line 98) | def with_pos_embed(self, tensor, pos: Optional[Tensor]):
method forward (line 101) | def forward(
FILE: model_cards/groundingdino/models/GroundingDINO/utils.py
function _get_clones (line 16) | def _get_clones(module, N, layer_share=False):
function get_sine_pos_embed (line 24) | def get_sine_pos_embed(
function gen_encoder_output_proposals (line 56) | def gen_encoder_output_proposals(
class RandomBoxPerturber (line 119) | class RandomBoxPerturber:
method __init__ (line 120) | def __init__(
method __call__ (line 127) | def __call__(self, refanchors: Tensor) -> Tensor:
function sigmoid_focal_loss (line 138) | def sigmoid_focal_loss(
class MLP (line 171) | class MLP(nn.Module):
method __init__ (line 174) | def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
method forward (line 182) | def forward(self, x):
function _get_activation_fn (line 188) | def _get_activation_fn(activation, d_model=256, batch_dim=0):
function gen_sineembed_for_position (line 204) | def gen_sineembed_for_position(pos_tensor):
class ContrastiveEmbed (line 233) | class ContrastiveEmbed(nn.Module):
method __init__ (line 234) | def __init__(self, max_text_len=256):
method forward (line 242) | def forward(self, x, text_dict):
FILE: model_cards/groundingdino/models/__init__.py
function build_model (line 11) | def build_model(args):
FILE: model_cards/groundingdino/models/registry.py
class Registry (line 18) | class Registry(object):
method __init__ (line 19) | def __init__(self, name):
method __repr__ (line 23) | def __repr__(self):
method __len__ (line 29) | def __len__(self):
method name (line 33) | def name(self):
method module_dict (line 37) | def module_dict(self):
method get (line 40) | def get(self, key):
method registe_with_name (line 43) | def registe_with_name(self, module_name=None, force=False):
method register (line 46) | def register(self, module_build_function, module_name=None, force=False):
FILE: model_cards/groundingdino/util/box_ops.py
function box_cxcywh_to_xyxy (line 9) | def box_cxcywh_to_xyxy(x):
function box_xyxy_to_cxcywh (line 15) | def box_xyxy_to_cxcywh(x):
function box_iou (line 22) | def box_iou(boxes1, boxes2):
function generalized_box_iou (line 39) | def generalized_box_iou(boxes1, boxes2):
function box_iou_pairwise (line 66) | def box_iou_pairwise(boxes1, boxes2):
function generalized_box_iou_pairwise (line 82) | def generalized_box_iou_pairwise(boxes1, boxes2):
function masks_to_boxes (line 107) | def masks_to_boxes(masks):
FILE: model_cards/groundingdino/util/get_tokenlizer.py
function get_tokenlizer (line 4) | def get_tokenlizer(text_encoder_type):
function get_pretrained_language_model (line 21) | def get_pretrained_language_model(text_encoder_type):
FILE: model_cards/groundingdino/util/inference.py
function preprocess_caption (line 21) | def preprocess_caption(caption: str) -> str:
function load_model (line 28) | def load_model(model_config_path: str, model_checkpoint_path: str, devic...
function load_image (line 38) | def load_image(image_path: str) -> Tuple[np.array, torch.Tensor]:
function predict (line 52) | def predict(
function annotate (line 87) | def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torc...
class Model (line 110) | class Model:
method __init__ (line 112) | def __init__(
method predict_with_caption (line 125) | def predict_with_caption(
method predict_with_classes (line 165) | def predict_with_classes(
method preprocess_image (line 210) | def preprocess_image(image_bgr: np.ndarray) -> torch.Tensor:
method post_process_result (line 223) | def post_process_result(
method phrases2classes (line 235) | def phrases2classes(phrases: List[str], classes: List[str]) -> np.ndar...
FILE: model_cards/groundingdino/util/logger.py
class _ColorfulFormatter (line 10) | class _ColorfulFormatter(logging.Formatter):
method __init__ (line 11) | def __init__(self, *args, **kwargs):
method formatMessage (line 18) | def formatMessage(self, record):
function setup_logger (line 32) | def setup_logger(output=None, distributed_rank=0, *, color=True, name="i...
function _cached_log_stream (line 92) | def _cached_log_stream(filename):
FILE: model_cards/groundingdino/util/misc.py
class SmoothedValue (line 33) | class SmoothedValue(object):
method __init__ (line 38) | def __init__(self, window_size=20, fmt=None):
method update (line 46) | def update(self, value, n=1):
method synchronize_between_processes (line 51) | def synchronize_between_processes(self):
method median (line 65) | def median(self):
method avg (line 72) | def avg(self):
method global_avg (line 77) | def global_avg(self):
method max (line 85) | def max(self):
method value (line 89) | def value(self):
method __str__ (line 92) | def __str__(self):
function _get_global_gloo_group (line 103) | def _get_global_gloo_group():
function all_gather_cpu (line 115) | def all_gather_cpu(data):
function all_gather (line 173) | def all_gather(data):
function reduce_dict (line 220) | def reduce_dict(input_dict, average=True):
class MetricLogger (line 247) | class MetricLogger(object):
method __init__ (line 248) | def __init__(self, delimiter="\t"):
method update (line 252) | def update(self, **kwargs):
method __getattr__ (line 259) | def __getattr__(self, attr):
method __str__ (line 266) | def __str__(self):
method synchronize_between_processes (line 275) | def synchronize_between_processes(self):
method add_meter (line 279) | def add_meter(self, name, meter):
method log_every (line 282) | def log_every(self, iterable, print_freq, header=None, logger=None):
function get_sha (line 362) | def get_sha():
function collate_fn (line 383) | def collate_fn(batch):
function _max_by_axis (line 390) | def _max_by_axis(the_list):
class NestedTensor (line 399) | class NestedTensor(object):
method __init__ (line 400) | def __init__(self, tensors, mask: Optional[Tensor]):
method imgsize (line 416) | def imgsize(self):
method to (line 425) | def to(self, device):
method to_img_list_single (line 436) | def to_img_list_single(self, tensor, mask):
method to_img_list (line 443) | def to_img_list(self):
method device (line 460) | def device(self):
method decompose (line 463) | def decompose(self):
method __repr__ (line 466) | def __repr__(self):
method shape (line 470) | def shape(self):
function nested_tensor_from_tensor_list (line 474) | def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
function _onnx_nested_tensor_from_tensor_list (line 502) | def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> N...
function setup_for_distributed (line 532) | def setup_for_distributed(is_master):
function is_dist_avail_and_initialized (line 548) | def is_dist_avail_and_initialized():
function get_world_size (line 556) | def get_world_size():
function get_rank (line 562) | def get_rank():
function is_main_process (line 568) | def is_main_process():
function save_on_master (line 572) | def save_on_master(*args, **kwargs):
function init_distributed_mode (line 577) | def init_distributed_mode(args):
function accuracy (line 638) | def accuracy(output, target, topk=(1,)):
function accuracy_onehot (line 657) | def accuracy_onehot(pred, gt):
function interpolate (line 669) | def interpolate(input, size=None, scale_factor=None, mode="nearest", ali...
class color_sys (line 687) | class color_sys:
method __init__ (line 688) | def __init__(self, num_colors) -> None:
method __call__ (line 700) | def __call__(self, idx):
function inverse_sigmoid (line 704) | def inverse_sigmoid(x, eps=1e-3):
function clean_state_dict (line 711) | def clean_state_dict(state_dict):
FILE: model_cards/groundingdino/util/slconfig.py
function check_file_exist (line 21) | def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
class ConfigDict (line 26) | class ConfigDict(Dict):
method __missing__ (line 27) | def __missing__(self, name):
method __getattr__ (line 30) | def __getattr__(self, name):
class SLConfig (line 42) | class SLConfig(object):
method _validate_py_syntax (line 68) | def _validate_py_syntax(filename):
method _file2dict (line 77) | def _file2dict(filename):
method _merge_a_into_b (line 140) | def _merge_a_into_b(a, b):
method fromfile (line 184) | def fromfile(filename):
method __init__ (line 188) | def __init__(self, cfg_dict=None, cfg_text=None, filename=None):
method filename (line 209) | def filename(self):
method text (line 213) | def text(self):
method pretty_text (line 217) | def pretty_text(self):
method __repr__ (line 310) | def __repr__(self):
method __len__ (line 313) | def __len__(self):
method __getattr__ (line 316) | def __getattr__(self, name):
method __getitem__ (line 329) | def __getitem__(self, name):
method __setattr__ (line 332) | def __setattr__(self, name, value):
method __setitem__ (line 337) | def __setitem__(self, name, value):
method __iter__ (line 342) | def __iter__(self):
method dump (line 345) | def dump(self, file=None):
method merge_from_dict (line 353) | def merge_from_dict(self, options):
method __setstate__ (line 386) | def __setstate__(self, state):
method copy (line 389) | def copy(self):
method deepcopy (line 392) | def deepcopy(self):
class DictAction (line 396) | class DictAction(Action):
method _parse_int_float_bool (line 404) | def _parse_int_float_bool(val):
method __call__ (line 419) | def __call__(self, parser, namespace, values, option_string=None):
FILE: model_cards/groundingdino/util/slio.py
class BaseFileHandler (line 23) | class BaseFileHandler(metaclass=ABCMeta):
method load_from_fileobj (line 25) | def load_from_fileobj(self, file, **kwargs):
method dump_to_fileobj (line 29) | def dump_to_fileobj(self, obj, file, **kwargs):
method dump_to_str (line 33) | def dump_to_str(self, obj, **kwargs):
method load_from_path (line 36) | def load_from_path(self, filepath, mode="r", **kwargs):
method dump_to_path (line 40) | def dump_to_path(self, obj, filepath, mode="w", **kwargs):
class JsonHandler (line 45) | class JsonHandler(BaseFileHandler):
method load_from_fileobj (line 46) | def load_from_fileobj(self, file):
method dump_to_fileobj (line 49) | def dump_to_fileobj(self, obj, file, **kwargs):
method dump_to_str (line 52) | def dump_to_str(self, obj, **kwargs):
class PickleHandler (line 56) | class PickleHandler(BaseFileHandler):
method load_from_fileobj (line 57) | def load_from_fileobj(self, file, **kwargs):
method load_from_path (line 60) | def load_from_path(self, filepath, **kwargs):
method dump_to_str (line 63) | def dump_to_str(self, obj, **kwargs):
method dump_to_fileobj (line 67) | def dump_to_fileobj(self, obj, file, **kwargs):
method dump_to_path (line 71) | def dump_to_path(self, obj, filepath, **kwargs):
class YamlHandler (line 75) | class YamlHandler(BaseFileHandler):
method load_from_fileobj (line 76) | def load_from_fileobj(self, file, **kwargs):
method dump_to_fileobj (line 80) | def dump_to_fileobj(self, obj, file, **kwargs):
method dump_to_str (line 84) | def dump_to_str(self, obj, **kwargs):
function is_str (line 102) | def is_str(x):
function slload (line 110) | def slload(file, file_format=None, **kwargs):
function sldump (line 143) | def sldump(obj, file=None, file_format=None, **kwargs):
FILE: model_cards/groundingdino/util/time_counter.py
class TimeCounter (line 5) | class TimeCounter:
method __init__ (line 6) | def __init__(self) -> None:
method clear (line 9) | def clear(self):
method timeit (line 13) | def timeit(self, name):
class TimeHolder (line 19) | class TimeHolder:
method __init__ (line 20) | def __init__(self) -> None:
method update (line 23) | def update(self, _timedict: dict):
method final_res (line 29) | def final_res(self):
method __str__ (line 32) | def __str__(self):
class AverageMeter (line 36) | class AverageMeter(object):
method __init__ (line 39) | def __init__(self, name, fmt=":f", val_only=False):
method reset (line 45) | def reset(self):
method update (line 51) | def update(self, val, n=1):
method __str__ (line 57) | def __str__(self):
FILE: model_cards/groundingdino/util/utils.py
function slprint (line 15) | def slprint(x, name="x"):
function clean_state_dict (line 29) | def clean_state_dict(state_dict):
function renorm (line 38) | def renorm(
class CocoClassMapper (line 66) | class CocoClassMapper:
method __init__ (line 67) | def __init__(self) -> None:
method origin2compact (line 153) | def origin2compact(self, idx):
method compact2origin (line 156) | def compact2origin(self, idx):
function to_device (line 160) | def to_device(item, device):
function get_gaussian_mean (line 174) | def get_gaussian_mean(x, axis, other_axis, softmax=True):
function get_expected_points_from_map (line 200) | def get_expected_points_from_map(hm, softmax=True):
class Embedder (line 222) | class Embedder:
method __init__ (line 223) | def __init__(self, **kwargs):
method create_embedding_fn (line 227) | def create_embedding_fn(self):
method embed (line 251) | def embed(self, inputs):
function get_embedder (line 255) | def get_embedder(multires, i=0):
class APOPMeter (line 275) | class APOPMeter:
method __init__ (line 276) | def __init__(self) -> None:
method update (line 282) | def update(self, pred, gt):
method update_cm (line 293) | def update_cm(self, tp, fp, tn, fn):
function inverse_sigmoid (line 300) | def inverse_sigmoid(x, eps=1e-5):
function get_raw_dict (line 307) | def get_raw_dict(args):
function stat_tensors (line 325) | def stat_tensors(tensor):
class NiceRepr (line 340) | class NiceRepr:
method __nice__ (line 374) | def __nice__(self):
method __repr__ (line 384) | def __repr__(self):
method __str__ (line 394) | def __str__(self):
function ensure_rng (line 405) | def ensure_rng(rng=None):
function random_boxes (line 436) | def random_boxes(num=1, scale=1, rng=None):
class ModelEma (line 473) | class ModelEma(torch.nn.Module):
method __init__ (line 474) | def __init__(self, model, decay=0.9997, device=None):
method _update (line 487) | def _update(self, model, update_fn):
method update (line 496) | def update(self, model):
method set (line 499) | def set(self, model):
class BestMetricSingle (line 503) | class BestMetricSingle:
method __init__ (line 504) | def __init__(self, init_res=0.0, better="large") -> None:
method isbetter (line 512) | def isbetter(self, new_res, old_res):
method update (line 518) | def update(self, new_res, ep):
method __str__ (line 525) | def __str__(self) -> str:
method __repr__ (line 528) | def __repr__(self) -> str:
method summary (line 531) | def summary(self) -> dict:
class BestMetricHolder (line 538) | class BestMetricHolder:
method __init__ (line 539) | def __init__(self, init_res=0.0, better="large", use_ema=False) -> None:
method update (line 546) | def update(self, new_res, epoch, is_ema=False):
method summary (line 560) | def summary(self):
method __repr__ (line 570) | def __repr__(self) -> str:
method __str__ (line 573) | def __str__(self) -> str:
function targets_to (line 577) | def targets_to(targets: List[Dict[str, Any]], device):
function get_phrases_from_posmap (line 599) | def get_phrases_from_posmap(
FILE: model_cards/groundingdino/util/visualizer.py
function renorm (line 22) | def renorm(
class ColorMap (line 50) | class ColorMap:
method __init__ (line 51) | def __init__(self, basergb=[255, 255, 0]):
method __call__ (line 54) | def __call__(self, attnmap):
function rainbow_text (line 66) | def rainbow_text(x, y, ls, lc, **kw):
class COCOVisualizer (line 95) | class COCOVisualizer:
method __init__ (line 96) | def __init__(self, coco=None, tokenlizer=None) -> None:
method visualize (line 99) | def visualize(self, img, tgt, caption=None, dpi=180, savedir="vis"):
method addtgt (line 135) | def addtgt(self, tgt):
method showAnns (line 225) | def showAnns(self, anns, draw_bbox=False):
FILE: model_cards/groundingdino/util/vl_utils.py
function create_positive_map_from_span (line 8) | def create_positive_map_from_span(tokenized, token_span, max_text_len=256):
function build_captions_and_token_span (line 49) | def build_captions_and_token_span(cat_list, force_lowercase):
function build_id2posspan_and_caption (line 90) | def build_id2posspan_and_caption(category_dict: dict):
FILE: model_cards/lama/bin/analyze_errors.py
function draw_score (line 18) | def draw_score(img, score):
function save_global_samples (line 30) | def save_global_samples(global_mask_fnames, mask2real_fname, mask2fake_f...
function save_samples_by_real (line 49) | def save_samples_by_real(worst_best_by_real, mask2fake_fname, fake_info,...
function extract_overlapping_masks (line 85) | def extract_overlapping_masks(mask_fnames, cur_i, fake_scores_table, max...
function main (line 103) | def main(args):
FILE: model_cards/lama/bin/blur_predicts.py
function main (line 13) | def main(args):
FILE: model_cards/lama/bin/calc_dataset_stats.py
function main (line 13) | def main(args):
FILE: model_cards/lama/bin/evaluate_predicts.py
function main (line 14) | def main(args):
FILE: model_cards/lama/bin/evaluator_example.py
class SimpleImageDataset (line 14) | class SimpleImageDataset(Dataset):
method __init__ (line 15) | def __init__(self, root_dir, image_size=(400, 600)):
method __getitem__ (line 20) | def __getitem__(self, index):
method __len__ (line 27) | def __len__(self):
function create_rectangle_mask (line 31) | def create_rectangle_mask(height, width):
class Model (line 39) | class Model():
method __call__ (line 40) | def __call__(self, img_batch, mask_batch):
class SimpleImageSquareMaskDataset (line 46) | class SimpleImageSquareMaskDataset(Dataset):
method __init__ (line 47) | def __init__(self, dataset):
method __getitem__ (line 52) | def __getitem__(self, index):
method __len__ (line 58) | def __len__(self):
FILE: model_cards/lama/bin/extract_masks.py
function main (line 6) | def main(args):
FILE: model_cards/lama/bin/filter_sharded_dataset.py
function is_good_key (line 13) | def is_good_key(key, cats):
function main (line 17) | def main(args):
FILE: model_cards/lama/bin/gen_debug_mask_dataset.py
function generate_masks_for_img (line 16) | def generate_masks_for_img(infile, outmask_pattern, mask_size=200, step=...
function main (line 34) | def main(args):
FILE: model_cards/lama/bin/gen_mask_dataset.py
class MakeManyMasksWrapper (line 17) | class MakeManyMasksWrapper:
method __init__ (line 18) | def __init__(self, impl, variants_n=2):
method get_masks (line 22) | def get_masks(self, img):
function process_images (line 27) | def process_images(src_images, indir, outdir, config):
function main (line 100) | def main(args):
FILE: model_cards/lama/bin/gen_mask_dataset_hydra.py
class MakeManyMasksWrapper (line 19) | class MakeManyMasksWrapper:
method __init__ (line 20) | def __init__(self, impl, variants_n=2):
method get_masks (line 24) | def get_masks(self, img):
function process_images (line 29) | def process_images(src_images, indir, outdir, config):
function main (line 104) | def main(config: OmegaConf):
FILE: model_cards/lama/bin/gen_outpainting_dataset.py
function main (line 34) | def main(args):
FILE: model_cards/lama/bin/make_checkpoint.py
function get_checkpoint_files (line 9) | def get_checkpoint_files(s):
function main (line 16) | def main(args):
FILE: model_cards/lama/bin/paper_runfiles/find_best_checkpoint.py
function ssim_fid100_f1 (line 8) | def ssim_fid100_f1(metrics, fid_scale=100):
function find_best_checkpoint (line 16) | def find_best_checkpoint(model_list, models_dir):
FILE: model_cards/lama/bin/predict.py
function main (line 39) | def main(predict_config: OmegaConf):
FILE: model_cards/lama/bin/predict_inner_features.py
function main (line 39) | def main(predict_config: OmegaConf):
FILE: model_cards/lama/bin/report_from_tb.py
function need_drop (line 21) | def need_drop(tag):
function get_group_and_title (line 28) | def get_group_and_title(tag):
function main (line 37) | def main(args):
FILE: model_cards/lama/bin/sample_from_dataset.py
function save_mask_for_sidebyside (line 13) | def save_mask_for_sidebyside(item, out_file):
function save_img_for_sidebyside (line 20) | def save_img_for_sidebyside(item, out_file):
function save_masked_img_for_sidebyside (line 25) | def save_masked_img_for_sidebyside(item, out_file):
function main (line 35) | def main(args):
FILE: model_cards/lama/bin/side_by_side.py
function main (line 13) | def main(args):
FILE: model_cards/lama/bin/split_tar.py
function main (line 8) | def main(args):
FILE: model_cards/lama/bin/to_jit.py
class JITWrapper (line 14) | class JITWrapper(nn.Module):
method __init__ (line 15) | def __init__(self, model):
method forward (line 19) | def forward(self, image, mask):
function main (line 29) | def main(predict_config: OmegaConf):
FILE: model_cards/lama/bin/train.py
function main (line 30) | def main(config: OmegaConf):
FILE: model_cards/lama/models/ade20k/base.py
class NormalizeTensor (line 25) | class NormalizeTensor:
method __init__ (line 26) | def __init__(self, mean, std, inplace=False):
method __call__ (line 44) | def __call__(self, tensor):
class ModelBuilder (line 56) | class ModelBuilder:
method weights_init (line 59) | def weights_init(m):
method build_encoder (line 68) | def build_encoder(arch='resnet50dilated', fc_dim=512, weights=''):
method build_decoder (line 98) | def build_decoder(arch='ppm_deepsup',
method get_decoder (line 125) | def get_decoder(weights_path, arch_encoder, arch_decoder, fc_dim, drop...
method get_encoder (line 130) | def get_encoder(weights_path, arch_encoder, arch_decoder, fc_dim, segm...
function conv3x3_bn_relu (line 139) | def conv3x3_bn_relu(in_planes, out_planes, stride=1):
class SegmentationModule (line 147) | class SegmentationModule(nn.Module):
method __init__ (line 148) | def __init__(self,
method normalize_input (line 194) | def normalize_input(self, tensor):
method feature_maps_channels (line 200) | def feature_maps_channels(self):
method forward (line 203) | def forward(self, img_data, segSize=None):
method multi_mask_from_multiclass (line 215) | def multi_mask_from_multiclass(self, pred, classes):
method multi_mask_from_multiclass_probs (line 221) | def multi_mask_from_multiclass_probs(scores, classes):
method predict (line 230) | def predict(self, tensor, imgSizes=(-1,), # (300, 375, 450, 525, 600)
method get_edges (line 277) | def get_edges(self, t):
class PPMDeepsup (line 290) | class PPMDeepsup(nn.Module):
method __init__ (line 291) | def __init__(self, num_class=NUM_CLASS, fc_dim=4096,
method forward (line 320) | def forward(self, conv_out, segSize=None):
class Resnet (line 355) | class Resnet(nn.Module):
method __init__ (line 356) | def __init__(self, orig_resnet):
method forward (line 375) | def forward(self, x, return_feature_maps=False):
class ResnetDilated (line 393) | class ResnetDilated(nn.Module):
method __init__ (line 394) | def __init__(self, orig_resnet, dilate_scale=8):
method _nostride_dilate (line 423) | def _nostride_dilate(self, m, dilate):
method forward (line 438) | def forward(self, x, return_feature_maps=False):
class MobileNetV2Dilated (line 459) | class MobileNetV2Dilated(nn.Module):
method __init__ (line 460) | def __init__(self, orig_net, dilate_scale=8):
method _nostride_dilate (line 485) | def _nostride_dilate(self, m, dilate):
method forward (line 500) | def forward(self, x, return_feature_maps=False):
class C1DeepSup (line 515) | class C1DeepSup(nn.Module):
method __init__ (line 516) | def __init__(self, num_class=150, fc_dim=2048, use_softmax=False, drop...
method forward (line 528) | def forward(self, conv_out, segSize=None):
class C1 (line 556) | class C1(nn.Module):
method __init__ (line 557) | def __init__(self, num_class=150, fc_dim=2048, use_softmax=False):
method forward (line 566) | def forward(self, conv_out, segSize=None):
class PPM (line 582) | class PPM(nn.Module):
method __init__ (line 583) | def __init__(self, num_class=150, fc_dim=4096,
method forward (line 607) | def forward(self, conv_out, segSize=None):
FILE: model_cards/lama/models/ade20k/mobilenet.py
function conv_bn (line 22) | def conv_bn(inp, oup, stride):
function conv_1x1_bn (line 30) | def conv_1x1_bn(inp, oup):
class InvertedResidual (line 38) | class InvertedResidual(nn.Module):
method __init__ (line 39) | def __init__(self, inp, oup, stride, expand_ratio):
method forward (line 72) | def forward(self, x):
class MobileNetV2 (line 79) | class MobileNetV2(nn.Module):
method __init__ (line 80) | def __init__(self, n_class=1000, input_size=224, width_mult=1.):
method forward (line 123) | def forward(self, x):
method _initialize_weights (line 129) | def _initialize_weights(self):
function mobilenetv2 (line 145) | def mobilenetv2(pretrained=False, **kwargs):
FILE: model_cards/lama/models/ade20k/resnet.py
function conv3x3 (line 18) | def conv3x3(in_planes, out_planes, stride=1):
class BasicBlock (line 24) | class BasicBlock(nn.Module):
method __init__ (line 27) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 37) | def forward(self, x):
class Bottleneck (line 56) | class Bottleneck(nn.Module):
method __init__ (line 59) | def __init__(self, inplanes, planes, stride=1, downsample=None):
method forward (line 72) | def forward(self, x):
class ResNet (line 95) | class ResNet(nn.Module):
method __init__ (line 97) | def __init__(self, block, layers, num_classes=1000):
method _make_layer (line 126) | def _make_layer(self, block, planes, blocks, stride=1):
method forward (line 143) | def forward(self, x):
function resnet50 (line 161) | def resnet50(pretrained=False, **kwargs):
function resnet18 (line 173) | def resnet18(pretrained=False, **kwargs):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/batchnorm.py
function _sum_ft (line 24) | def _sum_ft(tensor):
function _unsqueeze_ft (line 29) | def _unsqueeze_ft(tensor):
class _SynchronizedBatchNorm (line 38) | class _SynchronizedBatchNorm(_BatchNorm):
method __init__ (line 39) | def __init__(self, num_features, eps=1e-5, momentum=0.001, affine=True):
method forward (line 56) | def forward(self, input):
method __data_parallel_replicate__ (line 88) | def __data_parallel_replicate__(self, ctx, copy_id):
method _data_parallel_master (line 98) | def _data_parallel_master(self, intermediates):
method _add_weighted (line 119) | def _add_weighted(self, dest, delta, alpha=1, beta=1, bias=0):
method _compute_mean_std (line 123) | def _compute_mean_std(self, sum_, ssum, size):
class SynchronizedBatchNorm1d (line 142) | class SynchronizedBatchNorm1d(_SynchronizedBatchNorm):
method _check_input_dim (line 198) | def _check_input_dim(self, input):
class SynchronizedBatchNorm2d (line 205) | class SynchronizedBatchNorm2d(_SynchronizedBatchNorm):
method _check_input_dim (line 261) | def _check_input_dim(self, input):
class SynchronizedBatchNorm3d (line 268) | class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
method _check_input_dim (line 325) | def _check_input_dim(self, input):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/comm.py
class FutureResult (line 18) | class FutureResult(object):
method __init__ (line 21) | def __init__(self):
method put (line 26) | def put(self, result):
method get (line 32) | def get(self):
class SlavePipe (line 46) | class SlavePipe(_SlavePipeBase):
method run_slave (line 49) | def run_slave(self, msg):
class SyncMaster (line 56) | class SyncMaster(object):
method __init__ (line 67) | def __init__(self, master_callback):
method register_slave (line 78) | def register_slave(self, identifier):
method run_master (line 96) | def run_master(self, master_msg):
method nr_slaves (line 130) | def nr_slaves(self):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/replicate.py
class CallbackContext (line 23) | class CallbackContext(object):
function execute_replication_callbacks (line 27) | def execute_replication_callbacks(modules):
class DataParallelWithCallback (line 50) | class DataParallelWithCallback(DataParallel):
method replicate (line 64) | def replicate(self, module, device_ids):
function patch_replication_callback (line 70) | def patch_replication_callback(data_parallel):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/tests/test_numeric_batchnorm.py
function handy_var (line 18) | def handy_var(a, unbias=True):
class NumericTestCase (line 29) | class NumericTestCase(TorchTestCase):
method testNumericBatchNorm (line 30) | def testNumericBatchNorm(self):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/tests/test_sync_batchnorm.py
function handy_var (line 19) | def handy_var(a, unbias=True):
function _find_bn (line 30) | def _find_bn(module):
class SyncTestCase (line 36) | class SyncTestCase(TorchTestCase):
method _syncParameters (line 37) | def _syncParameters(self, bn1, bn2):
method _checkBatchNormResult (line 44) | def _checkBatchNormResult(self, bn1, bn2, input, is_train, cuda=False):
method testSyncBatchNormNormalTrain (line 67) | def testSyncBatchNormNormalTrain(self):
method testSyncBatchNormNormalEval (line 73) | def testSyncBatchNormNormalEval(self):
method testSyncBatchNormSyncTrain (line 79) | def testSyncBatchNormSyncTrain(self):
method testSyncBatchNormSyncEval (line 89) | def testSyncBatchNormSyncEval(self):
method testSyncBatchNorm2DSyncTrain (line 99) | def testSyncBatchNorm2DSyncTrain(self):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/modules/unittest.py
function as_numpy (line 17) | def as_numpy(v):
class TorchTestCase (line 23) | class TorchTestCase(unittest.TestCase):
method assertTensorClose (line 24) | def assertTensorClose(self, a, b, atol=1e-3, rtol=1e-3):
FILE: model_cards/lama/models/ade20k/segm_lib/nn/parallel/data_parallel.py
function async_copy_to (line 13) | def async_copy_to(obj, dev, main_stream=None):
function dict_gather (line 27) | def dict_gather(outputs, target_device, dim=0):
class DictGatherDataParallel (line 48) | class DictGatherDataParallel(nn.DataParallel):
method gather (line 49) | def gather(self, outputs, output_device):
class UserScatteredDataParallel (line 53) | class UserScatteredDataParallel(DictGatherDataParallel):
method scatter (line 54) | def scatter(self, inputs, kwargs, device_ids):
function user_scattered_collate (line 65) | def user_scattered_collate(batch):
function _async_copy (line 69) | def _async_copy(inputs, device_ids):
function _async_copy_stream (line 82) | def _async_copy_stream(inputs, device_ids):
function _get_stream (line 104) | def _get_stream(device):
FILE: model_cards/lama/models/ade20k/segm_lib/utils/data/dataloader.py
class ExceptionWrapper (line 25) | class ExceptionWrapper(object):
method __init__ (line 28) | def __init__(self, exc_info):
function _worker_loop (line 37) | def _worker_loop(dataset, index_queue, data_queue, collate_fn, seed, ini...
function _worker_manager_loop (line 67) | def _worker_manager_loop(in_queue, out_queue, done_event, pin_memory, de...
function default_collate (line 104) | def default_collate(batch):
function pin_memory_batch (line 145) | def pin_memory_batch(batch):
function _set_SIGCHLD_handler (line 163) | def _set_SIGCHLD_handler():
class DataLoaderIter (line 188) | class DataLoaderIter(object):
method __init__ (line 191) | def __init__(self, loader):
method __len__ (line 249) | def __len__(self):
method _get_batch (line 252) | def _get_batch(self):
method __next__ (line 261) | def __next__(self):
method __iter__ (line 290) | def __iter__(self):
method _put_indices (line 293) | def _put_indices(self):
method _process_next_batch (line 302) | def _process_next_batch(self, batch):
method __getstate__ (line 309) | def __getstate__(self):
method _shutdown_workers (line 317) | def _shutdown_workers(self):
method __del__ (line 336) | def __del__(self):
class DataLoader (line 341) | class DataLoader(object):
method __init__ (line 383) | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,...
method __iter__ (line 421) | def __iter__(self):
method __len__ (line 424) | def __len__(self):
FILE: model_cards/lama/models/ade20k/segm_lib/utils/data/dataset.py
class Dataset (line 8) | class Dataset(object):
method __getitem__ (line 16) | def __getitem__(self, index):
method __len__ (line 19) | def __len__(self):
method __add__ (line 22) | def __add__(self, other):
class TensorDataset (line 26) | class TensorDataset(Dataset):
method __init__ (line 37) | def __init__(self, data_tensor, target_tensor):
method __getitem__ (line 42) | def __getitem__(self, index):
method __len__ (line 45) | def __len__(self):
class ConcatDataset (line 49) | class ConcatDataset(Dataset):
method cumsum (line 61) | def cumsum(sequence):
method __init__ (line 69) | def __init__(self, datasets):
method __len__ (line 75) | def __len__(self):
method __getitem__ (line 78) | def __getitem__(self, idx):
method cummulative_sizes (line 87) | def cummulative_sizes(self):
class Subset (line 93) | class Subset(Dataset):
method __init__ (line 94) | def __init__(self, dataset, indices):
method __getitem__ (line 98) | def __getitem__(self, idx):
method __len__ (line 101) | def __len__(self):
function random_split (line 105) | def random_split(dataset, lengths):
FILE: model_cards/lama/models/ade20k/segm_lib/utils/data/distributed.py
class DistributedSampler (line 7) | class DistributedSampler(Sampler):
method __init__ (line 25) | def __init__(self, dataset, num_replicas=None, rank=None):
method __iter__ (line 37) | def __iter__(self):
method __len__ (line 54) | def __len__(self):
method set_epoch (line 57) | def set_epoch(self, epoch):
FILE: model_cards/lama/models/ade20k/segm_lib/utils/data/sampler.py
class Sampler (line 4) | class Sampler(object):
method __init__ (line 12) | def __init__(self, data_source):
method __iter__ (line 15) | def __iter__(self):
method __len__ (line 18) | def __len__(self):
class SequentialSampler (line 22) | class SequentialSampler(Sampler):
method __init__ (line 29) | def __init__(self, data_source):
method __iter__ (line 32) | def __iter__(self):
method __len__ (line 35) | def __len__(self):
class RandomSampler (line 39) | class RandomSampler(Sampler):
method __init__ (line 46) | def __init__(self, data_source):
method __iter__ (line 49) | def __iter__(self):
method __len__ (line 52) | def __len__(self):
class SubsetRandomSampler (line 56) | class SubsetRandomSampler(Sampler):
method __init__ (line 63) | def __init__(self, indices):
method __iter__ (line 66) | def __iter__(self):
method __len__ (line 69) | def __len__(self):
class WeightedRandomSampler (line 73) | class WeightedRandomSampler(Sampler):
method __init__ (line 84) | def __init__(self, weights, num_samples, replacement=True):
method __iter__ (line 89) | def __iter__(self):
method __len__ (line 92) | def __len__(self):
class BatchSampler (line 96) | class BatchSampler(object):
method __init__ (line 112) | def __init__(self, sampler, batch_size, drop_last):
method __iter__ (line 117) | def __iter__(self):
method __len__ (line 127) | def __len__(self):
FILE: model_cards/lama/models/ade20k/segm_lib/utils/th.py
function as_variable (line 8) | def as_variable(obj):
function as_numpy (line 18) | def as_numpy(obj):
function mark_volatile (line 30) | def mark_volatile(obj):
FILE: model_cards/lama/models/ade20k/utils.py
function load_url (line 15) | def load_url(url, model_dir='./pretrained', map_location=None):
function color_encode (line 26) | def color_encode(labelmap, colors, mode='RGB'):
FILE: model_cards/lama/saicinpainting/evaluation/__init__.py
function make_evaluator (line 9) | def make_evaluator(kind='default', ssim=True, lpips=True, fid=True, inte...
FILE: model_cards/lama/saicinpainting/evaluation/data.py
function load_image (line 12) | def load_image(fname, mode='RGB', return_orig=False):
function ceil_modulo (line 23) | def ceil_modulo(x, mod):
function pad_img_to_modulo (line 29) | def pad_img_to_modulo(img, mod):
function pad_tensor_to_modulo (line 36) | def pad_tensor_to_modulo(img, mod):
function scale_image (line 43) | def scale_image(img, factor, interpolation=cv2.INTER_AREA):
class InpaintingDataset (line 58) | class InpaintingDataset(Dataset):
method __init__ (line 59) | def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None,...
method __len__ (line 66) | def __len__(self):
method __getitem__ (line 69) | def __getitem__(self, i):
class OurInpaintingDataset (line 85) | class OurInpaintingDataset(Dataset):
method __init__ (line 86) | def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None,...
method __len__ (line 93) | def __len__(self):
method __getitem__ (line 96) | def __getitem__(self, i):
class PrecomputedInpaintingResultsDataset (line 110) | class PrecomputedInpaintingResultsDataset(InpaintingDataset):
method __init__ (line 111) | def __init__(self, datadir, predictdir, inpainted_suffix='_inpainted.j...
method __getitem__ (line 119) | def __getitem__(self, i):
class OurPrecomputedInpaintingResultsDataset (line 126) | class OurPrecomputedInpaintingResultsDataset(OurInpaintingDataset):
method __init__ (line 127) | def __init__(self, datadir, predictdir, inpainted_suffix="png", **kwar...
method __getitem__ (line 137) | def __getitem__(self, i):
class InpaintingEvalOnlineDataset (line 145) | class InpaintingEvalOnlineDataset(Dataset):
method __init__ (line 146) | def __init__(self, indir, mask_generator, img_suffix='.jpg', pad_out_t...
method __len__ (line 153) | def __len__(self):
method __getitem__ (line 156) | def __getitem__(self, i):
FILE: model_cards/lama/saicinpainting/evaluation/evaluator.py
class InpaintingEvaluator (line 16) | class InpaintingEvaluator():
method __init__ (line 17) | def __init__(self, dataset, scores, area_grouping=True, bins=10, batch...
method _get_bin_edges (line 42) | def _get_bin_edges(self):
method evaluate (line 67) | def evaluate(self, model=None):
function ssim_fid100_f1 (line 112) | def ssim_fid100_f1(metrics, fid_scale=100):
function lpips_fid100_f1 (line 120) | def lpips_fid100_f1(metrics, fid_scale=100):
class InpaintingEvaluatorOnline (line 129) | class InpaintingEvaluatorOnline(nn.Module):
method __init__ (line 130) | def __init__(self, scores, bins=10, image_key='image', inpainted_key='...
method _get_bins (line 162) | def _get_bins(self, mask_batch):
method forward (line 168) | def forward(self, batch: Dict[str, torch.Tensor]):
method process_batch (line 186) | def process_batch(self, batch: Dict[str, torch.Tensor]):
method evaluation_end (line 189) | def evaluation_end(self, states=None):
FILE: model_cards/lama/saicinpainting/evaluation/losses/base_loss.py
function get_groupings (line 21) | def get_groupings(groups):
class EvaluatorScore (line 40) | class EvaluatorScore(nn.Module):
method forward (line 42) | def forward(self, pred_batch, target_batch, mask):
method get_value (line 46) | def get_value(self, groups=None, states=None):
method reset (line 50) | def reset(self):
class PairwiseScore (line 54) | class PairwiseScore(EvaluatorScore, ABC):
method __init__ (line 55) | def __init__(self):
method get_value (line 59) | def get_value(self, groups=None, states=None):
method reset (line 88) | def reset(self):
class SSIMScore (line 92) | class SSIMScore(PairwiseScore):
method __init__ (line 93) | def __init__(self, window_size=11):
method forward (line 98) | def forward(self, pred_batch, target_batch, mask=None):
class LPIPSScore (line 106) | class LPIPSScore(PairwiseScore):
method __init__ (line 107) | def __init__(self, model='net-lin', net='vgg', model_path=None, use_gp...
method forward (line 113) | def forward(self, pred_batch, target_batch, mask=None):
function fid_calculate_activation_statistics (line 121) | def fid_calculate_activation_statistics(act):
function calculate_frechet_distance (line 127) | def calculate_frechet_distance(activations_pred, activations_target, eps...
class FIDScore (line 156) | class FIDScore(EvaluatorScore):
method __init__ (line 157) | def __init__(self, dims=2048, eps=1e-6):
method forward (line 168) | def forward(self, pred_batch, target_batch, mask=None):
method get_value (line 177) | def get_value(self, groups=None, states=None):
method reset (line 207) | def reset(self):
method _get_activations (line 211) | def _get_activations(self, batch):
class SegmentationAwareScore (line 221) | class SegmentationAwareScore(EvaluatorScore):
method __init__ (line 222) | def __init__(self, weights_path):
method forward (line 229) | def forward(self, pred_batch, target_batch, mask):
method reset (line 256) | def reset(self):
function distribute_values_to_classes (line 263) | def distribute_values_to_classes(target_class_freq_by_image_mask, values...
function get_segmentation_idx2name (line 271) | def get_segmentation_idx2name():
class SegmentationAwarePairwiseScore (line 275) | class SegmentationAwarePairwiseScore(SegmentationAwareScore):
method __init__ (line 276) | def __init__(self, *args, **kwargs):
method forward (line 281) | def forward(self, pred_batch, target_batch, mask):
method calc_score (line 288) | def calc_score(self, pred_batch, target_batch, mask):
method get_value (line 291) | def get_value(self, groups=None, states=None):
method reset (line 336) | def reset(self):
class SegmentationClassStats (line 341) | class SegmentationClassStats(SegmentationAwarePairwiseScore):
method calc_score (line 342) | def calc_score(self, pred_batch, target_batch, mask):
method get_value (line 345) | def get_value(self, groups=None, states=None):
class SegmentationAwareSSIM (line 420) | class SegmentationAwareSSIM(SegmentationAwarePairwiseScore):
method __init__ (line 421) | def __init__(self, *args, window_size=11, **kwargs):
method calc_score (line 425) | def calc_score(self, pred_batch, target_batch, mask):
class SegmentationAwareLPIPS (line 429) | class SegmentationAwareLPIPS(SegmentationAwarePairwiseScore):
method __init__ (line 430) | def __init__(self, *args, model='net-lin', net='vgg', model_path=None,...
method calc_score (line 435) | def calc_score(self, pred_batch, target_batch, mask):
function calculade_fid_no_img (line 439) | def calculade_fid_no_img(img_i, activations_pred, activations_target, ep...
class SegmentationAwareFID (line 445) | class SegmentationAwareFID(SegmentationAwarePairwiseScore):
method __init__ (line 446) | def __init__(self, *args, dims=2048, eps=1e-6, n_jobs=-1, **kwargs):
method calc_score (line 455) | def calc_score(self, pred_batch, target_batch, mask):
method get_value (line 460) | def get_value(self, groups=None, states=None):
method distribute_fid_to_classes (line 513) | def distribute_fid_to_classes(self, class_freq, activations_pred, acti...
method _get_activations (line 523) | def _get_activations(self, batch):
FILE: model_cards/lama/saicinpainting/evaluation/losses/fid/fid_score.py
function tqdm (line 52) | def tqdm(x): return x
function get_activations (line 76) | def get_activations(files, model, batch_size=50, dims=2048,
function calculate_frechet_distance (line 160) | def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
function calculate_activation_statistics (line 218) | def calculate_activation_statistics(files, model, batch_size=50,
function _compute_statistics_of_path (line 243) | def _compute_statistics_of_path(path, model, batch_size, dims, cuda):
function _compute_statistics_of_images (line 257) | def _compute_statistics_of_images(images, model, batch_size, dims, cuda,...
function calculate_fid_given_paths (line 268) | def calculate_fid_given_paths(paths, batch_size, cuda, dims):
function calculate_fid_given_images (line 289) | def calculate_fid_given_images(images, batch_size, cuda, dims, use_globa...
FILE: model_cards/lama/saicinpainting/evaluation/losses/fid/inception.py
class InceptionV3 (line 21) | class InceptionV3(nn.Module):
method __init__ (line 36) | def __init__(self,
method forward (line 134) | def forward(self, inp):
function fid_inception_v3 (line 171) | def fid_inception_v3():
class FIDInceptionA (line 206) | class FIDInceptionA(models.inception.InceptionA):
method __init__ (line 208) | def __init__(self, in_channels, pool_features):
method forward (line 211) | def forward(self, x):
class FIDInceptionC (line 231) | class FIDInceptionC(models.inception.InceptionC):
method __init__ (line 233) | def __init__(self, in_channels, channels_7x7):
method forward (line 236) | def forward(self, x):
class FIDInceptionE_1 (line 259) | class FIDInceptionE_1(models.inception.InceptionE):
method __init__ (line 261) | def __init__(self, in_channels):
method forward (line 264) | def forward(self, x):
class FIDInceptionE_2 (line 292) | class FIDInceptionE_2(models.inception.InceptionE):
method __init__ (line 294) | def __init__(self, in_channels):
method forward (line 297) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/evaluation/losses/lpips.py
class PerceptualLoss (line 18) | class PerceptualLoss(torch.nn.Module):
method __init__ (line 19) | def __init__(self, model='net-lin', net='alex', colorspace='rgb', mode...
method forward (line 29) | def forward(self, pred, target, normalize=True):
function normalize_tensor (line 45) | def normalize_tensor(in_feat, eps=1e-10):
function l2 (line 50) | def l2(p0, p1, range=255.):
function psnr (line 54) | def psnr(p0, p1, peak=255.):
function dssim (line 58) | def dssim(p0, p1, range=255.):
function rgb2lab (line 62) | def rgb2lab(in_img, mean_cent=False):
function tensor2np (line 70) | def tensor2np(tensor_obj):
function np2tensor (line 75) | def np2tensor(np_obj):
function tensor2tensorlab (line 80) | def tensor2tensorlab(image_tensor, to_norm=True, mc_only=False):
function tensorlab2tensor (line 95) | def tensorlab2tensor(lab_tensor, return_inbnd=False):
function rgb2lab (line 114) | def rgb2lab(input):
function tensor2im (line 119) | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
function im2tensor (line 125) | def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
function tensor2vec (line 130) | def tensor2vec(vector_tensor):
function voc_ap (line 134) | def voc_ap(rec, prec, use_07_metric=False):
function tensor2im (line 168) | def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
function im2tensor (line 175) | def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
class BaseModel (line 186) | class BaseModel(torch.nn.Module):
method __init__ (line 187) | def __init__(self):
method name (line 190) | def name(self):
method initialize (line 193) | def initialize(self, use_gpu=True):
method forward (line 196) | def forward(self):
method get_image_paths (line 199) | def get_image_paths(self):
method optimize_parameters (line 202) | def optimize_parameters(self):
method get_current_visuals (line 205) | def get_current_visuals(self):
method get_current_errors (line 208) | def get_current_errors(self):
method save (line 211) | def save(self, label):
method save_network (line 215) | def save_network(self, network, path, network_label, epoch_label):
method load_network (line 221) | def load_network(self, network, network_label, epoch_label):
method update_learning_rate (line 227) | def update_learning_rate():
method get_image_paths (line 230) | def get_image_paths(self):
method save_done (line 233) | def save_done(self, flag=False):
class DistModel (line 248) | class DistModel(BaseModel):
method name (line 249) | def name(self):
method initialize (line 252) | def initialize(self, model='net-lin', net='alex', colorspace='Lab', pn...
method forward (line 330) | def forward(self, in0, in1, retPerLayer=False):
method optimize_parameters (line 341) | def optimize_parameters(self):
method clamp_weights (line 348) | def clamp_weights(self):
method set_input (line 353) | def set_input(self, data):
method forward_train (line 369) | def forward_train(self): # run forward pass
method backward_train (line 385) | def backward_train(self):
method compute_accuracy (line 388) | def compute_accuracy(self, d0, d1, judge):
method get_current_errors (line 394) | def get_current_errors(self):
method get_current_visuals (line 403) | def get_current_visuals(self):
method save (line 418) | def save(self, path, label):
method update_learning_rate (line 425) | def update_learning_rate(self, nepoch_decay):
function score_2afc_dataset (line 436) | def score_2afc_dataset(data_loader, func, name=''):
function score_jnd_dataset (line 472) | def score_jnd_dataset(data_loader, func, name=''):
function spatial_average (line 521) | def spatial_average(in_tens, keepdim=True):
function upsample (line 525) | def upsample(in_tens, out_H=64): # assumes scale factor is same for H a...
class PNetLin (line 533) | class PNetLin(nn.Module):
method __init__ (line 534) | def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, ...
method forward (line 571) | def forward(self, in0, in1, retPerLayer=False):
class ScalingLayer (line 603) | class ScalingLayer(nn.Module):
method __init__ (line 604) | def __init__(self):
method forward (line 609) | def forward(self, inp):
class NetLinLayer (line 613) | class NetLinLayer(nn.Module):
method __init__ (line 616) | def __init__(self, chn_in, chn_out=1, use_dropout=False):
class Dist2LogitLayer (line 624) | class Dist2LogitLayer(nn.Module):
method __init__ (line 627) | def __init__(self, chn_mid=32, use_sigmoid=True):
method forward (line 639) | def forward(self, d0, d1, eps=0.1):
class BCERankingLoss (line 643) | class BCERankingLoss(nn.Module):
method __init__ (line 644) | def __init__(self, chn_mid=32):
method forward (line 650) | def forward(self, d0, d1, judge):
class FakeNet (line 657) | class FakeNet(nn.Module):
method __init__ (line 658) | def __init__(self, use_gpu=True, colorspace='Lab'):
class L2 (line 664) | class L2(FakeNet):
method forward (line 666) | def forward(self, in0, in1, retPerLayer=None):
class DSSIM (line 683) | class DSSIM(FakeNet):
method forward (line 685) | def forward(self, in0, in1, retPerLayer=None):
function print_network (line 699) | def print_network(net):
class squeezenet (line 716) | class squeezenet(torch.nn.Module):
method __init__ (line 717) | def __init__(self, requires_grad=False, pretrained=True):
method forward (line 746) | def forward(self, X):
class alexnet (line 767) | class alexnet(torch.nn.Module):
method __init__ (line 768) | def __init__(self, requires_grad=False, pretrained=True):
method forward (line 791) | def forward(self, X):
class vgg16 (line 808) | class vgg16(torch.nn.Module):
method __init__ (line 809) | def __init__(self, requires_grad=False, pretrained=True):
method forward (line 832) | def forward(self, X):
class resnet (line 849) | class resnet(torch.nn.Module):
method __init__ (line 850) | def __init__(self, requires_grad=False, pretrained=True, num=18):
method forward (line 873) | def forward(self, X):
FILE: model_cards/lama/saicinpainting/evaluation/losses/ssim.py
class SSIM (line 6) | class SSIM(torch.nn.Module):
method __init__ (line 11) | def __init__(self, window_size=11, size_average=True):
method forward (line 18) | def forward(self, img1, img2):
method _gaussian (line 36) | def _gaussian(self, window_size, sigma):
method _create_window (line 42) | def _create_window(self, window_size, channel):
method _ssim (line 47) | def _ssim(self, img1, img2, window, window_size, channel, size_average...
method _load_from_state_dict (line 73) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st...
FILE: model_cards/lama/saicinpainting/evaluation/masks/countless/countless2d.py
function simplest_countless (line 25) | def simplest_countless(data):
function quick_countless (line 52) | def quick_countless(data):
function quickest_countless (line 77) | def quickest_countless(data):
function quick_countless_xor (line 100) | def quick_countless_xor(data):
function stippled_countless (line 124) | def stippled_countless(data):
function zero_corrected_countless (line 151) | def zero_corrected_countless(data):
function countless_extreme (line 195) | def countless_extreme(data):
function countless (line 212) | def countless(data):
function upgrade_type (line 252) | def upgrade_type(arr):
function downgrade_type (line 264) | def downgrade_type(arr):
function odd_to_even (line 276) | def odd_to_even(image):
function counting (line 310) | def counting(array):
function ndzoom (line 340) | def ndzoom(array):
function countless_if (line 347) | def countless_if(array):
function downsample_with_averaging (line 376) | def downsample_with_averaging(array):
function downsample_with_max_pooling (line 401) | def downsample_with_max_pooling(array):
function striding (line 421) | def striding(array):
function benchmark (line 431) | def benchmark():
FILE: model_cards/lama/saicinpainting/evaluation/masks/countless/countless3d.py
function countless5 (line 17) | def countless5(a,b,c,d,e):
function countless8 (line 50) | def countless8(a,b,c,d,e,f,g,h):
function dynamic_countless3d (line 74) | def dynamic_countless3d(data):
function countless3d (line 133) | def countless3d(data):
function countless_generalized (line 169) | def countless_generalized(data, factor):
function dynamic_countless_generalized (line 209) | def dynamic_countless_generalized(data, factor):
function downsample_with_averaging (line 261) | def downsample_with_averaging(array):
function downsample_with_max_pooling (line 282) | def downsample_with_max_pooling(array):
function striding (line 299) | def striding(array):
function benchmark (line 309) | def benchmark():
FILE: model_cards/lama/saicinpainting/evaluation/masks/countless/test.py
function test_countless2d (line 8) | def test_countless2d():
function test_stippled_countless2d (line 55) | def test_stippled_countless2d():
function test_countless3d (line 113) | def test_countless3d():
FILE: model_cards/lama/saicinpainting/evaluation/masks/mask.py
class ObjectMask (line 19) | class ObjectMask():
method __init__ (line 20) | def __init__(self, mask):
method _get_limits (line 26) | def _get_limits(mask):
method _clean (line 40) | def _clean(self):
method horizontal_flip (line 44) | def horizontal_flip(self, inplace=False):
method vertical_flip (line 52) | def vertical_flip(self, inplace=False):
method image_center (line 60) | def image_center(self):
method rescale (line 65) | def rescale(self, scaling_factor, inplace=False):
method crop_to_canvas (line 82) | def crop_to_canvas(self, vertical=True, horizontal=True, inplace=False):
method restore_full_mask (line 114) | def restore_full_mask(self, allow_crop=False):
method shift (line 120) | def shift(self, vertical=0, horizontal=0, inplace=False):
method area (line 131) | def area(self):
class RigidnessMode (line 135) | class RigidnessMode(enum.Enum):
class SegmentationMask (line 140) | class SegmentationMask:
method __init__ (line 141) | def __init__(self, confidence_threshold=0.5, rigidness_mode=RigidnessM...
method get_segmentation (line 191) | def get_segmentation(self, img):
method _is_power_of_two (line 197) | def _is_power_of_two(n):
method identify_candidates (line 200) | def identify_candidates(self, panoptic_seg, segments_info):
method downsample_mask (line 212) | def downsample_mask(self, mask):
method _augmentation_params (line 230) | def _augmentation_params(self):
method _get_intersection (line 244) | def _get_intersection(self, mask_array, mask_object):
method _check_masks_intersection (line 250) | def _check_masks_intersection(self, aug_mask, total_mask_area, prev_ma...
method _check_foreground_intersection (line 260) | def _check_foreground_intersection(self, aug_mask, foreground):
method _move_mask (line 271) | def _move_mask(self, mask, foreground):
method _prepare_mask (line 351) | def _prepare_mask(self, mask):
method get_masks (line 358) | def get_masks(self, im, return_panoptic=False):
function propose_random_square_crop (line 410) | def propose_random_square_crop(mask, min_overlap=0.5):
FILE: model_cards/lama/saicinpainting/evaluation/refinement.py
function _pyrdown (line 19) | def _pyrdown(im : torch.Tensor, downsize : tuple=None):
function _pyrdown_mask (line 28) | def _pyrdown_mask(mask : torch.Tensor, downsize : tuple=None, eps : floa...
function _erode_mask (line 66) | def _erode_mask(mask : torch.Tensor, ekernel : torch.Tensor=None, eps : ...
function _l1_loss (line 75) | def _l1_loss(
function _infer (line 86) | def _infer(
function _get_image_mask_pyramid (line 176) | def _get_image_mask_pyramid(batch : dict, min_side : int, max_scales : i...
function refine_predict (line 228) | def refine_predict(
FILE: model_cards/lama/saicinpainting/evaluation/utils.py
function load_yaml (line 9) | def load_yaml(path):
function move_to_device (line 14) | def move_to_device(obj, device):
class SmallMode (line 26) | class SmallMode(Enum):
FILE: model_cards/lama/saicinpainting/evaluation/vis.py
function save_item_for_vis (line 6) | def save_item_for_vis(item, out_file):
function save_mask_for_sidebyside (line 27) | def save_mask_for_sidebyside(item, out_file):
function save_img_for_sidebyside (line 34) | def save_img_for_sidebyside(item, out_file):
FILE: model_cards/lama/saicinpainting/training/data/aug.py
class IAAAffine2 (line 4) | class IAAAffine2(DualIAATransform):
method __init__ (line 17) | def __init__(
method processor (line 41) | def processor(self):
method get_transform_init_args_names (line 53) | def get_transform_init_args_names(self):
class IAAPerspective2 (line 57) | class IAAPerspective2(DualIAATransform):
method __init__ (line 71) | def __init__(self, scale=(0.05, 0.1), keep_size=True, always_apply=Fal...
method processor (line 80) | def processor(self):
method get_transform_init_args_names (line 83) | def get_transform_init_args_names(self):
FILE: model_cards/lama/saicinpainting/training/data/datasets.py
class InpaintingTrainDataset (line 25) | class InpaintingTrainDataset(Dataset):
method __init__ (line 26) | def __init__(self, indir, mask_generator, transform):
method __len__ (line 32) | def __len__(self):
method __getitem__ (line 35) | def __getitem__(self, item):
class InpaintingTrainWebDataset (line 48) | class InpaintingTrainWebDataset(IterableDataset):
method __init__ (line 49) | def __init__(self, indir, mask_generator, transform, shuffle_buffer=200):
method __iter__ (line 54) | def __iter__(self):
class ImgSegmentationDataset (line 64) | class ImgSegmentationDataset(Dataset):
method __init__ (line 65) | def __init__(self, indir, mask_generator, transform, out_size, segm_in...
method __len__ (line 74) | def __len__(self):
method __getitem__ (line 77) | def __getitem__(self, item):
method load_semantic_segm (line 92) | def load_semantic_segm(self, img_path):
function get_transforms (line 101) | def get_transforms(transform_variant, out_size):
function make_default_train_dataloader (line 206) | def make_default_train_dataloader(indir, kind='default', out_size=512, m...
function make_default_val_dataset (line 249) | def make_default_val_dataset(indir, kind='default', out_size=512, transf...
function make_default_val_dataloader (line 283) | def make_default_val_dataloader(*args, dataloader_kwargs=None, **kwargs):
function make_constant_area_crop_params (line 292) | def make_constant_area_crop_params(img_height, img_width, min_size=128, ...
FILE: model_cards/lama/saicinpainting/training/data/masks.py
class DrawMethod (line 16) | class DrawMethod(Enum):
function make_random_irregular_mask (line 22) | def make_random_irregular_mask(shape, max_angle=4, max_len=60, max_width...
class RandomIrregularMaskGenerator (line 51) | class RandomIrregularMaskGenerator:
method __init__ (line 52) | def __init__(self, max_angle=4, max_len=60, max_width=20, min_times=0,...
method __call__ (line 62) | def __call__(self, img, iter_i=None, raw_image=None):
function make_random_rectangle_mask (line 72) | def make_random_rectangle_mask(shape, margin=10, bbox_min_size=30, bbox_...
class RandomRectangleMaskGenerator (line 86) | class RandomRectangleMaskGenerator:
method __init__ (line 87) | def __init__(self, margin=10, bbox_min_size=30, bbox_max_size=100, min...
method __call__ (line 95) | def __call__(self, img, iter_i=None, raw_image=None):
class RandomSegmentationMaskGenerator (line 104) | class RandomSegmentationMaskGenerator:
method __init__ (line 105) | def __init__(self, **kwargs):
method __call__ (line 109) | def __call__(self, img, iter_i=None, raw_image=None):
function make_random_superres_mask (line 118) | def make_random_superres_mask(shape, min_step=2, max_step=4, min_width=1...
class RandomSuperresMaskGenerator (line 136) | class RandomSuperresMaskGenerator:
method __init__ (line 137) | def __init__(self, **kwargs):
method __call__ (line 140) | def __call__(self, img, iter_i=None):
class DumbAreaMaskGenerator (line 144) | class DumbAreaMaskGenerator:
method __init__ (line 149) | def __init__(self, is_training):
method _random_vector (line 154) | def _random_vector(self, dimension):
method __call__ (line 167) | def __call__(self, img, iter_i=None, raw_image=None):
class OutpaintingMaskGenerator (line 176) | class OutpaintingMaskGenerator:
method __init__ (line 177) | def __init__(self, min_padding_percent:float=0.04, max_padding_percent...
method apply_padding (line 195) | def apply_padding(self, mask, coord):
method get_padding (line 200) | def get_padding(self, size):
method _img2rs (line 206) | def _img2rs(img):
method __call__ (line 212) | def __call__(self, img, iter_i=None, raw_image=None):
class MixedMaskGenerator (line 252) | class MixedMaskGenerator:
method __init__ (line 253) | def __init__(self, irregular_proba=1/3, irregular_kwargs=None,
method __call__ (line 309) | def __call__(self, img, iter_i=None, raw_image=None):
function get_mask_generator (line 318) | def get_mask_generator(kind, kwargs):
FILE: model_cards/lama/saicinpainting/training/losses/adversarial.py
class BaseAdversarialLoss (line 8) | class BaseAdversarialLoss:
method pre_generator_step (line 9) | def pre_generator_step(self, real_batch: torch.Tensor, fake_batch: tor...
method pre_discriminator_step (line 20) | def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch:...
method generator_loss (line 31) | def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.T...
method discriminator_loss (line 46) | def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: tor...
method interpolate_mask (line 61) | def interpolate_mask(self, mask, shape):
function make_r1_gp (line 71) | def make_r1_gp(discr_real_pred, real_batch):
class NonSaturatingWithR1 (line 81) | class NonSaturatingWithR1(BaseAdversarialLoss):
method __init__ (line 82) | def __init__(self, gp_coef=5, weight=1, mask_as_fake_target=False, all...
method generator_loss (line 101) | def generator_loss(self, real_batch: torch.Tensor, fake_batch: torch.T...
method pre_discriminator_step (line 117) | def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch:...
method discriminator_loss (line 121) | def discriminator_loss(self, real_batch: torch.Tensor, fake_batch: tor...
class BCELoss (line 145) | class BCELoss(BaseAdversarialLoss):
method __init__ (line 146) | def __init__(self, weight):
method generator_loss (line 150) | def generator_loss(self, discr_fake_pred: torch.Tensor) -> Tuple[torch...
method pre_discriminator_step (line 155) | def pre_discriminator_step(self, real_batch: torch.Tensor, fake_batch:...
method discriminator_loss (line 159) | def discriminator_loss(self,
function make_discrim_loss (line 172) | def make_discrim_loss(kind, **kwargs):
FILE: model_cards/lama/saicinpainting/training/losses/distance_weighting.py
function dummy_distance_weighter (line 9) | def dummy_distance_weighter(real_img, pred_img, mask):
function get_gauss_kernel (line 13) | def get_gauss_kernel(kernel_size, width_factor=1):
class BlurMask (line 22) | class BlurMask(nn.Module):
method __init__ (line 23) | def __init__(self, kernel_size=5, width_factor=1):
method forward (line 28) | def forward(self, real_img, pred_img, mask):
class EmulatedEDTMask (line 34) | class EmulatedEDTMask(nn.Module):
method __init__ (line 35) | def __init__(self, dilate_kernel_size=5, blur_kernel_size=5, width_fac...
method forward (line 43) | def forward(self, real_img, pred_img, mask):
class PropagatePerceptualSim (line 51) | class PropagatePerceptualSim(nn.Module):
method __init__ (line 52) | def __init__(self, level=2, max_iters=10, temperature=500, erode_mask_...
method forward (line 82) | def forward(self, real_img, pred_img, mask):
function make_mask_distance_weighter (line 117) | def make_mask_distance_weighter(kind='none', **kwargs):
FILE: model_cards/lama/saicinpainting/training/losses/feature_matching.py
function masked_l2_loss (line 7) | def masked_l2_loss(pred, target, mask, weight_known, weight_missing):
function masked_l1_loss (line 13) | def masked_l1_loss(pred, target, mask, weight_known, weight_missing):
function feature_matching_loss (line 19) | def feature_matching_loss(fake_features: List[torch.Tensor], target_feat...
FILE: model_cards/lama/saicinpainting/training/losses/perceptual.py
class PerceptualLoss (line 14) | class PerceptualLoss(nn.Module):
method __init__ (line 15) | def __init__(self, normalize_inputs=True):
method do_normalize_inputs (line 38) | def do_normalize_inputs(self, x):
method partial_losses (line 41) | def partial_losses(self, input, target, mask=None):
method forward (line 72) | def forward(self, input, target, mask=None):
method get_global_features (line 76) | def get_global_features(self, input):
class ResNetPL (line 88) | class ResNetPL(nn.Module):
method __init__ (line 89) | def __init__(self, weight=1,
method forward (line 103) | def forward(self, pred, target):
FILE: model_cards/lama/saicinpainting/training/losses/segmentation.py
class CrossEntropy2d (line 8) | class CrossEntropy2d(nn.Module):
method __init__ (line 9) | def __init__(self, reduction="mean", ignore_label=255, weights=None, *...
method forward (line 22) | def forward(self, predict, target):
FILE: model_cards/lama/saicinpainting/training/losses/style_loss.py
class PerceptualLoss (line 6) | class PerceptualLoss(nn.Module):
method __init__ (line 13) | def __init__(self, weights=[1.0, 1.0, 1.0, 1.0, 1.0]):
method __call__ (line 19) | def __call__(self, x, y):
class VGG19 (line 34) | class VGG19(torch.nn.Module):
method __init__ (line 35) | def __init__(self):
method forward (line 111) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/modules/__init__.py
function make_generator (line 7) | def make_generator(config, kind, **kwargs):
function make_discriminator (line 22) | def make_discriminator(kind, **kwargs):
FILE: model_cards/lama/saicinpainting/training/modules/base.py
class BaseDiscriminator (line 11) | class BaseDiscriminator(nn.Module):
method forward (line 13) | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.T...
function get_conv_block_ctor (line 21) | def get_conv_block_ctor(kind='default'):
function get_norm_layer (line 33) | def get_norm_layer(kind='bn'):
function get_activation (line 43) | def get_activation(kind='tanh'):
class SimpleMultiStepGenerator (line 53) | class SimpleMultiStepGenerator(nn.Module):
method __init__ (line 54) | def __init__(self, steps: List[nn.Module]):
method forward (line 58) | def forward(self, x):
function deconv_factory (line 67) | def deconv_factory(kind, ngf, mult, norm_layer, activation, max_features):
FILE: model_cards/lama/saicinpainting/training/modules/depthwise_sep_conv.py
class DepthWiseSeperableConv (line 4) | class DepthWiseSeperableConv(nn.Module):
method __init__ (line 5) | def __init__(self, in_dim, out_dim, *args, **kwargs):
method forward (line 14) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/modules/fake_fakes.py
class FakeFakesGenerator (line 6) | class FakeFakesGenerator:
method __init__ (line 7) | def __init__(self, aug_proba=0.5, img_aug_degree=30, img_aug_translate...
method __call__ (line 20) | def __call__(self, input_images, masks):
method _make_blend_target (line 26) | def _make_blend_target(self, input_images):
method _fill_masks_with_gradient (line 34) | def _fill_masks_with_gradient(self, masks):
FILE: model_cards/lama/saicinpainting/training/modules/ffc.py
class FFCSE_block (line 16) | class FFCSE_block(nn.Module):
method __init__ (line 18) | def __init__(self, channels, ratio_g):
method forward (line 34) | def forward(self, x):
class FourierUnit (line 49) | class FourierUnit(nn.Module):
method __init__ (line 51) | def __init__(self, in_channels, out_channels, groups=1, spatial_scale_...
method forward (line 76) | def forward(self, x):
class SpectralTransform (line 116) | class SpectralTransform(nn.Module):
method __init__ (line 118) | def __init__(self, in_channels, out_channels, stride=1, groups=1, enab...
method forward (line 142) | def forward(self, x):
class FFC (line 166) | class FFC(nn.Module):
method __init__ (line 168) | def __init__(self, in_channels, out_channels, kernel_size,
method forward (line 205) | def forward(self, x):
class FFC_BN_ACT (line 228) | class FFC_BN_ACT(nn.Module):
method __init__ (line 230) | def __init__(self, in_channels, out_channels,
method forward (line 251) | def forward(self, x):
class FFCResnetBlock (line 258) | class FFCResnetBlock(nn.Module):
method __init__ (line 259) | def __init__(self, dim, padding_type, norm_layer, activation_layer=nn....
method forward (line 277) | def forward(self, x):
class ConcatTupleLayer (line 295) | class ConcatTupleLayer(nn.Module):
method forward (line 296) | def forward(self, x):
class FFCResNetGenerator (line 305) | class FFCResNetGenerator(nn.Module):
method __init__ (line 306) | def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_bl...
method forward (line 366) | def forward(self, input):
class FFCNLayerDiscriminator (line 370) | class FFCNLayerDiscriminator(BaseDiscriminator):
method __init__ (line 371) | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNo...
method get_all_activations (line 416) | def get_all_activations(self, x):
method forward (line 423) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/modules/multidilated_conv.py
class MultidilatedConv (line 6) | class MultidilatedConv(nn.Module):
method __init__ (line 7) | def __init__(self, in_dim, out_dim, kernel_size, dilation_num=3, comb_...
method forward (line 73) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/modules/multiscale.py
class ResNetHead (line 11) | class ResNetHead(nn.Module):
method __init__ (line 12) | def __init__(self, input_nc, ngf=64, n_downsampling=3, n_blocks=9, nor...
method forward (line 40) | def forward(self, input):
class ResNetTail (line 44) | class ResNetTail(nn.Module):
method __init__ (line 45) | def __init__(self, output_nc, ngf=64, n_downsampling=3, n_blocks=9, no...
method forward (line 86) | def forward(self, input, return_last_act=False):
class MultiscaleResNet (line 95) | class MultiscaleResNet(nn.Module):
method __init__ (line 96) | def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=2, n_bl...
method num_scales (line 120) | def num_scales(self):
method forward (line 123) | def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: ...
class MultiscaleDiscriminatorSimple (line 173) | class MultiscaleDiscriminatorSimple(nn.Module):
method __init__ (line 174) | def __init__(self, ms_impl):
method num_scales (line 179) | def num_scales(self):
method forward (line 182) | def forward(self, ms_inputs: List[torch.Tensor], smallest_scales_num: ...
class SingleToMultiScaleInputMixin (line 199) | class SingleToMultiScaleInputMixin:
method forward (line 200) | def forward(self, x: torch.Tensor) -> List:
class GeneratorMultiToSingleOutputMixin (line 208) | class GeneratorMultiToSingleOutputMixin:
method forward (line 209) | def forward(self, x):
class DiscriminatorMultiToSingleOutputMixin (line 213) | class DiscriminatorMultiToSingleOutputMixin:
method forward (line 214) | def forward(self, x):
class DiscriminatorMultiToSingleOutputStackedMixin (line 219) | class DiscriminatorMultiToSingleOutputStackedMixin:
method __init__ (line 220) | def __init__(self, *args, return_feats_only_levels=None, **kwargs):
method forward (line 224) | def forward(self, x):
class MultiscaleDiscrSingleInput (line 239) | class MultiscaleDiscrSingleInput(SingleToMultiScaleInputMixin, Discrimin...
class MultiscaleResNetSingle (line 243) | class MultiscaleResNetSingle(GeneratorMultiToSingleOutputMixin, SingleTo...
FILE: model_cards/lama/saicinpainting/training/modules/pix2pixhd.py
class DotDict (line 15) | class DotDict(defaultdict):
class Identity (line 22) | class Identity(nn.Module):
method __init__ (line 23) | def __init__(self):
method forward (line 26) | def forward(self, x):
class ResnetBlock (line 30) | class ResnetBlock(nn.Module):
method __init__ (line 31) | def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(T...
method build_conv_block (line 47) | def build_conv_block(self, dim, padding_type, norm_layer, activation, ...
method forward (line 85) | def forward(self, x):
class ResnetBlock5x5 (line 92) | class ResnetBlock5x5(nn.Module):
method __init__ (line 93) | def __init__(self, dim, padding_type, norm_layer, activation=nn.ReLU(T...
method build_conv_block (line 109) | def build_conv_block(self, dim, padding_type, norm_layer, activation, ...
method forward (line 147) | def forward(self, x):
class MultidilatedResnetBlock (line 155) | class MultidilatedResnetBlock(nn.Module):
method __init__ (line 156) | def __init__(self, dim, padding_type, conv_layer, norm_layer, activati...
method build_conv_block (line 160) | def build_conv_block(self, dim, padding_type, conv_layer, norm_layer, ...
method forward (line 173) | def forward(self, x):
class MultiDilatedGlobalGenerator (line 178) | class MultiDilatedGlobalGenerator(nn.Module):
method __init__ (line 179) | def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3,
method forward (line 236) | def forward(self, input):
class ConfigGlobalGenerator (line 239) | class ConfigGlobalGenerator(nn.Module):
method __init__ (line 240) | def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3,
method forward (line 325) | def forward(self, input):
function make_dil_blocks (line 329) | def make_dil_blocks(dilated_blocks_n, dilation_block_kind, dilated_block...
class GlobalGenerator (line 341) | class GlobalGenerator(nn.Module):
method __init__ (line 342) | def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_bl...
method forward (line 435) | def forward(self, input):
class GlobalGeneratorGated (line 439) | class GlobalGeneratorGated(GlobalGenerator):
method __init__ (line 440) | def __init__(self, *args, **kwargs):
class GlobalGeneratorFromSuperChannels (line 450) | class GlobalGeneratorFromSuperChannels(nn.Module):
method __init__ (line 451) | def __init__(self, input_nc, output_nc, n_downsampling, n_blocks, supe...
method convert_super_channels (line 517) | def convert_super_channels(self, super_channels):
method forward (line 560) | def forward(self, input):
class NLayerDiscriminator (line 565) | class NLayerDiscriminator(BaseDiscriminator):
method __init__ (line 566) | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNo...
method get_all_activations (line 604) | def get_all_activations(self, x):
method forward (line 611) | def forward(self, x):
class MultidilatedNLayerDiscriminator (line 616) | class MultidilatedNLayerDiscriminator(BaseDiscriminator):
method __init__ (line 617) | def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNo...
method get_all_activations (line 655) | def get_all_activations(self, x):
method forward (line 662) | def forward(self, x):
class NLayerDiscriminatorAsGen (line 667) | class NLayerDiscriminatorAsGen(NLayerDiscriminator):
method forward (line 668) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/modules/spatial_transform.py
class LearnableSpatialTransformWrapper (line 7) | class LearnableSpatialTransformWrapper(nn.Module):
method __init__ (line 8) | def __init__(self, impl, pad_coef=0.5, angle_init_range=80, train_angl...
method forward (line 16) | def forward(self, x):
method transform (line 26) | def transform(self, x):
method inverse_transform (line 33) | def inverse_transform(self, y_padded_rotated, orig_x):
FILE: model_cards/lama/saicinpainting/training/modules/squeeze_excitation.py
class SELayer (line 4) | class SELayer(nn.Module):
method __init__ (line 5) | def __init__(self, channel, reduction=16):
method forward (line 15) | def forward(self, x):
FILE: model_cards/lama/saicinpainting/training/trainers/__init__.py
function get_training_model_class (line 6) | def get_training_model_class(kind):
function make_training_model (line 13) | def make_training_model(config):
function load_checkpoint (line 25) | def load_checkpoint(train_config, path, map_location='cuda', strict=True):
FILE: model_cards/lama/saicinpainting/training/trainers/base.py
function make_optimizer (line 24) | def make_optimizer(parameters, kind='adamw', **kwargs):
function update_running_average (line 34) | def update_running_average(result: nn.Module, new_iterate_model: nn.Modu...
function make_multiscale_noise (line 43) | def make_multiscale_noise(base_tensor, scales=6, scale_mode='bilinear'):
class BaseInpaintingTrainingModule (line 57) | class BaseInpaintingTrainingModule(ptl.LightningModule):
method __init__ (line 58) | def __init__(self, config, use_ddp, *args, predict_only=False, visual...
method configure_optimizers (line 117) | def configure_optimizers(self):
method train_dataloader (line 124) | def train_dataloader(self):
method val_dataloader (line 133) | def val_dataloader(self):
method training_step (line 147) | def training_step(self, batch, batch_idx, optimizer_idx=None):
method validation_step (line 151) | def validation_step(self, batch, batch_idx, dataloader_idx):
method training_step_end (line 163) | def training_step_end(self, batch_parts_outputs):
method validation_epoch_end (line 180) | def validation_epoch_end(self, outputs):
method _do_step (line 224) | def _do_step(self, batch, batch_idx, mode='train', optimizer_idx=None,...
method get_current_generator (line 267) | def get_current_generator(self, no_average=False):
method forward (line 272) | def forward(self, batch: Dict[str, torch.Tensor]) -> Dict[str, torch.T...
method generator_loss (line 276) | def generator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, torch...
method discriminator_loss (line 279) | def discriminator_loss(self, batch) -> Tuple[torch.Tensor, Dict[str, t...
method store_discr_outputs (line 282) | def store_discr_outputs(self, batch):
method get_ddp_rank (line 290) | def get_ddp_rank(self):
FILE: model_cards/lama/saicinpainting/training/trainers/default.py
function make_constant_area_crop_batch (line 17) | def make_constant_area_crop_batch(batch, **kwargs):
class DefaultInpaintingTrainingModule (line 26) | class DefaultInpaintingTrainingModule(BaseInpaintingTrainingModule):
method __init__ (line 27) | def __init__(self, *args, concat_mask=True, rescale_scheduler_kwargs=N...
method forward (line 47) | def forward(self, batch):
method generator_loss (line 88) | def generator_loss(self, batch):
method discriminator_loss (line 140) | def discriminator_loss(self, batch):
FILE: model_cards/lama/saicinpainting/training/visualizers/__init__.py
function make_visualizer (line 7) | def make_visualizer(kind, **kwargs):
FILE: model_cards/lama/saicinpainting/training/visualizers/base.py
class BaseVisualizer (line 14) | class BaseVisualizer:
method __call__ (line 16) | def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None):
function visualize_mask_and_images (line 23) | def visualize_mask_and_images(images_dict: Dict[str, np.ndarray], keys: ...
function visualize_mask_and_images_batch (line 61) | def visualize_mask_and_images_batch(batch: Dict[str, torch.Tensor], keys...
FILE: model_cards/lama/saicinpainting/training/visualizers/colors.py
function generate_colors (line 11) | def generate_colors(nlabels, type='bright', first_color_black=False, las...
FILE: model_cards/lama/saicinpainting/training/visualizers/directory.py
class DirectoryVisualizer (line 10) | class DirectoryVisualizer(BaseVisualizer):
method __init__ (line 13) | def __init__(self, outdir, key_order=DEFAULT_KEY_ORDER, max_items_in_b...
method __call__ (line 22) | def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None):
FILE: model_cards/lama/saicinpainting/training/visualizers/noop.py
class NoopVisualizer (line 4) | class NoopVisualizer(BaseVisualizer):
method __init__ (line 5) | def __init__(self, *args, **kwargs):
method __call__ (line 8) | def __call__(self, epoch_i, batch_i, batch, suffix='', rank=None):
FILE: model_cards/lama/saicinpainting/utils.py
function check_and_warn_input_range (line 20) | def check_and_warn_input_range(tensor, min_value, max_value, name):
function sum_dict_with_prefix (line 27) | def sum_dict_with_prefix(target, cur_dict, prefix, default=0):
function average_dicts (line 33) | def average_dicts(dict_list):
function add_prefix_to_keys (line 44) | def add_prefix_to_keys(dct, prefix):
function set_requires_grad (line 48) | def set_requires_grad(module, value):
function flatten_dict (line 53) | def flatten_dict(dct):
class LinearRamp (line 66) | class LinearRamp:
method __init__ (line 67) | def __init__(self, start_value=0, end_value=1, start_iter=-1, end_iter...
method __call__ (line 73) | def __call__(self, i):
class LadderRamp (line 82) | class LadderRamp:
method __init__ (line 83) | def __init__(self, start_iters, values):
method __call__ (line 88) | def __call__(self, i):
function get_ramp (line 93) | def get_ramp(kind='ladder', **kwargs):
function print_traceback_handler (line 101) | def print_traceback_handler(sig, frame):
function register_debug_signal_handlers (line 107) | def register_debug_signal_handlers(sig=signal.SIGUSR1, handler=print_tra...
function handle_deterministic_config (line 112) | def handle_deterministic_config(config):
function get_shape (line 121) | def get_shape(t):
function get_has_ddp_rank (line 134) | def get_has_ddp_rank():
function handle_ddp_subprocess (line 143) | def handle_ddp_subprocess():
function handle_ddp_parent_process (line 168) | def handle_ddp_parent_process():
FILE: model_cards/segment_anything/automatic_mask_generator.py
class SamAutomaticMaskGenerator (line 35) | class SamAutomaticMaskGenerator:
method __init__ (line 36) | def __init__(
method generate (line 137) | def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
method _generate_masks (line 197) | def _generate_masks(self, image: np.ndarray) -> MaskData:
method _process_crop (line 225) | def _process_crop(
method _process_batch (line 266) | def _process_batch(
method postprocess_small_regions (line 324) | def postprocess_small_regions(
FILE: model_cards/segment_anything/build_sam.py
function build_sam_vit_h (line 14) | def build_sam_vit_h(checkpoint=None):
function build_sam_vit_l (line 27) | def build_sam_vit_l(checkpoint=None):
function build_sam_vit_b (line 37) | def build_sam_vit_b(checkpoint=None):
function _build_sam (line 55) | def _build_sam(
FILE: model_cards/segment_anything/modeling/common.py
class MLPBlock (line 13) | class MLPBlock(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 25) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class LayerNorm2d (line 31) | class LayerNorm2d(nn.Module):
method __init__ (line 32) | def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
method forward (line 38) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: model_cards/segment_anything/modeling/image_encoder.py
class ImageEncoderViT (line 17) | class ImageEncoderViT(nn.Module):
method __init__ (line 18) | def __init__(
method forward (line 106) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class Block (line 119) | class Block(nn.Module):
method __init__ (line 122) | def __init__(
method forward (line 166) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class Attention (line 185) | class Attention(nn.Module):
method __init__ (line 188) | def __init__(
method forward (line 224) | def forward(self, x: torch.Tensor) -> torch.Tensor:
function window_partition (line 243) | def window_partition(x: torch.Tensor, window_size: int) -> Tuple[torch.T...
function window_unpartition (line 267) | def window_unpartition(
function get_rel_pos (line 292) | def get_rel_pos(q_size: int, k_size: int, rel_pos: torch.Tensor) -> torc...
function add_decomposed_rel_pos (line 325) | def add_decomposed_rel_pos(
class PatchEmbed (line 364) | class PatchEmbed(nn.Module):
method __init__ (line 369) | def __init__(
method forward (line 391) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: model_cards/segment_anything/modeling/mask_decoder.py
class MaskDecoder (line 16) | class MaskDecoder(nn.Module):
method __init__ (line 17) | def __init__(
method forward (line 71) | def forward(
method predict_masks (line 112) | def predict_masks(
class MLP (line 154) | class MLP(nn.Module):
method __init__ (line 155) | def __init__(
method forward (line 171) | def forward(self, x):
FILE: model_cards/segment_anything/modeling/prompt_encoder.py
class PromptEncoder (line 16) | class PromptEncoder(nn.Module):
method __init__ (line 17) | def __init__(
method get_dense_pe (line 62) | def get_dense_pe(self) -> torch.Tensor:
method _embed_points (line 73) | def _embed_points(
method _embed_boxes (line 93) | def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
method _embed_masks (line 102) | def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
method _get_batch_size (line 107) | def _get_batch_size(
method _get_device (line 125) | def _get_device(self) -> torch.device:
method forward (line 128) | def forward(
class PositionEmbeddingRandom (line 171) | class PositionEmbeddingRandom(nn.Module):
method __init__ (line 176) | def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = N...
method _pe_encoding (line 185) | def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
method forward (line 194) | def forward(self, size: Tuple[int, int]) -> torch.Tensor:
method forward_with_coords (line 207) | def forward_with_coords(
FILE: model_cards/segment_anything/modeling/sam.py
class Sam (line 18) | class Sam(nn.Module):
method __init__ (line 22) | def __init__(
method device (line 50) | def device(self) -> Any:
method forward (line 54) | def forward(
method postprocess_masks (line 133) | def postprocess_masks(
method preprocess (line 164) | def preprocess(self, x: torch.Tensor) -> torch.Tensor:
FILE: model_cards/segment_anything/modeling/transformer.py
class TwoWayTransformer (line 16) | class TwoWayTransformer(nn.Module):
method __init__ (line 17) | def __init__(
method forward (line 62) | def forward(
class TwoWayAttentionBlock (line 109) | class TwoWayAttentionBlock(nn.Module):
method __init__ (line 110) | def __init__(
method forward (line 151) | def forward(
class Attention (line 185) | class Attention(nn.Module):
method __init__ (line 191) | def __init__(
method _separate_heads (line 208) | def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
method _recombine_heads (line 213) | def _recombine_heads(self, x: Tensor) -> Tensor:
method forward (line 218) | def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
FILE: model_cards/segment_anything/predictor.py
class SamPredictor (line 17) | class SamPredictor:
method __init__ (line 18) | def __init__(
method set_image (line 34) | def set_image(
method set_torch_image (line 63) | def set_torch_image(
method predict (line 92) | def predict(
method predict_torch (line 169) | def predict_torch(
method get_image_embedding (line 245) | def get_image_embedding(self) -> torch.Tensor:
method device (line 259) | def device(self) -> torch.device:
method reset_image (line 262) | def reset_image(self) -> None:
FILE: model_cards/segment_anything/utils/amg.py
class MaskData (line 16) | class MaskData:
method __init__ (line 22) | def __init__(self, **kwargs) -> None:
method __setitem__ (line 29) | def __setitem__(self, key: str, item: Any) -> None:
method __delitem__ (line 35) | def __delitem__(self, key: str) -> None:
method __getitem__ (line 38) | def __getitem__(self, key: str) -> Any:
method items (line 41) | def items(self) -> ItemsView[str, Any]:
method filter (line 44) | def filter(self, keep: torch.Tensor) -> None:
method cat (line 59) | def cat(self, new_stats: "MaskData") -> None:
method to_numpy (line 72) | def to_numpy(self) -> None:
function is_box_near_crop_edge (line 78) | def is_box_near_crop_edge(
function box_xyxy_to_xywh (line 91) | def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
function batch_iterator (line 98) | def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None,...
function mask_to_rle_pytorch (line 107) | def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
function rle_to_mask (line 138) | def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
function area_from_rle (line 152) | def area_from_rle(rle: Dict[str, Any]) -> int:
function calculate_stability_score (line 156) | def calculate_stability_score(
function build_point_grid (line 179) | def build_point_grid(n_per_side: int) -> np.ndarray:
function build_all_layer_point_grids (line 189) | def build_all_layer_point_grids(
function generate_crop_boxes (line 200) | def generate_crop_boxes(
function uncrop_boxes_xyxy (line 237) | def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch...
function uncrop_points (line 246) | def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Te...
function uncrop_masks (line 255) | def uncrop_masks(
function remove_small_regions (line 267) | def remove_small_regions(
function coco_encode_rle (line 294) | def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
function batched_mask_to_box (line 303) | def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
FILE: model_cards/segment_anything/utils/onnx.py
class SamOnnxModel (line 17) | class SamOnnxModel(nn.Module):
method __init__ (line 25) | def __init__(
method resize_longest_image_size (line 42) | def resize_longest_image_size(
method _embed_points (line 51) | def _embed_points(self, point_coords: torch.Tensor, point_labels: torc...
method _embed_masks (line 69) | def _embed_masks(self, input_mask: torch.Tensor, has_mask_input: torch...
method mask_postprocessing (line 76) | def mask_postprocessing(self, masks: torch.Tensor, orig_im_size: torch...
method select_masks (line 92) | def select_masks(
method forward (line 108) | def forward(
FILE: model_cards/segment_anything/utils/transforms.py
class ResizeLongestSide (line 16) | class ResizeLongestSide:
method __init__ (line 23) | def __init__(self, target_length: int) -> None:
method apply_image (line 26) | def apply_image(self, image: np.ndarray) -> np.ndarray:
method apply_coords (line 33) | def apply_coords(self, coords: np.ndarray, original_size: Tuple[int, ....
method apply_boxes (line 47) | def apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, ......
method apply_image_torch (line 55) | def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor:
method apply_coords_torch (line 67) | def apply_coords_torch(
method apply_boxes_torch (line 83) | def apply_boxes_torch(
method get_preprocess_shape (line 94) | def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) ...
FILE: model_cards/setup.py
function write_version_file (line 44) | def write_version_file():
function get_extensions (line 56) | def get_extensions():
function parse_requirements (line 114) | def parse_requirements(fname="requirements.txt", with_version=True):
FILE: themes/common.js
function ChatBotHeight (line 1) | function ChatBotHeight() {
function get_elements (line 31) | function get_elements() {
FILE: themes/default.py
function adjust_theme (line 5) | def adjust_theme():
FILE: themes/green.py
function adjust_theme (line 5) | def adjust_theme():
FILE: utils/AudioRecorder.py
class BaseRecorder (line 9) | class BaseRecorder:
method __init__ (line 10) | def __init__(self, source, source_name):
method adjust_for_noise (line 21) | def adjust_for_noise(self, device_name, msg):
method record_into_queue (line 27) | def record_into_queue(self, audio_queue):
class DefaultMicRecorder (line 34) | class DefaultMicRecorder(BaseRecorder):
method __init__ (line 35) | def __init__(self):
class DefaultSpeakerRecorder (line 39) | class DefaultSpeakerRecorder(BaseRecorder):
method __init__ (line 40) | def __init__(self):
FILE: utils/AudioTrans.py
class AudioTranscriber (line 18) | class AudioTranscriber:
method __init__ (line 19) | def __init__(self, mic_source, speaker_source, model,two_ways=False):
method transcribe_audio_queue (line 45) | def transcribe_audio_queue(self, audio_queue):
method update_last_sample_and_phrase_status (line 73) | def update_last_sample_and_phrase_status(self, who_spoke, data, time_s...
method process_mic_data (line 89) | def process_mic_data(self, data, temp_file_name):
method process_speaker_data (line 95) | def process_speaker_data(self, data, temp_file_name):
method update_transcript (line 103) | def update_transcript(self, who_spoke, text, time_spoken):
method get_transcript (line 115) | def get_transcript(self):
method clear_transcript_data (line 122) | def clear_transcript_data(self):
FILE: utils/__init__.py
function set_logging (line 21) | def set_logging(name=None, verbose=VERBOSE):
function write_categories (line 36) | def write_categories(cls_name, file_path):
function threaded (line 45) | def threaded(func):
class TryExcept (line 54) | class TryExcept(contextlib.ContextDecorator):
method __init__ (line 56) | def __init__(self, msg='', verbose=True):
method __enter__ (line 60) | def __enter__(self):
method __exit__ (line 63) | def __exit__(self, exc_type, value, traceback):
function check_suffix (line 68) | def check_suffix(file=None, suffix=('.pt',), msg=''):
function is_online (line 78) | def is_online() -> bool:
function url2file (line 92) | def url2file(url):
function emojis (line 97) | def emojis(str=''):
function clean_url (line 100) | def clean_url(url):
function check_requirements (line 106) | def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(...
FILE: utils/audio.py
class SetupError (line 28) | class SetupError(Exception):
class WaitTimeoutError (line 32) | class WaitTimeoutError(Exception):
class RequestError (line 36) | class RequestError(Exception):
class UnknownValueError (line 40) | class UnknownValueError(Exception):
class TranscriptionNotReady (line 44) | class TranscriptionNotReady(Exception):
class TranscriptionFailed (line 48) | class TranscriptionFailed(Exception):
class PortableNamedTemporaryFile (line 51) | class PortableNamedTemporaryFile(object):
method __init__ (line 53) | def __init__(self, mode="w+b"):
method __enter__ (line 56) | def __enter__(self):
method __exit__ (line 65) | def __exit__(self, exc_type, exc_value, traceback):
method write (line 69) | def write(self, *args, **kwargs):
method writelines (line 72) | def writelines(self, *args, **kwargs):
method flush (line 75) | def flush(self, *args, **kwargs):
class AudioSource (line 78) | class AudioSource(object):
method __init__ (line 79) | def __init__(self):
method __enter__ (line 82) | def __enter__(self):
method __exit__ (line 85) | def __exit__(self, exc_type, exc_value, traceback):
class Microphone (line 89) | class Microphone(AudioSource):
method __init__ (line 105) | def __init__(self, device_index=None, sample_rate=None, chunk_size=102...
method get_pyaudio (line 136) | def get_pyaudio():
method list_microphone_names (line 150) | def list_microphone_names():
method list_working_microphones (line 167) | def list_working_microphones():
method __enter__ (line 206) | def __enter__(self):
method __exit__ (line 234) | def __exit__(self, exc_type, exc_value, traceback):
class MicrophoneStream (line 241) | class MicrophoneStream(object):
method __init__ (line 242) | def __init__(self, pyaudio_stream):
method read (line 245) | def read(self, size):
method close (line 248) | def close(self):
class AudioFile (line 257) | class AudioFile(AudioSource):
method __init__ (line 272) | def __init__(self, filename_or_fileobject):
method __enter__ (line 284) | def __enter__(self):
method __exit__ (line 341) | def __exit__(self, exc_type, exc_value, traceback):
class AudioFileStream (line 347) | class AudioFileStream(object):
method __init__ (line 348) | def __init__(self, audio_reader, little_endian, samples_24_bit_prete...
method read (line 353) | def read(self, size=-1):
class Recognizer (line 373) | class Recognizer(AudioSource):
method __init__ (line 374) | def __init__(self):
method record (line 388) | def record(self, source, duration=None, offset=None):
method adjust_for_ambient_noise (line 421) | def adjust_for_ambient_noise(self, source, duration=1):
method snowboy_wait_for_hot_word (line 448) | def snowboy_wait_for_hot_word(self, snowboy_location, snowboy_hot_word...
method listen (line 497) | def listen(self, source, timeout=None, phrase_time_limit=None, snowboy...
method listen_in_background (line 594) | def listen_in_background(self, source, callback, phrase_time_limit=None):
method recognize_sphinx (line 627) | def recognize_sphinx(self, audio_data, language="en-US", keyword_entri...
class AudioData (line 719) | class AudioData(object):
method __init__ (line 732) | def __init__(self, frame_data, sample_rate, sample_width):
method get_segment (line 741) | def get_segment(self, start_ms=None, end_ms=None):
method get_raw_data (line 771) | def get_raw_data(self, convert_rate=None, convert_width=None):
method get_wav_data (line 843) | def get_wav_data(self, convert_rate=None, convert_width=None, nchannel...
method get_aiff_data (line 874) | def get_aiff_data(self, convert_rate=None, convert_width=None):
method get_flac_data (line 916) | def get_flac_data(self, convert_rate=None, convert_width=None):
function get_flac_converter (line 968) | def get_flac_converter():
function shutil_which (line 1018) | def shutil_which(pgm):
FILE: utils/check_proxy.py
function check_proxy (line 2) | def check_proxy(proxies):
function _check_with_backup_source (line 27) | def _check_with_backup_source(proxies):
function backup_and_download (line 33) | def backup_and_download(current_version, remote_version):
function patch_and_restart (line 65) | def patch_and_restart(path):
function get_current_version (line 97) | def get_current_version():
function auto_update (line 107) | def auto_update(raise_error=False):
function warm_up_modules (line 156) | def warm_up_modules():
FILE: utils/colorful.py
function print红 (line 11) | def print红(*kw,**kargs):
function print绿 (line 13) | def print绿(*kw,**kargs):
function print黄 (line 15) | def print黄(*kw,**kargs):
function print蓝 (line 17) | def print蓝(*kw,**kargs):
function print紫 (line 19) | def print紫(*kw,**kargs):
function print靛 (line 21) | def print靛(*kw,**kargs):
function print亮红 (line 24) | def print亮红(*kw,**kargs):
function print亮绿 (line 26) | def print亮绿(*kw,**kargs):
function print亮黄 (line 28) | def print亮黄(*kw,**kargs):
function print亮蓝 (line 30) | def print亮蓝(*kw,**kargs):
function print亮紫 (line 32) | def print亮紫(*kw,**kargs):
function print亮靛 (line 34) | def print亮靛(*kw,**kargs):
function sprint红 (line 38) | def sprint红(*kw):
function sprint绿 (line 40) | def sprint绿(*kw):
function sprint黄 (line 42) | def sprint黄(*kw):
function sprint蓝 (line 44) | def sprint蓝(*kw):
function sprint紫 (line 46) | def sprint紫(*kw):
function sprint靛 (line 48) | def sprint靛(*kw):
function sprint亮红 (line 50) | def sprint亮红(*kw):
function sprint亮绿 (line 52) | def sprint亮绿(*kw):
function sprint亮黄 (line 54) | def sprint亮黄(*kw):
function sprint亮蓝 (line 56) | def sprint亮蓝(*kw):
function sprint亮紫 (line 58) | def sprint亮紫(*kw):
function sprint亮靛 (line 60) | def sprint亮靛(*kw):
FILE: utils/dataloads.py
function get_hash (line 51) | def get_hash(paths):
function exif_size (line 59) | def exif_size(img):
function exif_transpose (line 69) | def exif_transpose(image):
function seed_worker (line 94) | def seed_worker(worker_id):
function create_dataloader (line 101) | def create_dataloader(path,
function create_kpt_dataloader (line 160) | def create_kpt_dataloader(path, imgsz, batch_size, stride, opt, hyp=None...
class InfiniteDataLoader (line 190) | class InfiniteDataLoader(dataloader.DataLoader):
method __init__ (line 195) | def __init__(self, *args, **kwargs):
method __len__ (line 200) | def __len__(self):
method __iter__ (line 203) | def __iter__(self):
class _RepeatSampler (line 208) | class _RepeatSampler:
method __init__ (line 214) | def __init__(self, sampler):
method __iter__ (line 217) | def __iter__(self):
class LoadScreenshots (line 222) | class LoadScreenshots:
method __init__ (line 224) | def __init__(self, source, img_size=640, stride=32, auto=True, transfo...
method __iter__ (line 253) | def __iter__(self):
method __next__ (line 256) | def __next__(self):
class LoadImages (line 271) | class LoadImages:
method __init__ (line 273) | def __init__(self, path, img_size=640, stride=32, auto=True, transform...
method __iter__ (line 308) | def __iter__(self):
method __next__ (line 312) | def __next__(self):
method _new_video (line 352) | def _new_video(self, path):
method _cv2_rotate (line 360) | def _cv2_rotate(self, im):
method __len__ (line 370) | def __len__(self):
class LoadWebcam (line 375) | class LoadWebcam: # for inference
method __init__ (line 377) | def __init__(self, pipe='0', img_size=640, stride=32):
method __iter__ (line 384) | def __iter__(self):
method __next__ (line 388) | def __next__(self):
method __len__ (line 411) | def __len__(self):
class LoadStreams (line 415) | class LoadStreams:
method __init__ (line 417) | def __init__(self, sources='file.streams', img_size=640, stride=32, au...
method update (line 460) | def update(self, i, cap, stream):
method __iter__ (line 477) | def __iter__(self):
method __next__ (line 481) | def __next__(self):
method __len__ (line 497) | def __len__(self):
function img2label_paths (line 501) | def img2label_paths(img_paths):
class LoadImagesAndLabels (line 507) | class LoadImagesAndLabels(Dataset):
method __init__ (line 512) | def __init__(self,
method check_cache_ram (line 662) | def check_cache_ram(self, safety_margin=0.1, prefix=''):
method cache_labels (line 679) | def cache_labels(self, path=Path('./labels.cache'), prefix=''):
method __len__ (line 717) | def __len__(self):
method __getitem__ (line 726) | def __getitem__(self, index):
method load_image (line 812) | def load_image(self, i):
method cache_images_to_disk (line 829) | def cache_images_to_disk(self, i):
method load_mosaic (line 835) | def load_mosaic(self, index):
method load_mosaic9 (line 893) | def load_mosaic9(self, index):
method load_samples (line 970) | def load_samples(self, index):
method collate_fn (line 1022) | def collate_fn(batch):
method collate_fn_v8 (line 1029) | def collate_fn_v8(batch):
method collate_fn4 (line 1045) | def collate_fn4(batch):
function create_folder (line 1073) | def create_folder(path='./new'):
function flatten_recursive (line 1080) | def flatten_recursive(path=DATASETS_DIR / 'coco128'):
function extract_boxes (line 1088) | def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.datasets...
function autosplit (line 1122) | def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0...
function verify_image_label (line 1146) | def verify_image_label(args):
FILE: utils/downloads.py
function is_url (line 24) | def is_url(url, check_online=True):
function unzip_file (line 34) | def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
function gsutil_getsize (line 49) | def gsutil_getsize(url=''):
function url_getsize (line 54) | def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
function check_disk_space (line 59) | def check_disk_space(url=None, sf=1.5, hard=True):
function safe_download (line 90) | def safe_download(url,
function attempt_download (line 176) | def attempt_download(file, repo='positive666/Prompt-Can-Anything', relea...
function download_model (line 225) | def download_model(model_url, save_directory):
FILE: utils/ops.py
function write_xml (line 63) | def write_xml(filename, img_name, img_root,preds, width, height):
function save_format (line 94) | def save_format(label_format: str ="xml",save_path :str ="runs/xmls" ,
function dilate_mask (line 107) | def dilate_mask(mask, dilate_factor=15):
function erode_mask (line 116) | def erode_mask(mask, dilate_factor=15):
function torch_nms_box (line 125) | def torch_nms_box(bboxes, scores, iou_threshold=0.5):
function check_class_names (line 171) | def check_class_names(names):
function is_kaggle (line 187) | def is_kaggle():
function is_writeable (line 192) | def is_writeable(dir, test=False):
function set_logging (line 206) | def set_logging(name=None, verbose=VERBOSE):
function set_logging (line 222) | def set_logging(name=LOGGING_NAME, verbose=True):
function user_config_dir (line 250) | def user_config_dir(dir='positive666', env_var='YOLOV5_CONFIG_DIR'):
class Profile (line 266) | class Profile(contextlib.ContextDecorator):
method __init__ (line 268) | def __init__(self, t=0.0):
method __enter__ (line 272) | def __enter__(self):
method __exit__ (line 276) | def __exit__(self, type, value, traceback):
method time (line 280) | def time(self):
class Timeout (line 286) | class Timeout(contextlib.ContextDecorator):
method __init__ (line 288) | def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors...
method _timeout_handler (line 293) | def _timeout_handler(self, signum, frame):
method __enter__ (line 296) | def __enter__(self):
method __exit__ (line 301) | def __exit__(self, exc_type, exc_val, exc_tb):
class WorkingDirectory (line 308) | class WorkingDirectory(contextlib.ContextDecorator):
method __init__ (line 310) | def __init__(self, new_dir):
method __enter__ (line 314) | def __enter__(self):
method __exit__ (line 317) | def __exit__(self, exc_type, exc_val, exc_tb):
function methods (line 322) | def methods(instance):
function print_args (line 327) | def print_args(args: Optional[dict] = None, show_file=True, show_func=Fa...
function init_seeds (line 342) | def init_seeds(seed=0, deterministic=False):
function intersect_dicts (line 360) | def intersect_dicts(da, db, exclude=()):
function get_default_args (line 364) | def get_default_args(func):
function merge_bases (line 369) | def merge_bases(rois, coeffs, attn_r, num_b, location_to_inds=None):
function get_latest_run (line 387) | def get_latest_run(search_dir='.'):
function is_docker (line 393) | def is_docker():
function is_colab (line 403) | def is_colab():
function is_jupyter (line 408) | def is_jupyter():
function is_pip (line 420) | def is_pip():
function is_ascii (line 425) | def is_ascii(s=''):
function is_chinese (line 431) | def is_chinese(s='人工智能'):
function file_age (line 436) | def file_age(path=__file__):
function file_date (line 442) | def file_date(path=__file__):
function file_size (line 448) | def file_size(path):
function check_online (line 460) | def check_online():
function check_yolo (line 475) | def check_yolo(verbose=True):
function git_describe (line 497) | def git_describe(path=ROOT): # path must be a directory
function check_git_status (line 508) | def check_git_status(repo='positive666/yolo_research', branch='master'):
function check_git_info (line 535) | def check_git_info(path='.'):
function check_python (line 551) | def check_python(minimum='3.7.0'):
function check_version (line 556) | def check_version(current='0.0.0', minimum='0.0.0', name='version ', pin...
function check_requirements (line 569) | def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(...
function check_img_size (line 605) | def check_img_size(imgsz, s=32, floor=0):
function check_imshow (line 617) | def check_imshow(warn=False):
function check_suffix (line 633) | def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
function check_yaml (line 644) | def check_yaml(file, suffix=('.yaml', '.yml')):
function check_file (line 649) | def check_file(file, suffix=''):
function check_dataset (line 673) | def check_dataset(data, autodownload=True):
function yaml_load (line 735) | def yaml_load(file='data.yaml', append_filename=False):
function yaml_save (line 754) | def yaml_save(file='data.yaml', data={}):
function unzip_file (line 759) | def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
function url2file (line 768) | def url2file(url):
function download (line 774) | def download(url, dir='.', unzip=True, delete=True, curl=False, threads=...
function make_divisible (line 822) | def make_divisible(x, divisor):
function clean_str (line 829) | def clean_str(s):
function one_cycle (line 834) | def one_cycle(y1=0.0, y2=1.0, steps=100):
function colorstr (line 839) | def colorstr(*input):
function labels_to_class_weights (line 865) | def labels_to_class_weights(labels, nc=80):
function labels_to_image_weights (line 884) | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
function coco80_to_coco91_class (line 891) | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index...
function xyxy2xywh (line 903) | def xyxy2xywh(x):
function xywh2xyxy (line 913) | def xywh2xyxy(x):
function xywh2xyxy_export (line 923) | def xywh2xyxy_export(cx,cy,w,h):
function xywhn2xyxy (line 935) | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0,kpt_label=False):
function xyxy2xywhn (line 955) | def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
function xyn2xy (line 967) | def xyn2xy(x, w=640, h=640, padw=0, padh=0):
function segment2box (line 975) | def segment2box(segment, width=640, height=640):
function segments2boxes (line 983) | def segments2boxes(segments):
function resample_segments (line 992) | def resample_segments(segments, n=1000):
function scale_boxes (line 1002) | def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None,kpt_label=...
function box_iou (line 1024) | def box_iou(box1, box2, eps=1e-7):
function scale_segments (line 1044) | def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, nor...
function clip_boxes (line 1063) | def clip_boxes(boxes, shape, step=None):
function clip_segments (line 1080) | def clip_segments(boxes, shape):
function scale_image (line 1089) | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
function clip_boxes (line 1121) | def clip_boxes(boxes, shape):
function non_max_suppression (line 1138) | def non_max_suppression(prediction,
function non_max_suppression_keypoint (line 1246) | def non_max_suppression_keypoint(prediction, conf_thres=0.25, iou_thres=...
function increment_path (line 1346) | def increment_path(path, exist_ok=False, sep='', mkdir=False):
function imread (line 1376) | def imread(path, flags=cv2.IMREAD_COLOR):
function imwrite (line 1380) | def imwrite(path, im):
function imshow (line 1388) | def imshow(path, im):
function fitness (line 1396) | def fitness(x):
function split_images_to_folders (line 1402) | def split_images_to_folders(folder_path, num_threads, num_folders):
function merge_image_folders (line 1433) | def merge_image_folders(folder_path):
FILE: utils/plot.py
function save_mask_data (line 33) | def save_mask_data(output_dir, caption, mask_list, box_list, label_list,...
function scale_image (line 61) | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
function show_mask (line 88) | def show_mask(mask, ax, random_color=False,cls_color=None):
function Draw_img (line 103) | def Draw_img(data,draw,mode='box',label=None,random_color=None):
function show_box (line 132) | def show_box(box, ax, label):
class Colors (line 138) | class Colors:
method __init__ (line 140) | def __init__(self):
method __call__ (line 147) | def __call__(self, i, bgr=False):
method hex2rgb (line 152) | def hex2rgb(h): # rgb order (PIL)
function check_font (line 158) | def check_font(font=FONT, url="",progress=False):
function check_pil_font (line 167) | def check_pil_font(font=FONT, size=10):
class Annotator (line 183) | class Annotator:
method __init__ (line 185) | def __init__(self, im, line_width=None, font_size=None, font='Arial.tt...
method box_label (line 198) | def box_label(self, box, label='', color=(128, 128, 128), txt_color=(2...
method masks (line 229) | def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
method rectangle (line 259) | def rectangle(self, xy, fill=None, outline=None, width=1):
method text (line 263) | def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
method fromarray (line 270) | def fromarray(self, im):
method result (line 275) | def result(self):
function feature_visualization (line 280) | def feature_visualization(x, module_type, stage, n=32, save_dir=Path('ru...
function hist2d (line 308) | def hist2d(x, y, n=100):
function butter_lowpass_filtfilt (line 317) | def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
function output_to_target (line 330) | def output_to_target(output, max_det=300):
function plot_images (line 341) | def plot_images(images, targets, paths=None, fname='images.jpg', names=N...
function plot_images_masks (line 405) | def plot_images_masks(images,
function plot_lr_scheduler (line 510) | def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
function plot_val_txt (line 527) | def plot_val_txt(): # from utils.plots import *; plot_val()
function plot_targets_txt (line 544) | def plot_targets_txt(): # from utils.plots import *; plot_targets_txt()
function plot_val_study (line 557) | def plot_val_study(file='', dir='', x=None): # from utils.plots import ...
function plot_labels (line 603) | def plot_labels(labels, names=(), save_dir=Path('')):
function plot_evolve (line 678) | def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots im...
function plot_results (line 705) | def plot_results(file='path/to/results.csv', dir=''):
function profile_idetection (line 731) | def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
function save_one_box (line 762) | def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, squar...
function plot_one_box (line 780) | def plot_one_box(x, im, color=None, label=None, line_thickness=3, kpt_la...
function plot_skeleton_kpts (line 800) | def plot_skeleton_kpts(im, kpts, steps, orig_shape=None):
FILE: utils/text2speech.py
class T2S (line 7) | class T2S():
method __init__ (line 8) | def __init__(self) -> None:
method test (line 12) | def test(self, text, language='en'):
FILE: utils/textsplitter/ali_text_splitter.py
class AliTextSplitter (line 6) | class AliTextSplitter(CharacterTextSplitter):
method __init__ (line 7) | def __init__(self, pdf: bool = False, **kwargs):
method split_text (line 11) | def split_text(self, text: str) -> List[str]:
FILE: utils/textsplitter/chinese_text_splitter.py
class ChineseTextSplitter (line 7) | class ChineseTextSplitter(CharacterTextSplitter):
method __init__ (line 8) | def __init__(self, pdf: bool = False, sentence_size: int = SENTENCE_SI...
method split_text1 (line 13) | def split_text1(self, text: str) -> List[str]:
method split_text (line 27) | def split_text(self, text: str) -> List[str]: ##此处需要进一步优化逻辑
FILE: utils/textsplitter/zh_title_enhance.py
function under_non_alpha_ratio (line 5) | def under_non_alpha_ratio(text: str, threshold: float = 0.5):
function is_possible_title (line 30) | def is_possible_title(
function zh_title_enhance (line 88) | def zh_title_enhance(docs: Document) -> Document:
FILE: utils/toolbox.py
class ChatBotWithCookies (line 24) | class ChatBotWithCookies(list):
method __init__ (line 25) | def __init__(self, cookie):
method write_list (line 28) | def write_list(self, list):
method get_list (line 32) | def get_list(self):
method get_cookies (line 35) | def get_cookies(self):
function ArgsGeneralWrapper (line 39) | def ArgsGeneralWrapper(f):
function update_ui (line 73) | def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面
function trimmed_format_exc (line 80) | def trimmed_format_exc():
function CatchException (line 87) | def CatchException(f):
function HotReload (line 110) | def HotReload(f):
function get_reduce_token_percent (line 146) | def get_reduce_token_percent(text):
function write_results_to_file (line 164) | def write_results_to_file(history, file_name=None):
function write_history_to_file (line 195) | def write_history_to_file(history, file_basename=None, file_fullname=None):
function regular_txt_to_markdown (line 225) | def regular_txt_to_markdown(text):
function report_execption (line 237) | def report_execption(chatbot, history, a, b):
function text_divide_paragraph (line 246) | def text_divide_paragraph(text):
function markdown_convertion (line 262) | def markdown_convertion(txt):
function close_up_code_segment_during_stream (line 338) | def close_up_code_segment_during_stream(gpt_reply):
function format_io (line 364) | def format_io(self, y):
function find_free_port (line 386) | def find_free_port():
function extract_archive (line 398) | def extract_archive(file_path, dest_dir):
function find_recent_files (line 443) | def find_recent_files(directory):
function promote_file_to_downloadzone (line 466) | def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
function disable_auto_promotion (line 481) | def disable_auto_promotion(chatbot):
function on_file_uploaded (line 485) | def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
function on_report_generated (line 523) | def on_report_generated(cookies, files, chatbot):
function is_openai_api_key (line 539) | def is_openai_api_key(key):
function is_api2d_key (line 544) | def is_api2d_key(key):
function is_any_api_key (line 550) | def is_any_api_key(key):
function what_keys (line 559) | def what_keys(keys):
Copy disabled (too large)
Download .json
Condensed preview — 407 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (12,964K chars).
[
{
"path": ".gitignore",
"chars": 1980,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".gitmodules",
"chars": 104,
"preview": "[submodule \"VisualGLM_6B\"]\n\tpath = VisualGLM_6B\n\turl = https://github.com/positive666/VisualGLM_6B.git\n\n"
},
{
"path": "LICENSE",
"chars": 35149,
"preview": " GNU GENERAL PUBLIC LICENSE\n Version 3, 29 June 2007\n\n Copyright (C) 2007 Free "
},
{
"path": "README.md",
"chars": 15444,
"preview": "# Prompt-Can-Anything\n\n<p align=\"center\"> English | <a href=\"README_zh.md\">中文</a></p>\n\nThis is a gradio library and res"
},
{
"path": "README_zh.md",
"chars": 9303,
"preview": "\n\n# Prompt-Can-Anything\n\n这是一个结合SOTA AI的应用web库以及研究的储备库,它能够帮你实现一切:你只需要提供提示!只需一次点击!通过SOTA模型的提示和创意,你可以做任何事情。\n\n**动机**\n\n当前:为工程"
},
{
"path": "a2f.py",
"chars": 12880,
"preview": "import argparse\nimport functools\nimport os\nimport yaml\nimport numpy as np\nimport ffmpeg\nimport grpc\nimport grpc\nimport a"
},
{
"path": "app.py",
"chars": 59164,
"preview": "from model_cards.autoback import AutoBackend\nimport argparse\nimport os\nimport platform\nimport sys\nfrom pathlib import Pa"
},
{
"path": "audio2face_pb2.py",
"chars": 16751,
"preview": "# -*- coding: utf-8 -*-\n# Generated by the protocol buffer compiler. DO NOT EDIT!\n# source: audio2face.proto\n\"\"\"Generat"
},
{
"path": "audio2face_pb2_grpc.py",
"chars": 4209,
"preview": "# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!\n\"\"\"Client and server classes corresponding to prot"
},
{
"path": "audio2face_streaming_utils.py",
"chars": 6975,
"preview": "\"\"\"\nThis demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests.\nThere are two "
},
{
"path": "audio_segment.py",
"chars": 2665,
"preview": "\n\nimport os\nimport gradio as gr\nfrom pydub import AudioSegment\n\n# function to crop audio according to the given start an"
},
{
"path": "auto_label_demo.py",
"chars": 19802,
"preview": "from model_cards.autoback import AutoBackend\nimport argparse\nimport os\nimport platform\nimport sys\nfrom pathlib import Pa"
},
{
"path": "batch_clean_gpu.txt",
"chars": 85,
"preview": "sudo fuser -v /dev/nvidia* |awk '{for(i=1;i<=NF;i++)print \"kill -9 \" $i;}' | sudo sh\n"
},
{
"path": "crazy_functions/Langchain知识库.py",
"chars": 5182,
"preview": "from utils.toolbox import CatchException, update_ui, ProxyNetworkActivate\nfrom .crazy_utils import request_gpt_model_in_"
},
{
"path": "crazy_functions/Latex全文润色.py",
"chars": 10132,
"preview": "from utils.toolbox import update_ui, trimmed_format_exc\nfrom utils.toolbox import CatchException, report_execption, writ"
},
{
"path": "crazy_functions/Latex全文翻译.py",
"chars": 7432,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfa"
},
{
"path": "crazy_functions/Latex输出PDF结果.py",
"chars": 13975,
"preview": "from utils.toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone\nfrom u"
},
{
"path": "crazy_functions/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "crazy_functions/chatglm微调工具.py",
"chars": 5758,
"preview": "from utils.toolbox import CatchException, update_ui, promote_file_to_downloadzone\nfrom .crazy_utils import request_gpt_m"
},
{
"path": "crazy_functions/crazy_functions_test.py",
"chars": 10204,
"preview": "\"\"\"\n这是什么?\n 这个文件用于函数插件的单元测试\n 运行方法 python crazy_functions/crazy_functions_test.py\n\"\"\"\n \n# ======================="
},
{
"path": "crazy_functions/crazy_utils.py",
"chars": 31908,
"preview": "from utils.toolbox import update_ui, get_conf, trimmed_format_exc\nimport threading\n\ndef input_clipping(inputs, history, "
},
{
"path": "crazy_functions/latex_fns/latex_actions.py",
"chars": 23361,
"preview": "from utils.toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面\nfrom utils.toolbox import zip_folder, objdu"
},
{
"path": "crazy_functions/latex_fns/latex_toolbox.py",
"chars": 17390,
"preview": "import os, shutil\nimport re\nimport numpy as np\nPRESERVE = 0\nTRANSFORM = 1\n\npj = os.path.join\n\nclass LinkedListNode():\n "
},
{
"path": "crazy_functions/live_audio/aliyunASR.py",
"chars": 4781,
"preview": "import time, threading, json\n\n\nclass AliyunASR():\n\n def test_on_sentence_begin(self, message, *args):\n # print"
},
{
"path": "crazy_functions/live_audio/audio_io.py",
"chars": 1464,
"preview": "import numpy as np\nfrom scipy import interpolate\n\ndef Singleton(cls):\n _instance = {}\n \n def _singleton(*args, **k"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/buffer.cpp",
"chars": 1695,
"preview": "#include \"libipc/buffer.h\"\n#include \"libipc/utility/pimpl.h\"\n\n#include <cstring>\n\nnamespace ipc {\n\nbool operator==(buffe"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/ipc.cpp",
"chars": 23714,
"preview": "\n#include <type_traits>\n#include <cstring>\n#include <algorithm>\n#include <utility> // std::pair, std::move, std"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/policy.h",
"chars": 539,
"preview": "#pragma once\n\n#include <type_traits>\n\n#include \"libipc/def.h\"\n#include \"libipc/prod_cons.h\"\n\n#include \"libipc/circ/elem_"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/pool_alloc.cpp",
"chars": 320,
"preview": "#include \"libipc/pool_alloc.h\"\n\n#include \"libipc/memory/resource.h\"\n\nnamespace ipc {\nnamespace mem {\n\nvoid* pool_alloc::"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/prod_cons.h",
"chars": 16667,
"preview": "#pragma once\n\n#include <atomic>\n#include <utility>\n#include <cstring>\n#include <type_traits>\n#include <cstdint>\n\n#includ"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/queue.h",
"chars": 5709,
"preview": "#pragma once\n\n#include <type_traits>\n#include <new>\n#include <utility> // [[since C++14]]: std::exchange\n#include <algo"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/shm.cpp",
"chars": 1953,
"preview": "\n#include <string>\n#include <utility>\n\n#include \"libipc/shm.h\"\n\n#include \"libipc/utility/pimpl.h\"\n#include \"libipc/memor"
},
{
"path": "crazy_functions/test_project/cpp/cppipc/waiter.h",
"chars": 1927,
"preview": "#pragma once\n\n#include <utility>\n#include <string>\n#include <mutex>\n#include <atomic>\n\n#include \"libipc/def.h\"\n#include "
},
{
"path": "crazy_functions/test_project/cpp/cppipc/来源",
"chars": 129,
"preview": "https://github.com/mutouyun/cpp-ipc\n\nA high-performance inter-process communication library using shared memory on Linux"
},
{
"path": "crazy_functions/test_project/cpp/libJPG/jpgd.cpp",
"chars": 86773,
"preview": "// jpgd.cpp - C++ class for JPEG decompression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n// Last updated A"
},
{
"path": "crazy_functions/test_project/cpp/libJPG/jpgd.h",
"chars": 13766,
"preview": "// jpgd.h - C++ class for JPEG decompression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n#ifndef JPEG_DECODE"
},
{
"path": "crazy_functions/test_project/cpp/libJPG/jpge.cpp",
"chars": 35392,
"preview": "// jpge.cpp - C++ class for JPEG compression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n// v1.01, Dec. 18, "
},
{
"path": "crazy_functions/test_project/cpp/libJPG/jpge.h",
"chars": 6546,
"preview": "\n// jpge.h - C++ class for JPEG compression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n// Alex Evans: Added"
},
{
"path": "crazy_functions/test_project/cpp/libJPG/来源",
"chars": 149,
"preview": "jpge.h - C++ class for JPEG compression.\nPublic domain, Rich Geldreich <richgel99@gmail.com>\nAlex Evans: Added RGBA supp"
},
{
"path": "crazy_functions/test_project/cpp/longcode/jpgd.cpp",
"chars": 86773,
"preview": "// jpgd.cpp - C++ class for JPEG decompression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n// Last updated A"
},
{
"path": "crazy_functions/test_project/cpp/longcode/jpge.cpp",
"chars": 35392,
"preview": "// jpge.cpp - C++ class for JPEG compression.\n// Public domain, Rich Geldreich <richgel99@gmail.com>\n// v1.01, Dec. 18, "
},
{
"path": "crazy_functions/test_project/cpp/longcode/prod_cons.h",
"chars": 16667,
"preview": "#pragma once\n\n#include <atomic>\n#include <utility>\n#include <cstring>\n#include <type_traits>\n#include <cstdint>\n\n#includ"
},
{
"path": "crazy_functions/test_project/latex/attention/background.tex",
"chars": 8376,
"preview": "The goal of reducing sequential computation also forms the foundation of the Extended Neural GPU \\citep{extendedngpu}, B"
},
{
"path": "crazy_functions/test_project/latex/attention/introduction.tex",
"chars": 5300,
"preview": "Recurrent neural networks, long short-term memory \\citep{hochreiter1997} and gated recurrent \\citep{gruEval14} neural ne"
},
{
"path": "crazy_functions/test_project/latex/attention/model_architecture.tex",
"chars": 17109,
"preview": "\n\\begin{figure}\n \\centering\n \\includegraphics[scale=0.6]{Figures/ModalNet-21}\n \\caption{The Transformer - model archi"
},
{
"path": "crazy_functions/test_project/latex/attention/parameter_attention.tex",
"chars": 3489,
"preview": "\\pagebreak\n\\section*{Two Feed-Forward Layers = Attention over Parameters}\\label{sec:parameter_attention}\n\nIn addition to"
},
{
"path": "crazy_functions/test_project/latex/attention/来源",
"chars": 1340,
"preview": "chatgpt的老祖宗《Attention is all you need》\n\nAshish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N"
},
{
"path": "crazy_functions/test_project/python/dqn/__init__.py",
"chars": 106,
"preview": "from stable_baselines3.dqn.dqn import DQN\nfrom stable_baselines3.dqn.policies import CnnPolicy, MlpPolicy\n"
},
{
"path": "crazy_functions/test_project/python/dqn/dqn.py",
"chars": 10780,
"preview": "from typing import Any, Dict, List, Optional, Tuple, Type, Union\n\nimport gym\nimport numpy as np\nimport torch as th\nfrom "
},
{
"path": "crazy_functions/test_project/python/dqn/policies.py",
"chars": 8564,
"preview": "from typing import Any, Dict, List, Optional, Type\n\nimport gym\nimport torch as th\nfrom torch import nn\n\nfrom stable_base"
},
{
"path": "crazy_functions/test_project/python/dqn/来源",
"chars": 66,
"preview": "github stablebaseline3\nhttps://github.com/DLR-RM/stable-baselines3"
},
{
"path": "crazy_functions/test_project/其他测试",
"chars": 957,
"preview": "\"In practice, we found that a high-entropy initial state is more likely to increase the speed of training.\nThe entropy i"
},
{
"path": "crazy_functions/下载arxiv论文翻译摘要.py",
"chars": 6365,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file, g"
},
{
"path": "crazy_functions/交互功能函数模板.py",
"chars": 2778,
"preview": "from utils.toolbox import CatchException, update_ui\nfrom .crazy_utils import request_gpt_model_in_new_thread_with_ui_ali"
},
{
"path": "crazy_functions/代码重写为全英文_多线程.py",
"chars": 5872,
"preview": "import threading\nfrom llm_cards.bridge_all import predict_no_ui_long_connection\nfrom utils.toolbox import update_ui\nfrom"
},
{
"path": "crazy_functions/图片生成.py",
"chars": 2794,
"preview": "from utils.toolbox import CatchException, update_ui, get_conf, select_api_key\nfrom .crazy_utils import request_gpt_model"
},
{
"path": "crazy_functions/对话历史存档.py",
"chars": 5955,
"preview": "from utils.toolbox import CatchException, update_ui, promote_file_to_downloadzone\nfrom .crazy_utils import request_gpt_m"
},
{
"path": "crazy_functions/总结word文档.py",
"chars": 5107,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfr"
},
{
"path": "crazy_functions/总结音视频.py",
"chars": 6700,
"preview": "from utils.toolbox import CatchException, report_execption, select_api_key, update_ui, write_results_to_file, get_conf\nf"
},
{
"path": "crazy_functions/批量Markdown翻译.py",
"chars": 11185,
"preview": "import glob, time, os, re\nfrom utils.toolbox import update_ui, trimmed_format_exc, gen_time_str, disable_auto_promotion\n"
},
{
"path": "crazy_functions/批量总结PDF文档.py",
"chars": 7937,
"preview": "from utils.toolbox import update_ui, promote_file_to_downloadzone, gen_time_str\nfrom utils.toolbox import CatchException"
},
{
"path": "crazy_functions/批量总结PDF文档pdfminer.py",
"chars": 6495,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfr"
},
{
"path": "crazy_functions/批量翻译PDF文档_多线程.py",
"chars": 8277,
"preview": "from utils.toolbox import CatchException, report_execption, write_results_to_file\nfrom utils.toolbox import update_ui, p"
},
{
"path": "crazy_functions/数学动画生成manim.py",
"chars": 5821,
"preview": "from utils.toolbox import CatchException, update_ui, gen_time_str\nfrom .crazy_utils import request_gpt_model_in_new_thre"
},
{
"path": "crazy_functions/理解PDF文档内容.py",
"chars": 5529,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption\nfrom .crazy_utils import "
},
{
"path": "crazy_functions/生成函数注释.py",
"chars": 2592,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfr"
},
{
"path": "crazy_functions/联网的ChatGPT.py",
"chars": 4091,
"preview": "from utils.toolbox import CatchException, update_ui\nfrom .crazy_utils import request_gpt_model_in_new_thread_with_ui_ali"
},
{
"path": "crazy_functions/联网的ChatGPT_bing版.py",
"chars": 4015,
"preview": "from utils.toolbox import CatchException, update_ui\nfrom .crazy_utils import request_gpt_model_in_new_thread_with_ui_ali"
},
{
"path": "crazy_functions/虚空终端.py",
"chars": 4557,
"preview": "from toolbox import CatchException, update_ui, gen_time_str\nfrom .crazy_utils import request_gpt_model_in_new_thread_wit"
},
{
"path": "crazy_functions/解析JupyterNotebook.py",
"chars": 5830,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfa"
},
{
"path": "crazy_functions/解析项目源代码.py",
"chars": 18840,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfr"
},
{
"path": "crazy_functions/询问多个大语言模型.py",
"chars": 2608,
"preview": "from utils.toolbox import CatchException, update_ui\nfrom .crazy_utils import request_gpt_model_in_new_thread_with_ui_ali"
},
{
"path": "crazy_functions/语音助手.py",
"chars": 8047,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, get_conf, markdown_convertion\nfrom crazy_f"
},
{
"path": "crazy_functions/读文章写摘要.py",
"chars": 3417,
"preview": "from utils.toolbox import update_ui\nfrom utils.toolbox import CatchException, report_execption, write_results_to_file\nfr"
},
{
"path": "crazy_functions/谷歌检索小助手.py",
"chars": 4401,
"preview": "from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive\nfrom utils.toolbox import CatchException, report_"
},
{
"path": "crazy_functions/辅助回答.py",
"chars": 965,
"preview": "# encoding: utf-8\n# @Time : 2023/4/19\n# @Author : Spike\n# @Descr :\nfrom utils.toolbox import update_ui\nfrom utils.to"
},
{
"path": "crazy_functions/高级功能函数模板.py",
"chars": 1684,
"preview": "from utils.toolbox import CatchException, update_ui\nfrom .crazy_utils import request_gpt_model_in_new_thread_with_ui_ali"
},
{
"path": "gradio_demo.py",
"chars": 21944,
"preview": "from model_cards.autoback import AutoBackend\nimport argparse\nimport os\nimport platform\nimport sys\nfrom pathlib import Pa"
},
{
"path": "llm_cards/bridge_all.py",
"chars": 11808,
"preview": "\n\"\"\"\n 该文件中主要包含2个函数,是所有LLM的通用接口,它们会继续向下调用更底层的LLM模型,处理多模型并行等细节\n\n 不具备多线程能力的函数:正常对话时使用,具备完备的交互功能,不可多线程\n 1. predict("
},
{
"path": "llm_cards/bridge_chatglm.py",
"chars": 11742,
"preview": "\nfrom transformers import AutoModel, AutoTokenizer\nimport time\nimport threading\nimport importlib\nfrom utils.toolbox impo"
},
{
"path": "llm_cards/bridge_chatgpt.py",
"chars": 18381,
"preview": "# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目\n\n\"\"\"\n 该文件中主要包含三个函数\n\n 不具备多线程能力的函数:\n 1. predict: 正常对话时使用,具"
},
{
"path": "llm_cards/bridge_stackclaude.py",
"chars": 10254,
"preview": "#from llm_.bridge_newbing import preprocess_newbing_out, preprocess_newbing_out_simple\nfrom multiprocessing import Proce"
},
{
"path": "llm_cards/core_functional.py",
"chars": 4748,
"preview": "# 'primary' 颜色对应 theme.py 中的 primary_hue\n# 'secondary' 颜色对应 theme.py 中的 neutral_hue\n# 'stop' 颜色对应 theme.py 中的 color_er\n#"
},
{
"path": "llm_cards/crazy_functional.py",
"chars": 15649,
"preview": "from utils.toolbox import HotReload # HotReload 的意思是热更新,修改函数插件后,不需要重启程序,代码直接生效\n\n\ndef get_crazy_functions():\n #######"
},
{
"path": "llm_cards/requirements_chatglm.txt",
"chars": 78,
"preview": "protobuf\ntransformers==4.27.1\ncpm_kernels\ntorch>=1.10\nmdtex2html\nsentencepiece"
},
{
"path": "llm_cards/requirements_slackclaude.txt",
"chars": 17,
"preview": "slack-sdk==3.21.3"
},
{
"path": "model_cards/Tag2Text/MANIFEST.in",
"chars": 82,
"preview": "include ram/configs/*.json\ninclude ram/configs/swin/*.json\ninclude ram/data/*.txt\n"
},
{
"path": "model_cards/Tag2Text/batch_inference.py",
"chars": 13685,
"preview": "from argparse import ArgumentParser\nfrom pathlib import Path\nfrom typing import Dict, List, Optional, TextIO, Tuple\n\nimp"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_common_214/imgs/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "model_cards/Tag2Text/datasets/openimages_common_214/openimages_common_214_ram_annots.txt",
"chars": 1652767,
"preview": "test/0000c64e1253d68f,car\ntest/0002ab0af02e4a77,flower\ntest/000411001ff7dd4f,chicken\ntest/00045d609ca3f4eb,pumpkin,hallo"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt",
"chars": 1685,
"preview": "accident\naccordion\nplane\nairport\nantelope\napple\nart gallery\neggplant\nauditorium\nautumn\nbaboon\nbackpack\nbakery\nbamboo\nban"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt",
"chars": 1584452,
"preview": "test/0000c64e1253d68f,2317\ntest/0002ab0af02e4a77,1141\ntest/000411001ff7dd4f,147\ntest/00045d609ca3f4eb,2035,612\ntest/0007"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt",
"chars": 987,
"preview": "3\n8\n16\n19\n21\n33\n44\n50\n58\n61\n71\n77\n84\n96\n117\n139\n142\n147\n180\n200\n202\n206\n244\n267\n317\n321\n347\n361\n380\n387\n398\n407\n471\n486\n"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_rare_200/imgs/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "model_cards/Tag2Text/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt",
"chars": 857206,
"preview": "test/0000c64e1253d68f,Auto racing\ntest/0002cc8afaf1b611,Residential area\ntest/0003d84e0165d630,Lawn game\ntest/00045d609c"
},
{
"path": "model_cards/Tag2Text/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt",
"chars": 2414,
"preview": "Aerial photography\nAircraft engine\nAle\nAloe\nAmphibian\nAngling\nAnole\nAntique car\nArcade game\nArthropod\nAssault rifle\nAthl"
},
{
"path": "model_cards/Tag2Text/inference_ram.py",
"chars": 1486,
"preview": "'''\n * The Recognize Anything Model (RAM)\n * Written by Xinyu Huang\n'''\nimport argparse\nimport numpy as np\nimport random"
},
{
"path": "model_cards/Tag2Text/inference_ram_openset.py",
"chars": 1989,
"preview": "'''\n * The Recognize Anything Model (RAM) inference on unseen classes\n * Written by Xinyu Huang\n'''\nimport argparse\nimpo"
},
{
"path": "model_cards/Tag2Text/inference_tag2text.py",
"chars": 2213,
"preview": "'''\n * The Tag2Text Model\n * Written by Xinyu Huang\n'''\nimport argparse\nimport numpy as np\nimport random\n\nimport torch\n\n"
},
{
"path": "model_cards/Tag2Text/ram/__init__.py",
"chars": 117,
"preview": "from .inference import inference_tag2text, inference_ram, inference_ram_openset\nfrom .transform import get_transform\n"
},
{
"path": "model_cards/Tag2Text/ram/configs/med_config.json",
"chars": 524,
"preview": "{\n \"architectures\": [\n \"BertModel\"\n ],\n \"attention_probs_dropout_prob\": 0.1,\n \"hidden_act\": \"gelu\",\n "
},
{
"path": "model_cards/Tag2Text/ram/configs/q2l_config.json",
"chars": 557,
"preview": "{\n \"architectures\": [\n \"BertModel\"\n ],\n \"attention_probs_dropout_prob\": 0.1,\n \"hidden_act\": \"gelu\",\n "
},
{
"path": "model_cards/Tag2Text/ram/configs/swin/config_swinB_384.json",
"chars": 230,
"preview": "{\n \"ckpt\": \"pretrain_model/swin_base_patch4_window7_224_22k.pth\",\n \"vision_width\": 1024,\n \"image_res\": 384,\n "
},
{
"path": "model_cards/Tag2Text/ram/configs/swin/config_swinL_384.json",
"chars": 233,
"preview": "{\n \"ckpt\": \"pretrain_model/swin_large_patch4_window12_384_22k.pth\",\n \"vision_width\": 1536,\n \"image_res\": 384,\n "
},
{
"path": "model_cards/Tag2Text/ram/data/ram_tag_list.txt",
"chars": 41904,
"preview": "3D CG rendering\n3D glasses\nabacus\nabalone\nmonastery\nbelly\nacademy\naccessory\naccident\naccordion\nacorn\nacrylic paint\nact\na"
},
{
"path": "model_cards/Tag2Text/ram/data/ram_tag_list_chinese.txt",
"chars": 19597,
"preview": "三维CG渲染 \n3d眼镜\n算盘 \n鲍鱼 \n修道院 \n肚子 \n学院 \n附件 \n事故 \n手风琴 \n橡子 \n丙烯颜料\n表演\n行动 \n动作电影 \n活动 \n演员 \n改编本\n添加 \n胶带 \n调整 \n成人 \n冒险 \n广告 \n天线 \n有氧运动 \n喷雾罐\n爆"
},
{
"path": "model_cards/Tag2Text/ram/data/ram_tag_list_threshold.txt",
"chars": 22016,
"preview": "0.65\n0.65\n0.65\n0.65\n0.65\n0.65\n0.65\n0.8\n0.71\n0.75\n0.65\n0.65\n0.65\n0.8\n0.65\n0.8\n0.8\n0.65\n0.65\n0.65\n0.65\n0.8\n0.65\n0.8\n0.8\n0."
},
{
"path": "model_cards/Tag2Text/ram/data/tag_list.txt",
"chars": 29062,
"preview": "tennis\nbear cub\nobservatory\nbicycle\nhillside\njudge\nwatercolor illustration\ngranite\nlobster\nlivery\nstone\nceramic\nranch\ncl"
},
{
"path": "model_cards/Tag2Text/ram/inference.py",
"chars": 1401,
"preview": "'''\n * The Inference of RAM and Tag2Text Models\n * Written by Xinyu Huang\n'''\nimport torch\n\n\ndef inference_tag2text(imag"
},
{
"path": "model_cards/Tag2Text/ram/models/__init__.py",
"chars": 52,
"preview": "from .ram import ram\nfrom .tag2text import tag2text\n"
},
{
"path": "model_cards/Tag2Text/ram/models/bert.py",
"chars": 45130,
"preview": "'''\n * Copyright (c) 2022, salesforce.com, inc.\n * All rights reserved.\n * SPDX-License-Identifier: BSD-3-Clause\n * For "
},
{
"path": "model_cards/Tag2Text/ram/models/ram.py",
"chars": 10865,
"preview": "'''\n * The Recognize Anything Model (RAM)\n * Written by Xinyu Huang\n'''\nimport json\nimport warnings\n\nimport numpy as np\n"
},
{
"path": "model_cards/Tag2Text/ram/models/swin_transformer.py",
"chars": 26827,
"preview": "# --------------------------------------------------------\n# Swin Transformer\n# Copyright (c) 2021 Microsoft\n# Licensed "
},
{
"path": "model_cards/Tag2Text/ram/models/tag2text.py",
"chars": 14787,
"preview": "'''\n * The Tag2Text Model\n * Written by Xinyu Huang\n'''\nimport numpy as np\nimport json\nimport torch\nimport warnings\n\nfro"
},
{
"path": "model_cards/Tag2Text/ram/models/utils.py",
"chars": 12958,
"preview": "import os\nimport json\nimport torch\nimport math\n\nfrom torch import nn\nfrom typing import List\nfrom transformers import Be"
},
{
"path": "model_cards/Tag2Text/ram/models/vit.py",
"chars": 14240,
"preview": "'''\n * Copyright (c) 2022, salesforce.com, inc.\n * All rights reserved.\n * SPDX-License-Identifier: BSD-3-Clause\n * For "
},
{
"path": "model_cards/Tag2Text/ram/transform.py",
"chars": 316,
"preview": "from torchvision.transforms import Normalize, Compose, Resize, ToTensor\n\n\ndef get_transform(image_size=384):\n return "
},
{
"path": "model_cards/Tag2Text/ram/utils/__init__.py",
"chars": 94,
"preview": "from .metrics import get_mAP, get_PR\nfrom .openset_utils import build_openset_label_embedding\n"
},
{
"path": "model_cards/Tag2Text/ram/utils/metrics.py",
"chars": 3222,
"preview": "from typing import List, Tuple\n\nimport numpy as np\nfrom numpy import ndarray\n\n\ndef get_mAP(\n preds: ndarray,\n gt_f"
},
{
"path": "model_cards/Tag2Text/ram/utils/openset_utils.py",
"chars": 6951,
"preview": "\n\n\nimport torch\nimport torch.nn as nn\nfrom clip import clip\n\n\ndef article(name):\n return \"an\" if name[0] in \"aeiou\" e"
},
{
"path": "model_cards/Tag2Text/requirements_groundingDINO.txt",
"chars": 135,
"preview": "timm==0.4.12\ntransformers==4.15.0\nfairscale==0.4.4\npycocoevalcap\ntorch\ntorchvision\nPillow\nscipy\ngit+https://github.com/o"
},
{
"path": "model_cards/Tag2Text/setup.cfg",
"chars": 253,
"preview": "[metadata]\nname = recognize-anything\nversion = 0.0.1\ndescription = Recognize Anything Model and Tag2Text Model\n\n[options"
},
{
"path": "model_cards/Tag2Text/setup.py",
"chars": 37,
"preview": "import setuptools\nsetuptools.setup()\n"
},
{
"path": "model_cards/autoback.py",
"chars": 19182,
"preview": "import torch\nimport json\nimport math\nimport platform\nimport warnings\nfrom collections import OrderedDict, namedtuple\nfro"
},
{
"path": "model_cards/groundingdino/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "model_cards/groundingdino/config/GroundingDINO_SwinB.cfg.py",
"chars": 1007,
"preview": "batch_size = 1\nmodelname = \"groundingdino\"\nbackbone = \"swin_B_384_22k\"\nposition_embedding = \"sine\"\npe_temperatureH = 20\n"
},
{
"path": "model_cards/groundingdino/config/GroundingDINO_SwinT_OGC.py",
"chars": 1006,
"preview": "batch_size = 1\nmodelname = \"groundingdino\"\nbackbone = \"swin_T_224_1k\"\nposition_embedding = \"sine\"\npe_temperatureH = 20\np"
},
{
"path": "model_cards/groundingdino/datasets/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "model_cards/groundingdino/datasets/transforms.py",
"chars": 9711,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n\"\"\"\nTransforms and data augmentation for both ima"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/__init__.py",
"chars": 823,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/backbone/__init__.py",
"chars": 37,
"preview": "from .backbone import build_backbone\n"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/backbone/backbone.py",
"chars": 7972,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/backbone/position_encoding.py",
"chars": 6866,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/backbone/swin_transformer.py",
"chars": 29339,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/bertwarper.py",
"chars": 12242,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn.h",
"chars": 1883,
"preview": "/*!\n**************************************************************************************************\n* Deformable DETR"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.cpp",
"chars": 1311,
"preview": "/*!\n**************************************************************************************************\n* Deformable DETR"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cpu.h",
"chars": 1194,
"preview": "/*!\n**************************************************************************************************\n* Deformable DETR"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.cu",
"chars": 7367,
"preview": "/*!\n**************************************************************************************************\n* Deformable DETR"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_attn_cuda.h",
"chars": 1195,
"preview": "/*!\n**************************************************************************************************\n* Deformable DETR"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/MsDeformAttn/ms_deform_im2col_cuda.cuh",
"chars": 54694,
"preview": "/*!\n**************************************************************************\n* Deformable DETR\n* Copyright (c) 2020 Se"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/cuda_version.cu",
"chars": 140,
"preview": "#include <cuda_runtime_api.h>\n\nnamespace groundingdino {\nint get_cudart_version() {\n return CUDART_VERSION;\n}\n} // name"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/csrc/vision.cpp",
"chars": 1419,
"preview": "// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n\n#include \"MsDeformAttn/ms_deform_attn.h\"\n\nnames"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/fuse_modules.py",
"chars": 11825,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/groundingdino.py",
"chars": 16702,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/ms_deform_attn.py",
"chars": 15482,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/transformer.py",
"chars": 36805,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/transformer_vanilla.py",
"chars": 4020,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/GroundingDINO/utils.py",
"chars": 10087,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/__init__.py",
"chars": 754,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/models/registry.py",
"chars": 2143,
"preview": "# ------------------------------------------------------------------------\n# Grounding DINO\n# url: https://github.com/ID"
},
{
"path": "model_cards/groundingdino/util/__init__.py",
"chars": 71,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n"
},
{
"path": "model_cards/groundingdino/util/box_ops.py",
"chars": 3905,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n\"\"\"\nUtilities for bounding box manipulation and G"
},
{
"path": "model_cards/groundingdino/util/get_tokenlizer.py",
"chars": 1157,
"preview": "from transformers import AutoTokenizer, BertModel, BertTokenizer, RobertaModel, RobertaTokenizerFast\n\n\ndef get_tokenlize"
},
{
"path": "model_cards/groundingdino/util/inference.py",
"chars": 7997,
"preview": "from typing import Tuple, List\n\nimport cv2\nimport numpy as np\nimport supervision as sv\nimport torch\nfrom PIL import Imag"
},
{
"path": "model_cards/groundingdino/util/logger.py",
"chars": 3303,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\nimport functools\nimport logging\nimport os\nimport "
},
{
"path": "model_cards/groundingdino/util/misc.py",
"chars": 23348,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved\n\"\"\"\nMisc functions, including distributed helpers"
},
{
"path": "model_cards/groundingdino/util/slconfig.py",
"chars": 14380,
"preview": "# ==========================================================\n# Modified from mmcv\n# ===================================="
},
{
"path": "model_cards/groundingdino/util/slio.py",
"chars": 5377,
"preview": "# ==========================================================\n# Modified from mmcv\n# ===================================="
},
{
"path": "model_cards/groundingdino/util/time_counter.py",
"chars": 1567,
"preview": "import json\nimport time\n\n\nclass TimeCounter:\n def __init__(self) -> None:\n pass\n\n def clear(self):\n "
},
{
"path": "model_cards/groundingdino/util/utils.py",
"chars": 17712,
"preview": "import argparse\nimport json\nimport warnings\nfrom collections import OrderedDict\nfrom copy import deepcopy\nfrom typing im"
},
{
"path": "model_cards/groundingdino/util/visualizer.py",
"chars": 12047,
"preview": "# -*- coding: utf-8 -*-\n\"\"\"\n@File : visualizer.py\n@Time : 2022/04/05 11:39:33\n@Author : Shilong Liu \n@Conta"
},
{
"path": "model_cards/groundingdino/util/vl_utils.py",
"chars": 3489,
"preview": "import os\nimport random\nfrom typing import List\n\nimport torch\n\n\ndef create_positive_map_from_span(tokenized, token_span,"
},
{
"path": "model_cards/groundingdino/version.py",
"chars": 22,
"preview": "__version__ = '0.1.0'\n"
},
{
"path": "model_cards/lama/.gitignore",
"chars": 1865,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "model_cards/lama/LICENSE",
"chars": 11348,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "model_cards/lama/README.md",
"chars": 16548,
"preview": "# 🦙 LaMa: Resolution-robust Large Mask Inpainting with Fourier Convolutions\n\nby Roman Suvorov, Elizaveta Logacheva, Anto"
},
{
"path": "model_cards/lama/bin/analyze_errors.py",
"chars": 17698,
"preview": "#!/usr/bin/env python3\nimport cv2\nimport numpy as np\nimport sklearn\nimport torch\nimport os\nimport pickle\nimport pandas a"
},
{
"path": "model_cards/lama/bin/blur_predicts.py",
"chars": 2191,
"preview": "#!/usr/bin/env python3\n\nimport os\n\nimport cv2\nimport numpy as np\nimport tqdm\n\nfrom saicinpainting.evaluation.data import"
},
{
"path": "model_cards/lama/bin/calc_dataset_stats.py",
"chars": 3627,
"preview": "#!/usr/bin/env python3\n\nimport os\n\nimport numpy as np\nimport tqdm\nfrom scipy.ndimage.morphology import distance_transfor"
},
{
"path": "model_cards/lama/bin/debug/analyze_overlapping_masks.sh",
"chars": 1132,
"preview": "#!/bin/bash\n\nBASEDIR=\"$(dirname $0)\"\n\n# paths are valid for mml7\n\n# select images\n#ls /data/inpainting/work/data/train |"
},
{
"path": "model_cards/lama/bin/evaluate_predicts.py",
"chars": 3530,
"preview": "#!/usr/bin/env python3\n\nimport os\n\nimport pandas as pd\n\nfrom saicinpainting.evaluation.data import PrecomputedInpainting"
},
{
"path": "model_cards/lama/bin/evaluator_example.py",
"chars": 2359,
"preview": "import os\n\nimport cv2\nimport numpy as np\nimport torch\nfrom skimage import io\nfrom skimage.transform import resize\nfrom t"
},
{
"path": "model_cards/lama/bin/extract_masks.py",
"chars": 1627,
"preview": "import PIL.Image as Image\nimport numpy as np\nimport os\n\n\ndef main(args):\n if not args.indir.endswith('/'):\n ar"
},
{
"path": "model_cards/lama/bin/filter_sharded_dataset.py",
"chars": 2346,
"preview": "#!/usr/bin/env python3\n\n\nimport math\nimport os\nimport random\n\nimport braceexpand\nimport webdataset as wds\n\nDEFAULT_CATS_"
},
{
"path": "model_cards/lama/bin/gen_debug_mask_dataset.py",
"chars": 1909,
"preview": "#!/usr/bin/env python3\n\nimport glob\nimport os\n\nimport PIL.Image as Image\nimport cv2\nimport numpy as np\nimport tqdm\nimpor"
},
{
"path": "model_cards/lama/bin/gen_mask_dataset.py",
"chars": 5609,
"preview": "#!/usr/bin/env python3\n\nimport glob\nimport os\nimport shutil\nimport traceback\n\nimport PIL.Image as Image\nimport numpy as "
},
{
"path": "model_cards/lama/bin/gen_mask_dataset_hydra.py",
"chars": 5340,
"preview": "#!/usr/bin/env python3\n\nimport glob\nimport os\nimport shutil\nimport traceback\nimport hydra\nfrom omegaconf import OmegaCon"
},
{
"path": "model_cards/lama/bin/gen_outpainting_dataset.py",
"chars": 3226,
"preview": "#!/usr/bin/env python3\nimport glob\nimport logging\nimport os\nimport shutil\nimport sys\nimport traceback\n\nfrom saicinpainti"
},
{
"path": "model_cards/lama/bin/make_checkpoint.py",
"chars": 3097,
"preview": "#!/usr/bin/env python3\n\nimport os\nimport shutil\n\nimport torch\n\n\ndef get_checkpoint_files(s):\n s = s.strip()\n if ',"
},
{
"path": "model_cards/lama/bin/mask_example.py",
"chars": 466,
"preview": "import matplotlib.pyplot as plt\nfrom skimage import io\nfrom skimage.transform import resize\n\nfrom saicinpainting.evaluat"
},
{
"path": "model_cards/lama/bin/paper_runfiles/blur_tests.sh",
"chars": 1491,
"preview": "##!/usr/bin/env bash\n#\n## !!! file set to make test_large_30k from the vanilla test_large: configs/test_large_30k.lst\n#\n"
},
{
"path": "model_cards/lama/bin/paper_runfiles/env.sh",
"chars": 170,
"preview": "DIRNAME=\"$(dirname $0)\"\nDIRNAME=\"$(realpath \"\"$DIRNAME\"\")\"\n\nBINDIR=\"$DIRNAME/..\"\nSRCDIR=\"$BINDIR/..\"\nCONFIGDIR=\"$SRCDIR/"
},
{
"path": "model_cards/lama/bin/paper_runfiles/find_best_checkpoint.py",
"chars": 2075,
"preview": "#!/usr/bin/env python3\n\n\nimport os\nfrom argparse import ArgumentParser\n\n\ndef ssim_fid100_f1(metrics, fid_scale=100):\n "
},
{
"path": "model_cards/lama/bin/paper_runfiles/generate_test_celeba-hq.sh",
"chars": 603,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml-ws01\nOUT_DIR=\"/media/inpainting/paper_data/CelebA-HQ_val_test\"\n\ns"
},
{
"path": "model_cards/lama/bin/paper_runfiles/generate_test_ffhq.sh",
"chars": 580,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml-ws01\nOUT_DIR=\"/media/inpainting/paper_data/FFHQ_val\"\n\nsource \"$(d"
},
{
"path": "model_cards/lama/bin/paper_runfiles/generate_test_paris.sh",
"chars": 593,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml-ws01\nOUT_DIR=\"/media/inpainting/paper_data/Paris_StreetView_Datas"
},
{
"path": "model_cards/lama/bin/paper_runfiles/generate_test_paris_256.sh",
"chars": 598,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml-ws01\nOUT_DIR=\"/media/inpainting/paper_data/Paris_StreetView_Datas"
},
{
"path": "model_cards/lama/bin/paper_runfiles/generate_val_test.sh",
"chars": 1045,
"preview": "#!/usr/bin/env bash\n\n# !!! file set to make test_large_30k from the vanilla test_large: configs/test_large_30k.lst\n\n# pa"
},
{
"path": "model_cards/lama/bin/paper_runfiles/predict_inner_features.sh",
"chars": 841,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml7\n\nsource \"$(dirname $0)/env.sh\"\n\n\"$BINDIR/predict_inner_features."
},
{
"path": "model_cards/lama/bin/paper_runfiles/update_test_data_stats.sh",
"chars": 927,
"preview": "#!/usr/bin/env bash\n\n# paths to data are valid for mml7\n\nsource \"$(dirname $0)/env.sh\"\n\n#INDIR=\"/data/inpainting/paper_d"
},
{
"path": "model_cards/lama/bin/predict.py",
"chars": 4092,
"preview": "#!/usr/bin/env python3\n\n# Example command:\n# ./bin/predict.py \\\n# model.path=<path to checkpoint, prepared by make"
},
{
"path": "model_cards/lama/bin/predict_inner_features.py",
"chars": 4938,
"preview": "#!/usr/bin/env python3\n\n# Example command:\n# ./bin/predict.py \\\n# model.path=<path to checkpoint, prepared by make"
},
{
"path": "model_cards/lama/bin/report_from_tb.py",
"chars": 2588,
"preview": "#!/usr/bin/env python3\n\nimport glob\nimport os\nimport re\n\nimport tensorflow as tf\nfrom torch.utils.tensorboard import Sum"
},
{
"path": "model_cards/lama/bin/sample_from_dataset.py",
"chars": 3044,
"preview": "#!/usr/bin/env python3\n\nimport os\n\nimport numpy as np\nimport tqdm\nfrom skimage import io\nfrom skimage.segmentation impor"
},
{
"path": "model_cards/lama/bin/side_by_side.py",
"chars": 2826,
"preview": "#!/usr/bin/env python3\nimport os\nimport random\n\nimport cv2\nimport numpy as np\n\nfrom saicinpainting.evaluation.data impor"
},
{
"path": "model_cards/lama/bin/split_tar.py",
"chars": 470,
"preview": "#!/usr/bin/env python3\n\n\nimport tqdm\nimport webdataset as wds\n\n\ndef main(args):\n input_dataset = wds.Dataset(args.inf"
},
{
"path": "model_cards/lama/bin/to_jit.py",
"chars": 2155,
"preview": "import os\nfrom pathlib import Path\n\nimport hydra\nimport torch\nimport yaml\nfrom omegaconf import OmegaConf\nfrom torch imp"
},
{
"path": "model_cards/lama/bin/train.py",
"chars": 2609,
"preview": "#!/usr/bin/env python3\n\nimport logging\nimport os\nimport sys\nimport traceback\n\nos.environ['OMP_NUM_THREADS'] = '1'\nos.env"
},
{
"path": "model_cards/lama/colab/LaMa_inpainting.ipynb",
"chars": 4656214,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {\n \"id\": \"_pRpIwnaOnb3\"\n },\n \"sou"
}
]
// ... and 207 more files (download for full content)
About this extraction
This page contains the full source code of the positive666/Prompt-Can-Anything GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 407 files (11.9 MB), approximately 3.2M tokens, and a symbol index with 2527 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.