Repository: positive666/Prompt-Can-Anything Branch: main Commit: 403d3678b5e0 Files: 407 Total size: 11.9 MB Directory structure: gitextract__iw3ng36/ ├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── README_zh.md ├── a2f.py ├── app.py ├── audio2face_pb2.py ├── audio2face_pb2_grpc.py ├── audio2face_streaming_utils.py ├── audio_segment.py ├── auto_label_demo.py ├── batch_clean_gpu.txt ├── crazy_functions/ │ ├── Langchain知识库.py │ ├── Latex全文润色.py │ ├── Latex全文翻译.py │ ├── Latex输出PDF结果.py │ ├── __init__.py │ ├── chatglm微调工具.py │ ├── crazy_functions_test.py │ ├── crazy_utils.py │ ├── latex_fns/ │ │ ├── latex_actions.py │ │ └── latex_toolbox.py │ ├── live_audio/ │ │ ├── aliyunASR.py │ │ └── audio_io.py │ ├── test_project/ │ │ ├── cpp/ │ │ │ ├── cppipc/ │ │ │ │ ├── buffer.cpp │ │ │ │ ├── ipc.cpp │ │ │ │ ├── policy.h │ │ │ │ ├── pool_alloc.cpp │ │ │ │ ├── prod_cons.h │ │ │ │ ├── queue.h │ │ │ │ ├── shm.cpp │ │ │ │ ├── waiter.h │ │ │ │ └── 来源 │ │ │ ├── libJPG/ │ │ │ │ ├── jpgd.cpp │ │ │ │ ├── jpgd.h │ │ │ │ ├── jpge.cpp │ │ │ │ ├── jpge.h │ │ │ │ └── 来源 │ │ │ └── longcode/ │ │ │ ├── jpgd.cpp │ │ │ ├── jpge.cpp │ │ │ └── prod_cons.h │ │ ├── latex/ │ │ │ └── attention/ │ │ │ ├── background.tex │ │ │ ├── introduction.tex │ │ │ ├── model_architecture.tex │ │ │ ├── parameter_attention.tex │ │ │ └── 来源 │ │ ├── python/ │ │ │ └── dqn/ │ │ │ ├── __init__.py │ │ │ ├── dqn.py │ │ │ ├── policies.py │ │ │ └── 来源 │ │ └── 其他测试 │ ├── 下载arxiv论文翻译摘要.py │ ├── 交互功能函数模板.py │ ├── 代码重写为全英文_多线程.py │ ├── 图片生成.py │ ├── 对话历史存档.py │ ├── 总结word文档.py │ ├── 总结音视频.py │ ├── 批量Markdown翻译.py │ ├── 批量总结PDF文档.py │ ├── 批量总结PDF文档pdfminer.py │ ├── 批量翻译PDF文档_多线程.py │ ├── 数学动画生成manim.py │ ├── 理解PDF文档内容.py │ ├── 生成函数注释.py │ ├── 联网的ChatGPT.py │ ├── 联网的ChatGPT_bing版.py │ ├── 虚空终端.py │ ├── 解析JupyterNotebook.py │ ├── 解析项目源代码.py │ ├── 询问多个大语言模型.py │ ├── 语音助手.py │ ├── 读文章写摘要.py │ ├── 谷歌检索小助手.py │ ├── 辅助回答.py │ └── 高级功能函数模板.py ├── gradio_demo.py ├── llm_cards/ │ ├── bridge_all.py │ ├── bridge_chatglm.py │ ├── 
bridge_chatgpt.py │ ├── bridge_stackclaude.py │ ├── core_functional.py │ ├── crazy_functional.py │ ├── requirements_chatglm.txt │ └── requirements_slackclaude.txt ├── model_cards/ │ ├── Tag2Text/ │ │ ├── MANIFEST.in │ │ ├── batch_inference.py │ │ ├── datasets/ │ │ │ ├── openimages_common_214/ │ │ │ │ ├── imgs/ │ │ │ │ │ └── .gitkeep │ │ │ │ ├── openimages_common_214_ram_annots.txt │ │ │ │ ├── openimages_common_214_ram_taglist.txt │ │ │ │ ├── openimages_common_214_tag2text_idannots.txt │ │ │ │ └── openimages_common_214_tag2text_tagidlist.txt │ │ │ └── openimages_rare_200/ │ │ │ ├── imgs/ │ │ │ │ └── .gitkeep │ │ │ ├── openimages_rare_200_ram_annots.txt │ │ │ └── openimages_rare_200_ram_taglist.txt │ │ ├── inference_ram.py │ │ ├── inference_ram_openset.py │ │ ├── inference_tag2text.py │ │ ├── ram/ │ │ │ ├── __init__.py │ │ │ ├── configs/ │ │ │ │ ├── med_config.json │ │ │ │ ├── q2l_config.json │ │ │ │ └── swin/ │ │ │ │ ├── config_swinB_384.json │ │ │ │ └── config_swinL_384.json │ │ │ ├── data/ │ │ │ │ ├── ram_tag_list.txt │ │ │ │ ├── ram_tag_list_chinese.txt │ │ │ │ ├── ram_tag_list_threshold.txt │ │ │ │ └── tag_list.txt │ │ │ ├── inference.py │ │ │ ├── models/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bert.py │ │ │ │ ├── ram.py │ │ │ │ ├── swin_transformer.py │ │ │ │ ├── tag2text.py │ │ │ │ ├── utils.py │ │ │ │ └── vit.py │ │ │ ├── transform.py │ │ │ └── utils/ │ │ │ ├── __init__.py │ │ │ ├── metrics.py │ │ │ └── openset_utils.py │ │ ├── requirements_groundingDINO.txt │ │ ├── setup.cfg │ │ └── setup.py │ ├── autoback.py │ ├── groundingdino/ │ │ ├── __init__.py │ │ ├── config/ │ │ │ ├── GroundingDINO_SwinB.cfg.py │ │ │ └── GroundingDINO_SwinT_OGC.py │ │ ├── datasets/ │ │ │ ├── __init__.py │ │ │ └── transforms.py │ │ ├── models/ │ │ │ ├── GroundingDINO/ │ │ │ │ ├── __init__.py │ │ │ │ ├── backbone/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── backbone.py │ │ │ │ │ ├── position_encoding.py │ │ │ │ │ └── swin_transformer.py │ │ │ │ ├── bertwarper.py │ │ │ │ ├── csrc/ │ │ │ │ │ ├── 
MsDeformAttn/ │ │ │ │ │ │ ├── ms_deform_attn.h │ │ │ │ │ │ ├── ms_deform_attn_cpu.cpp │ │ │ │ │ │ ├── ms_deform_attn_cpu.h │ │ │ │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ │ │ │ ├── ms_deform_attn_cuda.h │ │ │ │ │ │ └── ms_deform_im2col_cuda.cuh │ │ │ │ │ ├── cuda_version.cu │ │ │ │ │ └── vision.cpp │ │ │ │ ├── fuse_modules.py │ │ │ │ ├── groundingdino.py │ │ │ │ ├── ms_deform_attn.py │ │ │ │ ├── transformer.py │ │ │ │ ├── transformer_vanilla.py │ │ │ │ └── utils.py │ │ │ ├── __init__.py │ │ │ └── registry.py │ │ ├── util/ │ │ │ ├── __init__.py │ │ │ ├── box_ops.py │ │ │ ├── get_tokenlizer.py │ │ │ ├── inference.py │ │ │ ├── logger.py │ │ │ ├── misc.py │ │ │ ├── slconfig.py │ │ │ ├── slio.py │ │ │ ├── time_counter.py │ │ │ ├── utils.py │ │ │ ├── visualizer.py │ │ │ └── vl_utils.py │ │ └── version.py │ ├── lama/ │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── bin/ │ │ │ ├── analyze_errors.py │ │ │ ├── blur_predicts.py │ │ │ ├── calc_dataset_stats.py │ │ │ ├── debug/ │ │ │ │ └── analyze_overlapping_masks.sh │ │ │ ├── evaluate_predicts.py │ │ │ ├── evaluator_example.py │ │ │ ├── extract_masks.py │ │ │ ├── filter_sharded_dataset.py │ │ │ ├── gen_debug_mask_dataset.py │ │ │ ├── gen_mask_dataset.py │ │ │ ├── gen_mask_dataset_hydra.py │ │ │ ├── gen_outpainting_dataset.py │ │ │ ├── make_checkpoint.py │ │ │ ├── mask_example.py │ │ │ ├── paper_runfiles/ │ │ │ │ ├── blur_tests.sh │ │ │ │ ├── env.sh │ │ │ │ ├── find_best_checkpoint.py │ │ │ │ ├── generate_test_celeba-hq.sh │ │ │ │ ├── generate_test_ffhq.sh │ │ │ │ ├── generate_test_paris.sh │ │ │ │ ├── generate_test_paris_256.sh │ │ │ │ ├── generate_val_test.sh │ │ │ │ ├── predict_inner_features.sh │ │ │ │ └── update_test_data_stats.sh │ │ │ ├── predict.py │ │ │ ├── predict_inner_features.py │ │ │ ├── report_from_tb.py │ │ │ ├── sample_from_dataset.py │ │ │ ├── side_by_side.py │ │ │ ├── split_tar.py │ │ │ ├── to_jit.py │ │ │ └── train.py │ │ ├── colab/ │ │ │ └── LaMa_inpainting.ipynb │ │ ├── conda_env.yml │ │ ├── 
configs/ │ │ │ ├── analyze_mask_errors.yaml │ │ │ ├── data_gen/ │ │ │ │ ├── random_medium_256.yaml │ │ │ │ ├── random_medium_512.yaml │ │ │ │ ├── random_thick_256.yaml │ │ │ │ ├── random_thick_512.yaml │ │ │ │ ├── random_thin_256.yaml │ │ │ │ └── random_thin_512.yaml │ │ │ ├── debug_mask_gen.yaml │ │ │ ├── eval1.yaml │ │ │ ├── eval2.yaml │ │ │ ├── eval2_cpu.yaml │ │ │ ├── eval2_gpu.yaml │ │ │ ├── eval2_jpg.yaml │ │ │ ├── eval2_segm.yaml │ │ │ ├── eval2_segm_test.yaml │ │ │ ├── eval2_test.yaml │ │ │ ├── places2-categories_157.txt │ │ │ ├── prediction/ │ │ │ │ └── default.yaml │ │ │ ├── test_large_30k.lst │ │ │ └── training/ │ │ │ ├── ablv2_work.yaml │ │ │ ├── ablv2_work_ffc075.yaml │ │ │ ├── ablv2_work_md.yaml │ │ │ ├── ablv2_work_no_fm.yaml │ │ │ ├── ablv2_work_no_segmpl.yaml │ │ │ ├── ablv2_work_no_segmpl_csdilirpl.yaml │ │ │ ├── ablv2_work_no_segmpl_csdilirpl_celeba_csdilirpl1_new.yaml │ │ │ ├── ablv2_work_no_segmpl_csirpl.yaml │ │ │ ├── ablv2_work_no_segmpl_csirpl_celeba_csirpl03_new.yaml │ │ │ ├── ablv2_work_no_segmpl_vgg.yaml │ │ │ ├── ablv2_work_no_segmpl_vgg_celeba_l2_vgg003_new.yaml │ │ │ ├── ablv2_work_nodil_segmpl.yaml │ │ │ ├── ablv2_work_small_holes.yaml │ │ │ ├── big-lama-celeba.yaml │ │ │ ├── big-lama-regular-celeba.yaml │ │ │ ├── big-lama-regular.yaml │ │ │ ├── big-lama.yaml │ │ │ ├── data/ │ │ │ │ ├── abl-02-thin-bb.yaml │ │ │ │ ├── abl-04-256-mh-dist-celeba.yaml │ │ │ │ ├── abl-04-256-mh-dist-web.yaml │ │ │ │ └── abl-04-256-mh-dist.yaml │ │ │ ├── discriminator/ │ │ │ │ └── pix2pixhd_nlayer.yaml │ │ │ ├── evaluator/ │ │ │ │ └── default_inpainted.yaml │ │ │ ├── generator/ │ │ │ │ ├── ffc_resnet_075.yaml │ │ │ │ ├── pix2pixhd_global.yaml │ │ │ │ ├── pix2pixhd_global_sigmoid.yaml │ │ │ │ └── pix2pixhd_multidilated_catin_4dil_9b.yaml │ │ │ ├── hydra/ │ │ │ │ ├── no_time.yaml │ │ │ │ └── overrides.yaml │ │ │ ├── lama-fourier-celeba.yaml │ │ │ ├── lama-fourier.yaml │ │ │ ├── lama-regular-celeba.yaml │ │ │ ├── lama-regular.yaml │ │ │ ├── 
lama_small_train_masks.yaml │ │ │ ├── location/ │ │ │ │ ├── celeba_example.yaml │ │ │ │ ├── docker.yaml │ │ │ │ └── places_example.yaml │ │ │ ├── optimizers/ │ │ │ │ └── default_optimizers.yaml │ │ │ ├── trainer/ │ │ │ │ ├── any_gpu_large_ssim_ddp_final.yaml │ │ │ │ ├── any_gpu_large_ssim_ddp_final_benchmark.yaml │ │ │ │ └── any_gpu_large_ssim_ddp_final_celeba.yaml │ │ │ └── visualizer/ │ │ │ └── directory.yaml │ │ ├── docker/ │ │ │ ├── 1_generate_masks_from_raw_images.sh │ │ │ ├── 2_predict.sh │ │ │ ├── 3_evaluate.sh │ │ │ ├── Dockerfile │ │ │ ├── Dockerfile-cuda111 │ │ │ ├── build-cuda111.sh │ │ │ ├── build.sh │ │ │ └── entrypoint.sh │ │ ├── fetch_data/ │ │ │ ├── celebahq_dataset_prepare.sh │ │ │ ├── celebahq_gen_masks.sh │ │ │ ├── eval_sampler.py │ │ │ ├── places_challenge_train_download.sh │ │ │ ├── places_standard_evaluation_prepare_data.sh │ │ │ ├── places_standard_test_val_gen_masks.sh │ │ │ ├── places_standard_test_val_prepare.sh │ │ │ ├── places_standard_test_val_sample.sh │ │ │ ├── places_standard_train_prepare.sh │ │ │ ├── sampler.py │ │ │ ├── train_shuffled.flist │ │ │ └── val_shuffled.flist │ │ ├── models/ │ │ │ └── ade20k/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── color150.mat │ │ │ ├── mobilenet.py │ │ │ ├── object150_info.csv │ │ │ ├── resnet.py │ │ │ ├── segm_lib/ │ │ │ │ ├── nn/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── modules/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── batchnorm.py │ │ │ │ │ │ ├── comm.py │ │ │ │ │ │ ├── replicate.py │ │ │ │ │ │ ├── tests/ │ │ │ │ │ │ │ ├── test_numeric_batchnorm.py │ │ │ │ │ │ │ └── test_sync_batchnorm.py │ │ │ │ │ │ └── unittest.py │ │ │ │ │ └── parallel/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── data_parallel.py │ │ │ │ └── utils/ │ │ │ │ ├── __init__.py │ │ │ │ ├── data/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── dataloader.py │ │ │ │ │ ├── dataset.py │ │ │ │ │ ├── distributed.py │ │ │ │ │ └── sampler.py │ │ │ │ └── th.py │ │ │ └── utils.py │ │ ├── requirements.txt │ │ └── saicinpainting/ │ │ ├── 
__init__.py │ │ ├── evaluation/ │ │ │ ├── __init__.py │ │ │ ├── data.py │ │ │ ├── evaluator.py │ │ │ ├── losses/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base_loss.py │ │ │ │ ├── fid/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── fid_score.py │ │ │ │ │ └── inception.py │ │ │ │ ├── lpips.py │ │ │ │ └── ssim.py │ │ │ ├── masks/ │ │ │ │ ├── README.md │ │ │ │ ├── __init__.py │ │ │ │ ├── countless/ │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── README.md │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── countless2d.py │ │ │ │ │ ├── countless3d.py │ │ │ │ │ ├── requirements.txt │ │ │ │ │ └── test.py │ │ │ │ └── mask.py │ │ │ ├── refinement.py │ │ │ ├── utils.py │ │ │ └── vis.py │ │ ├── training/ │ │ │ ├── __init__.py │ │ │ ├── data/ │ │ │ │ ├── __init__.py │ │ │ │ ├── aug.py │ │ │ │ ├── datasets.py │ │ │ │ └── masks.py │ │ │ ├── losses/ │ │ │ │ ├── __init__.py │ │ │ │ ├── adversarial.py │ │ │ │ ├── constants.py │ │ │ │ ├── distance_weighting.py │ │ │ │ ├── feature_matching.py │ │ │ │ ├── perceptual.py │ │ │ │ ├── segmentation.py │ │ │ │ └── style_loss.py │ │ │ ├── modules/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ ├── depthwise_sep_conv.py │ │ │ │ ├── fake_fakes.py │ │ │ │ ├── ffc.py │ │ │ │ ├── multidilated_conv.py │ │ │ │ ├── multiscale.py │ │ │ │ ├── pix2pixhd.py │ │ │ │ ├── spatial_transform.py │ │ │ │ └── squeeze_excitation.py │ │ │ ├── trainers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── default.py │ │ │ └── visualizers/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── colors.py │ │ │ ├── directory.py │ │ │ └── noop.py │ │ └── utils.py │ ├── requirements.txt │ ├── segment_anything/ │ │ ├── __init__.py │ │ ├── automatic_mask_generator.py │ │ ├── build_sam.py │ │ ├── modeling/ │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── image_encoder.py │ │ │ ├── mask_decoder.py │ │ │ ├── prompt_encoder.py │ │ │ ├── sam.py │ │ │ └── transformer.py │ │ ├── predictor.py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── onnx.py │ │ └── transforms.py │ └── setup.py ├── 
requirements.txt ├── requirements_llm_extra.txt ├── themes/ │ ├── common.js │ ├── default.css │ ├── default.py │ ├── green.css │ ├── green.py │ └── theme.py └── utils/ ├── AudioRecorder.py ├── AudioTrans.py ├── __init__.py ├── audio.py ├── check_proxy.py ├── colorful.py ├── conf.py ├── dataloads.py ├── downloads.py ├── ops.py ├── plot.py ├── text2speech.py ├── textsplitter/ │ ├── __init__.py │ ├── ali_text_splitter.py │ ├── chinese_text_splitter.py │ └── zh_title_enhance.py ├── toolbox.py ├── torch_utils.py └── video.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg runs/ train_imgs/ MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py #key ChatGPT/config # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # checkpoint *.pth outputs/ checkpoints/ gfpan/ results/ .idea/ weights/ voice_dir/ SadTalker/ VITS/ config_private.py private_upload gpt_log ================================================ FILE: .gitmodules ================================================ [submodule "VisualGLM_6B"] path = VisualGLM_6B url = https://github.com/positive666/VisualGLM_6B.git ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. 
When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . ================================================ FILE: README.md ================================================ # Prompt-Can-Anything

English | 中文

This is a gradio library and research repository that combines SOTA AI applications. It can help you achieve anything - all you need to do is provide prompts and make one click. Through the prompts and creativity of SOTA models, you can do anything. You don't have to install all the features; you can install only the ones you want to use. **Motivation** Currently, the “Anything” AI intelligent agent backend has been accumulated for engineering and research. This requires the use of more multi-modal tasks and zero-shot models, not only to provide multi-modal AI processing web UI, but also to gradually enrich its functionality. You can accomplish anything through this project! Let’s learn more about the development progress and plan of this project, and the final complete intelligent agent that combines the local GPT repository can help you call any AI task! Questions, stars, and forks are welcome, and you can also become a developer. ## Feature 1. (YOCO) It is not just a tool that can prompt anything 🔥 Data Engine: In addition, we will introduce video, audio, and 3D annotations in the future. YOCO relies on integrated multimodal models and auxiliary generators such as ChatGPT. Of course, it is not omnipotent. Through effective fully automatic annotation and stable diffusion series methods to produce and control data that meet the requirements, we complete the “data engine” and generate customized label formats that facilitate the training of conventional models. 🔥 Model Training: For each model, we not only need to use it, but also read its paper, fine-tuning methods, and communicate with the original author to try some development work for improvement and better training. We use fine-tuned large models and customized label formats generated by YOCO to more efficiently train conventional models. structure 2.
🚀 Interactive content creation and visual GPT Integrate diversified GPT, mainly using the port of chatgpt, and use the open-source Tsinghua VISUALGLM to deploy and fine-tune localized GPT, as well as try to improve the model structure. Through multimodal application tools, we can conduct dialogues and content creation. Easy example (asr->llm_model->tts->a2f app) https://github.com/positive666/Prompt-Can-Anything/assets/28972473/c9cc64af-939d-480f-a684-08d8db34b25f 3. ⭐ 3D && 2D Avatar (coming soon) Complete a role design interaction through a 3D Engine combined with multimodal tasks such as GPT; Complete a role design interaction through the SadTalker open source project and multimodal tasks such as GPT. 4. 🔥🔥🚀 Unlimited potential “Anything” Through continuous creativity and accumulation, we will integrate and learn from SOTA AI. We will record each integrated model and provide a detailed explanation and summary in the article. The author will summarize all the AI-related knowledge reserves and engineering experience for the local large model (this part is the final development function and is planned). structure
⭐ Research🚀 project🔥 Inspiration(In preparation) At the research level, zero-shot contrastive learning is a research trend; we hope to understand as much as possible the model design details of the projects we are applying, so that we can combine text, images, and audio to design a strongly aligned backbone. At the project level, TensorRT acceleration of the basic models improves efficiency.
###
🔥 [August , Update plan preview , Welcome fork]
- 🔥 add gpt_academic repo crazy functions and add langchain\agent coming soon - Optimization of speech problems and code logic optimization before optimization, add Gilgen - 🔥Official latest model integration test for Tag2text version 2 in early June,add RAM(Done) - One-click fine-tuning button function, adding: visualglm (Done) - Voice text processing link GPT, joining chatglm with a2f APP (Done) ###
⭐[News list]
-【2023/8/7】 Fix bug with llm(chatglm2,gpt3.5 loads and improve gradio ui) -【2023/7/21】 update tag2text and ram with official repo -【2023/6/7】 v1.15:add submodule SadTalker,update UI -【2023/6/6】 v1.15:environment installation problems and supplementary instructions, special models are called independently, and no need to install dependencies; Added the function of one-click fine-tuning of VisualGLM, considering machine configuration and video memory with caution -【2023/6/5】 v1.15 a video demo and plan,fix asr bug ,chatgpt with asr and tts -【2023/5/31】 Fixed known issues, add tts demo, the Linux platform is tested through all open features -【2023/5/23】 add web demo:Add VisualGLM ,chatgpt from [Academic-gpt](https://github.com/binary-husky/gpt_academic) -【2023/5/7】 add web demo:At present, the function of text generation, detection and segmentation of images or image folders on the website has been tested normally, and the program does not need to be restarted, and the last model loading configuration is remembered, and it will be continuously optimized in the future. -【2023/5/4】 add semantic segmentation label, add args(--color-flag --save-mask ) -【2023/4/26】 YOCO,Automatic annotation TOOLS:Commit preliminary code ,For the input image or folder, you can obtain the results of detection, segmentation, and text annotation , optional chatgpt api. ## Preliminary-Works - [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B) : Visual ChatGlm(6B) - [Segment Anything](https://github.com/facebookresearch/segment-anything) : Strong segmentation model. But it needs prompts (like boxes/points/text) to generate masks. - [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) : Strong zero-shot detector which is capable of generating high quality boxes and labels with free-form text. - [Stable-Diffusion](https://github.com/CompVis/stable-diffusion) : Amazing strong text-to-image diffusion model.
- [Tag2text](https://github.com/xinyu1205/Tag2Text) : Efficient and controllable vision-language model which can simultaneously output superior image captioning and image tagging. - [SadTalker](https://github.com/OpenTalker/SadTalker): Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation - [lama](https://github.com/advimman/lama) : Resolution-robust large mask Inpainting with Fourier Convolutions - [gpt_academic](https://github.com/binary-husky/gpt_academic) : LLM tools. ## :hammer_and_wrench: YOCO: Quick Start First, make sure you have a basic GPU deep learning environment. (Linux is recommended, Windows may have problems compiling Grounded-DINO Deformable-Transformer operator, see [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) ) ```bash git clone https://github.com/positive666/Prompt-Can-Anything cd Prompt-Can-Anything ``` **Install environment** Installation of basic environment ``` pip install -r requirements.txt or pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` Installation of Ground detector (compiling) ```bash cd model_cards pip install -e . ``` Installation of Tsinghua VisualGLM (optional, better to use LINUX system, installation plan will be updated after testing on Windows) ```bash git submodule update --init --recursive cd VisualGLM_6B && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` Install SadTalker (optional ) ```bash git clone https://github.com/Winfredy/SadTalker.git cd SadTalker && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` ​ Tips:create two directories, checkpoints and gfpgan, and place them in the root directory.
Download the extracted weights from the official website and put them into two folders, Installation of LAMA model (optional, not yet released): This environment has a relatively strict requirement for the Python version, you may need to manually override the installation by version specified in the txt below: ``` pip install -r model_cards/lama/requirements.txt ``` Installation of diffuser (optional): ```bash pip install --upgrade diffusers[torch] ``` For more content, you can check requirements, “pip install < your missing packages>”, if there is an installation version issue, please carefully look at the requirement version. **Linux environment issue**: 1. for pyaudio Method 1: pip may not be successful on the Linux platform, go to this page[pyaudio-wheels · PyPI](https://pypi.org/project/pyaudio-wheels/#files), select the version corresponding to your Python version, download it and pip install the whl file. Detailed instructions will be provided in the future. Method 2: ``` sudo apt-get install portaudio19-dev sudo apt-get install python3-all-dev pip install pyaudio ``` 2. use qlora fine tune question ``` pip install bitsandbytes -i https://mirrors.aliyun.com/pypi/simple ``` **Windows installation issue** ​ as Linux For more content, you can check the requirements, “pip install < your missing packages>”, and if there are version installation issues, please check the version carefully in the requirements. **Run** 1. downloads models weights
name backbone Data Checkpoint model-config
1 Tag2Text-Swin Swin-Base COCO, VG, SBU, CC-3M, CC-12M Download link
2 Segment-anything vit Download link| Download link| Download link
3 Lama FFC Download link
4 GroundingDINO-T Swin-T O365,GoldG,Cap4M Github link | HF link link
5 GroundingDINO-B Swin-B COCO,O365,GoldG,Cap4M,OpenImage,ODinW-35,RefCOCO Github link | HF link link
2. Configure privacy files and parameters in config_private.py. After downloading the model, configure the path in the “MODEL_xxxx_PATH” variable. If using ChatGPT, configure its proxy and API key. (If there are networking issues with other services such as TTS during use on the web UI, first turn off the VPN connection and only open it when using ChatGPT). **🏃Demo** [Video demo 1 online on Baidu Cloud](https://pan.baidu.com/s/1AllUjuOVhzJh7abe71iCxg?pwd=c6v6) [Video demo 2](https://pan.baidu.com/s/1jdP9mgUhyfLh_hz1W3pkeQ?pwd=c6v6) 1. Auto-label ```bash "--input_prompt" : You can manually input a prompt. For example, if you only want to detect target categories that interest you, you can directly input the prompt to the grounded detection model, or input it to the Tag2Text model. '--color-flag': Using BOX’s tags, distinguish between category and instance segmentation: the category color of semantic segmentation is distinguished using BOX’s tags. ``` python auto_label_demo.py --source --save-txt --save-mask --save-xml --save_caption Example: ​ Support multi-tasks, such as : ​ default tasks include images understand /detect/instance segment .....(add methods for image generation and inpainting ) "Prompt" control models output, example ​ image-20230427093103453 2. webui(all) ```python python app.py ``` image-20230508075845259 image-20230527022556630 ​ 2.1 audio2face with llm model (Beta) ​ In fact, the ASR/TTS/LLM components are all arbitrarily replaceable. ​ this is an easy example, support chatglm,chatgpt(you can use anything llm model,but you need custom ) ​ start asr&tts with audio2face ​ you need install audio2face in omniverse APP,see https://www.nvidia.cn/omniverse/ ​ step1. In audio2face,open a demo ,choose a Player ,auto build Trt engine ,(not support GTX10xx GPU),latest version support chinese! ​ get model Prim path. image-20230725122731372image-20230331372 ​ ![image-20230725133326397](asset/getpath.png) ​ step 2.
in webui , configure your Prim path "Avatar_instance_A" in config_private.py , click"start system" and" Speech_system" ​ ## 🔨To Do List - [x] Release demo and code. - [x] web ui demo - [x] Support ChatGPT/VISUALGLM/ASR/TTS - [x] YOCO labeling fine-tuning of VISUALGLM demo[next week] - [x] 3D && 2D avatar - [ ] Complete the planned AI combination “Anything” - [ ] Fine-tune the segmentation and ground detectors of SAM, and expand the input control of SAM - [ ] Release training methods - [ ] Knowledge cloning ## :cupid: Acknowledgements - [gpt_academic](https://github.com/binary-husky/gpt_academic) - [Segment Anything](https://github.com/facebookresearch/segment-anything) - [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) - [Tag2text](https://github.com/xinyu1205/Tag2Text) - [SadTalker](https://github.com/OpenTalker/SadTalker) - [lama](https://github.com/advimman/lama) - [ VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git) Thanks for their great work! ================================================ FILE: README_zh.md ================================================ # Prompt-Can-Anything 这是一个结合SOTA AI的应用web库以及研究的储备库,它能够帮你实现一切:你只需要提供提示!只需一次点击!通过SOTA模型的提示和创意,你可以做任何事情。 **动机** 当前:为工程和研究所积累的AI智能体后台”安尼森“,这需要使用更多的多模态任务以及zero-shot模型,不仅提供多模态的AI处理web UI,逐渐丰富的功能。 目标:你可以通过它完成一切事情!让我们详细了解下该项目的开发进度和计划,最终完整的智能体结合本地储备的GPT可以帮你调用一切AI任务!欢迎提问、star和fork,以及伸出援助之手~ ## 特性 1. (YOCO)它不仅是一个可以提示任何事情的工具 🔥 数据引擎: 此外,我们将在未来引入视频、音频和3D注释,YOCO依赖于集成的多模态模型以及GPT等辅助生成,当然它并不是万能的,通过有效的全自动标注和stable diffusion系列的方法去生产和控制符合需求的数据,完成”数据引擎“,并且生成的定制化的标签格式,去便于训练常规模型。 🔥 模型训练: 对于每一个模型我们不仅要做到使用,还在阅读它的论文和微调方法以及和原作者交流,尝试一些改进和更好训练的开发工作,Fine-tune大模型和通过YOCO生成的定制化的标签格式,更高效地训练常规模型。 structure 2. 🚀交互内容创作和视觉&&语音GPT ​ 集成多样化GPT,目前主要以chatgpt的端口为主,利用开源的清华VISUALGLM,我们实现本地化GPT的部署和微调,以及尝试改进模型结构,通过多模态的应用工具进行对话和内容创作,支持语音识别、语音合成、并发送Audio2face. ​ 这是一个最简单的例子 ​ https://github.com/positive666/Prompt-Can-Anything/assets/28972473/c9cc64af-939d-480f-a684-08d8db34b25f 3. 
⭐ 应用角色扮演—— 3D &&2D 虚拟人(开发中) 通过3D引擎去结合GPT等多模态任务完成一个角色设计互动; 通过SadTalker开源项目去结合GPT等多模态任务完成一个角色设计互动; 4. 🔥🔥🚀无限的潜力“安尼森” ​ 不断的创意和积累,SOTA -AI的集成和学习,我们会通过记录每一个集成的模型,对它进行一次详解,总结在文章中。 ​ 作者AI相关所有知识储备和工程经验总结给本地大模型(这部分是最终开发功能,计划中) structure
⭐ 研究 🚀 项目 🔥 灵感(筹备中) 在研究层面上,零样本迁移比较学习是热门的研究趋势,我希望尽可能理解正在应用的项目的模型设计细节,这样我们想将文本、图像和音频相结合设计一个强大的对齐backbone。 在项目层面上,可考虑Tensorrt加速基本模型或者其他的模型转换方式可以提高效率。
###
🔥 [8月更新预告,更新频繁,感兴趣关注]
- 修复了LLM调用相关的BUG和界面调整,正在更新langchains和Agent - 更新了RAM & Tag2Text【Done】 - 修复优化开源GLM的一些功能,一键微调按钮和各种微调模型 - 语音文本处理链接gpt,加入chatglm 【Done】 - Gilgen测试代码更新 - RAM & Tag2Text等解析文章 ###
⭐ [更新列表]
- 【2023/8/7】 v1.2: 修复了界面已知BUG,分离了部分依赖,修复了chatglm2和多模型加载问题,完整添加了最新的学术GPT功能,并在更新langchains更多功能 - 【2023/7/21】 v1.15: 更新了Tag2text和ram的代码,支持RAM,是一个中英识别标签的双模态模型 - 【2023/6/7】 v1.15 :加入子项目SadTalker,更新UP界面,语音对话功能界面更新 - 【2023/6/6】 v1.15版本:修复了已知的环境安装问题和补充说明,特殊的模型独立了调用,不需要可以不用安装依赖了;添加了一键微调VisualGLM的功能,考虑机器配置和显存慎用; - 【2023/6/5】 修复whisper asr的bug,内部可选模型,但是考虑显存不建议超过small,上传百度云一个介绍。 - 【2023/5/31】添加Web演示:修复已知问题BUG,添加TTS模块(临时版本),LINUX系统上测试通过了所有开放的功能,补充一些说明和测试。(修改重载:每次勾选加载模型和释放模型后,因为太多的本地化的大模型,如果部署本地GPT显卡必须要20G+,但目前机制无法动态释放调节释放多个模型显存,这个按钮只能帮助你选择、组合串联cv模型的使用方式了) - 【2023/5/29】添加Web演示:加入了学术chatgpt部分功能,感谢他们的工作,其次添加了一键生成VisualGLM-6B数据集标注功能,后续可一键微调 - 【2023/5/23】添加Web演示:加入清华的VisualGLM-6B版本 - 【2023/5/7】添加Web演示:目前,已经测试了文本生成、图像或图像文件夹的检测和分割功能,程序无需重新启动,记住了最后的模型加载配置,并将在未来持续优化。 - 【2023/5/4】添加语义分割标签,添加args(--color-flag --save-mask) - 【2023/4/26】YOCO,自动标注工具:提交初步代码,针对输入图像或文件夹,可以获得检测、分割和文本注释的结果,额外提供选择chatgpt api。 **预备工作** - [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git) : Visual ChatGlm. - [Segment Anything](https://github.com/facebookresearch/segment-anything):强大的分割模型。但它需要提示(如包围框/点/掩码、文本)来生成蒙版。 - [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO):强大的零样本泛化检测器,能够使用自由格式文本生成高质量框和标签。 - [Stable-Diffusion](https://github.com/CompVis/stable-diffusion):文本-图像扩散模型。 - [Tag2text](https://github.com/xinyu1205/Tag2Text):高效可控的视觉-语言模型,可以同时输出优越的图像字幕和图像标记。 - [SadTalker](https://github.com/OpenTalker/SadTalker):单图声音驱动人脸的方法 - [lama](https://github.com/advimman/lama):分辨率鲁棒的大屏蔽填充与傅立叶卷积 - [gpt_academic](https://github.com/binary-husky/gpt_academic) : 丰富的LLM工具箱。 ## :hammer_and_wrench: YOCO: 快速入门 首先,需要有基本的gpu深度学习环境。 (强烈建议使用Linux,Windows可能在编译Grounded-DINO Deformable和配置Visualglm时候算子时出现问题,参见[Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)) ```bash git clone https://github.com/positive666/Prompt-Can-Anything cd Prompt-Can-Anything ``` 安装基本环境: ``` pip install -r requirements 或者 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` 安装Ground检测器(编译): ``` cd model_cards 
pip install -e . ``` 安装清华智谱视觉VisualGLM(可选,最好用LINUX系统,window后面测试后补充安装方案): ```bash git submodule update --init --recursive cd VisualGLM_6B && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` 安装SadTalker(optional ) ```bash git clone https://github.com/Winfredy/SadTalker.git cd SadTalker && pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt ``` ​ Tips:创建checkpoints 和gfpgan两个目录,放置在根目录下。从官网下载解压的权重分别放进两个文件夹!! 安装LAMA模型(可选还未发布): ​ 这个环境对Python版本要求比较苛刻,可能需要按照下面txt的版本手动覆盖安装 ``` pip install -r model_cards/lama/requirements.txt ``` 安装扩散器(可选): ```bash pip install --upgrade diffusers[torch] ``` 更多内容,可以查看requirements, “pip install < your missing packages>”, 如果出现安装版本问题,请仔细看requirements的版本 **Linux环境问题【易出现问题的库】** 1. 对于pyaudio 方法一:在Linux平台可能通过pip并不一定成功,进入这里[pyaudio-wheels · PyPI](https://pypi.org/project/pyaudio-wheels/#files),选择对应你Python的版本,下载后pip安装whl,后续会详细补充。 方法二: ``` sudo apt-get install portaudio19-dev sudo apt-get install python3-all-dev pip install pyaudio ``` 2.VisualGLM训练环境:使用qlora微调int4模型问题: ``` pip install bitsandbytes -i https://mirrors.aliyun.com/pypi/simple ``` **Windows安装问题** 目前除了LLM的加速和微调三方库,无特殊问题。 运行 1. 下载模型权重
名称 骨干 数据 权重 模型配置
1 Tag2Text-Swin Swin-Base COCO、VG、SBU、CC-3M、CC-12M 下载链接
2 Segment-anything vit 下载链接| 下载链接| 下载链接
3 Lama FFC 下载链接
4 GroundingDINO-T Swin-T O365、GoldG、Cap4M Github链接 | HF链接 链接
2. 配置隐私文件和参数在 config_private.py 下,下载模型后将路径配置在 "MODEL_xxxx_PATH" 的变量,如果使用 chatgpt ,配置其代理和API密钥。在WebUI使用过程中,如果其他服务(如TTS)出现联网问题,请先关掉VPN连接,仅当使用Chatgpt时候打开。 ## 🏃Demo [视频Demo介绍](https://pan.baidu.com/s/1AllUjuOVhzJh7abe71iCxg?pwd=c6v6) [Video demo 2](https://pan.baidu.com/s/1jdP9mgUhyfLh_hz1W3pkeQ?pwd=c6v6) 1. 自动标注的测试样例 ```bash "--input_prompt" : 你可以手动输入prompt,比如你只想检测你感兴趣的目标类别,可以直接输入给grounded检测模型,也可以输入给tag2text '--color-flag': 按BOX的标签为同一类别使用相同颜色(即语义分割的类别颜色),以区别于实例分割 ``` 支持多种任务,例如: 默认任务包括图像理解/检测/实例分割…(以及后续添加图像生成和编辑的方法去制作新数据) "Prompt" control models output, example python auto_label_demo.py --source --save-txt --save-mask --save-xml --save_caption image-20230427093103453 2. webui ```python python app.py ``` image-20230527022556630 image-20230508075845259visual_chatglm 3. 语音大语言模型&&驱动a2f 这是一个简单的例子,实际上ASR、TTS、LLM等组件是可以任意替换的,只要你具备基本的开发能力,通过语言模型和语音驱动去完成A2F的服务,你需要安装Omniverse软件和Audio2face的应用,GPU不能是比较旧的帕斯卡架构,详情可以看https://www.nvidia.cn/omniverse/ 步骤1.在Omniverse中,点击如图下的例子,安装一个Demo player,它会自动完成TensorRT的构建,然后可以如下图中获取Player的路径Prim Path image-20230725122731372image-20230331372 ![image-20230725133326397](asset/getpath.png) 步骤2. 程序运行起来后,将上面获得的路径拷贝,填写在config_private的“Avatar_instance_A”,在web端如图下操作点击 ‘start system’后,点击加载“Speech_system”启动语音模式,但是注意TTS是网络服务。 ## 🔨计划清单 - [x] 发布初版 - [x] web ui 界面调整 - [x] 支持chatgpt/VISUALGLM/ASR/TTS - [x] Yoco一键标注微调VISUALGLM Demo - [x] 3d &&2d avatar - [ ] 完成计划的AI结合体“安尼森” - [ ] 微调sam分割器 and ground检测器 ,拓展SAM的输入控制 - [ ] 发布训练方法. - [ ] 知识克隆 ## 参考工作 - [gpt_academic](https://github.com/binary-husky/gpt_academic) - [Segment Anything](https://github.com/facebookresearch/segment-anything) - [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO) - [Tag2text](https://github.com/xinyu1205/Tag2Text) - [SadTalker](https://github.com/OpenTalker/SadTalker) - [lama](https://github.com/advimman/lama) - [VisualGLM-6B](https://github.com/THUDM/VisualGLM-6B.git) 感谢他们的出色工作! 
================================================ FILE: a2f.py ================================================ import argparse import functools import os import yaml import numpy as np import ffmpeg import grpc import grpc import audio2face_pb2 import audio2face_pb2_grpc from pydub import AudioSegment from pydub.silence import split_on_silence import soundfile from audio2face_streaming_utils import push_audio_track_stream,push_audio_track,push_stream import pyaudio import wave from queue import Queue import time import whisper import requests #from llm_cards.bridge_chatgpt import predict from config_private import API_KEY import uuid import re import asyncio import threading # 创建事件,用于线程间同步 send_event = threading.Event() # 按秒截取音频 def get_part_wav(sound, start_time, end_time, part_wav_path): save_path = os.path.dirname(part_wav_path) if not os.path.exists(save_path): os.makedirs(save_path) start_time = int(start_time) * 1000 end_time = int(end_time) * 1000 word = sound[start_time:end_time] word.export(part_wav_path, format="wav") def crop_wav(path, crop_len): for src_wav_path in os.listdir(path): wave_path = os.path.join(path, src_wav_path) print(wave_path[-4:]) if wave_path[-4:] != '.wav': continue file = wave.open(wave_path) # 帧总数 a = file.getparams().nframes # 采样频率 f = file.getparams().framerate # 获取音频时间长度 t = int(a / f) print('总时长为 %d s' % t) # 读取语音 sound = AudioSegment.from_wav(wave_path) for start_time in range(0, t, crop_len): save_path = os.path.join(path, os.path.basename(wave_path)[:-4], str(uuid.uuid1()) + '.wav') get_part_wav(sound, start_time, start_time + crop_len, save_path) from concurrent.futures import ThreadPoolExecutor def process_chunk(model, chunk, detect_language): # make log-Mel spectrogram and move to the same device as the model mel = whisper.log_mel_spectrogram(chunk).to(model.device) # detect the spoken language speech_language = 'zh' if detect_language : _, probs = model.detect_language(mel) speech_language = max(probs, key=probs.get) # 
decode the audio options = whisper.DecodingOptions() result = whisper.decode(model, mel, options) return result.text, speech_language def speech_recognition(inputs, model,stream_model=False,detect_language=False): # whisper all_result='' speech_language='zh' executor = ThreadPoolExecutor() results = [] audio=None if not stream_model: audio,sr= soundfile.read(inputs, dtype='float32') else: print('numpy data') sr,audio=inputs data = audio / 65538 audio = data.astype(np.float32) print(sr) chunk_size= sr*30 print((audio)) for i in range(0, len(audio), chunk_size): chunk_end = min(i + chunk_size, len(audio)) chunk = whisper.pad_or_trim(audio[i:chunk_end]) # submit the chunk to the thread pool for processing results.append(executor.submit(process_chunk, model, chunk, detect_language)) # print the recognized text and the detected language for result in results: text, language = result.result() #print(text) all_result += text speech_language = language # # print the recognized text # all_result+=result.text return all_result, speech_language Avatar_instance_A='/World/audio2face/PlayerStreaming' a2f_url = 'localhost:50051' # The audio2face url by default sample_rate_Omniverse = 22050 # Audio frame rate # 录音参数 CHUNK = 1024 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 16000 RECORD_SECONDS =5 audio_file = "F:\\VoiceprintRecognition-Pytorch-develop\\error001.wav" buffer_length=int(RATE / CHUNK * RECORD_SECONDS) record_file='record.wav' p = pyaudio.PyAudio() def mic_audio(record_file="record.wav"): # 打开录音 import keyboard stream = p.open( input_device_index=1, format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) print("Recording...") frames = [] while True: data = stream.read(CHUNK) frames.append(data) if keyboard.is_pressed('s'): break stream.stop_stream() stream.close() p.terminate() wf = wave.open(record_file, 'wb') wf.setnchannels(CHANNELS) wf.setsampwidth(p.get_sample_size(FORMAT)) wf.setframerate(RATE) wf.writeframes(b''.join(frames)) wf.close() 
return 'OK' import edge_tts async def tts_send(text,onmiverse=False,send_file='voice_dir/send_a2f.wav'): if text is not None: sentences = re.split(r'[!?。: ]', text) sentences = [s.strip() for s in sentences if s.strip()] sentences_len=len(sentences) audio_chunks = {} async def process_sentences(): tasks = [] for i, sentence in enumerate(sentences): if len(sentence) > 0: # 提交任务到协程池 task = asyncio.create_task(speak(sentence, i % sentences_len)) tasks.append(task) await asyncio.gather(*tasks) async def speak(sentence, worker_id): # 合成语音 print(worker_id) audio_stream =edge_tts.Communicate(sentence, voice='zh-CN-YunxiNeural', rate='+1%', volume='+1%').stream() async for package in audio_stream: if package['type'] == 'audio': # 获取音频数据的字节流(chunk) audio_chunk = package['data'] # 将音频数据添加到字典中 if worker_id not in audio_chunks: audio_chunks[worker_id] = [] audio_chunks[worker_id].append(audio_chunk) await process_sentences() # 将每个协程合成的音频数据拼接起来 audio_data = b'' for i in range(sentences_len): if i in audio_chunks: for chunk in audio_chunks[i]: audio_data += chunk with open(f'{send_file}', 'wb') as f: f.write(audio_data) if onmiverse: audio_data, samplerate = soundfile.read(f'{send_file}', dtype="float32") if len(audio_data.shape) > 1: audio_data = np.average(audio_data, axis=1) push_audio_track_stream(a2f_url, audio_data, samplerate, Avatar_instance_A) async def tts_a2f(text): import edge_tts import soundfile as sf import numpy as np from audio2face_streaming_utils import push_audio_track_stream generate_wave = edge_tts.Communicate(text, voice='zh-CN-YunxiNeural', rate='-5%', volume='+1%') await generate_wave.save('./voice_dir/send_frame.wav') try: audio_data, samplerate = sf.read('./voice_dir/send_frame.wav', dtype="float32") if len(audio_data.shape) > 1: audio_data = np.average(audio_data, axis=1) push_audio_track_stream(a2f_url, audio_data, samplerate , Avatar_instance_A) print("send done") return 'Send Done!' 
except Exception as e: print(f"检查是否开启omniverse!!!") def push_stream(url,player,dir="voice_dir/send_omniverse.wav"): from audio2face_streaming_utils import push_audio_track_stream import soundfile import numpy as np retry=0 while True: try: audio_data,sr= soundfile.read(dir, dtype='float32');break except : print("tts合成速度稍慢,等待....") retry += 1 print('正在重试') if retry >=2: raise TimeoutError if len(audio_data.shape) > 1: audio_data = np.average(audio_data, axis=1) push_audio_track_stream(url, audio_data, sr, player) def audio_synthesis(gpt_replying_buffer,url,player): import threading threading.Thread(target=process_send_stream, args=(gpt_replying_buffer,url,player,)).start() def process_send_stream(gpt_replying_buffer,url,player): import subprocess dir="voice_dir/send_omniverse.wav" cmd = f'edge-tts --voice {"zh-CN-YunxiNeural"} --text "{gpt_replying_buffer}" --write-media {dir} ' subprocess.run(cmd, shell=True) time.sleep(0.5) push_stream(url,player,dir) def receive_max(q,Text): global receive_flag receive_flag=True sentences = re.split(r'[!?。: ,]', Text) sentences = [s.strip() for s in sentences if s.strip()] # from VITS import while True : if len(sentences)>0 : #audio_data=vit_tts(sentences.pop(0) #audio_data=r'voice_dir/send_frame.wav' audio_data=edge_tts.Communicate(sentences.pop(0), voice='zh-CN-YunxiNeural', rate='+1%', volume='+1%') q.put((audio_data,True)) print('done') else : print('语音合成线程结束......') receive_flag=False break ###--------线程:收集数据,中转处理源buffer收集后发送------------### def send_stream2(q): global mess global receive_flag mess=False with grpc.insecure_channel(a2f_url) as channel: stub= audio2face_pb2_grpc.Audio2FaceStub(channel) def create_generator(): global mess while True: if not q.empty(): #取出队列中的音频文件路径和对应的发送标志位 #print("检查缓存容量 :",q.qsize()) #time.sleep(2) audio_data,send_flag = q.get() if not send_flag: # TODO: 将音频文件发送出去 print(f'Sending audio...') audio_data,sr= soundfile.read('voice_dir/send_framex.wav', dtype='float32') if len(audio_data.shape) > 
1: audio_data = np.average(audio_data, axis=1) #yield audio2face_pb2.PushAudioStreamRequest(start_marker=Avatar_instance_A) #for i in range(len(audio_data) // sr//10 + 1): # chunk = audio_data[i * sr//10: i * sr//10+ sr//10] #yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes()) push_audio_track_stream(a2f_url, audio_data, sr, Avatar_instance_A) send_flag=True # 重置事件状态 send_event.clear() else: if not receive_flag: print("发送线程结束") break else: continue stub.PushAudioStream(create_generator()) def audio_chatbot(text): q = Queue() t1 = threading.Thread(target=receive_max,args=(q,text)) t2 = threading.Thread(target=send_stream2,args=(q,)) t1.start() t2.start() # t1.join() #t2.join() global receive_flag while True: send_flag=True # 从队列中取出音频文件路径和对应的发送标志位 audio, send_flag = q.get() if not send_flag: # 将音频文件路径放回队列(因为发送是在另一个线程中完成的) q.put((audio,False)) # 设置事件,通知发送线程可以发送该音频 send_event.set() if not receive_flag: break if __name__ == "__main__": text = "这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?\ 当然可以。我要实现一个人工智能,这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?当然可以。我要实现一个人工智能,但是我需要很多时间和精力完成\ 这里是一段较长的文本,需要拆分成多个句子来进行语音合成!句子也可以用问号来结尾吗?当然可以。我要实现一个人工智能,但是我需要很多时间和精力完成" # 启动主程序 audio_chatbot(text) # t1=time.time() # asyncio.run(tts_send(text)) # print(time.time()-t1) # # t1 = threading.Thread(target=send_stream) # t1=time.time() # #asyncio.run(tts_a2f(text)) # print(time.time()-t1) ================================================ FILE: app.py ================================================ from model_cards.autoback import AutoBackend import argparse import os import platform import sys from pathlib import Path import numpy as np import torch import torch.backends.cudnn as cudnn import matplotlib.pyplot as plt from PIL import Image,ImageDraw,ImageFont from utils.ops import (LOGGER, Profile, check_file, check_requirements, colorstr, cv2, dilate_mask, increment_path , scale_boxes, xyxy2xywh,save_format) from utils.plot import Annotator, 
save_one_box,show_box,show_mask,save_mask_data,Draw_img from config_private import * from llm_cards.bridge_all import predict_all,talk_all from llm_cards.bridge_chatgpt import Talk_with_app from llm_cards.core_functional import get_core_functions from utils.toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, load_chat_cookies, DummyWith from utils.torch_utils import select_device from utils import VID_FORMATS,IMG_FORMATS,write_categories import gradio as gr import random import json import multiprocessing as mp import asyncio import concurrent.futures from utils.colorful import * functional = get_core_functions() VisualGLM_dir=f"VisualGLM_6B" sys.path.append(VisualGLM_dir) FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative global categories categories = {} global category_colors category_colors={} # 初始对应类别编号 class_ids = [] global speech_AI speech_AI={'asr':{'whisper':None},'tts':{'tts_VITS':None,'tts_edge': None}} ## speech global models_config models_config = {'tag2text': None, 'ram': None,'lama': None,'sam': None,'grounded': None,'sd': None, ## cv with text 'visual_glm': None , 'trans_zh': None,'gligen': None} NUM_WORKERS=1 JSON_DATASETS=[] operation_running = False def toggle_operation(flag): import whisper from a2f import speech_recognition,mic_audio,keyboard if speech_AI['asr']['whisper'] is None: speech_AI['asr']['whisper']=whisper.load_model("small", download_root="weights") print("asr加载完毕,开始录音!") text=[] speech_text='' while True: # result_txt="你好我没有正确识别到结果" if keyboard.is_pressed('q'): mic_audio('voice_dir/send_asr.wav') speech_text,__=speech_recognition('voice_dir/send_asr.wav',speech_AI['asr']['whisper'],False) break print(speech_text) text.append(speech_text) return text async def sadtalker_demo(checkpoint_path,config_path,source_image, 
driven_audio, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style, exp_weight): sys.path.append('SadTalker') from SadTalker.app import SadTalker sadtaker_model=SadTalker(checkpoint_path, config_path, lazy_load=True) output = await asyncio.to_thread(sadtaker_model.test, source_image, driven_audio, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style, exp_weight) return output def train_visualGLM(name,model_size,mode,train_iters,resume_data, max_source_length,max_target_length,lora_rank,layer_range_s,layer_range_e,pre_seq_len, train_data,valid_data,distributed_backend,lr_decay_style,warmup, checkpoint_activations,save_interval,eval_interval,save_path, split,eval_iters,eval_batch_size ,zero_stage, lr,batch_size,accumulation_steps,method_type): model_args=[max_source_length,max_target_length,lora_rank,layer_range_s,layer_range_e,pre_seq_len] gpt_option=[name,int(model_size),mode,int(train_iters),resume_data, #23 train_data,valid_data,distributed_backend,lr_decay_style,warmup, checkpoint_activations,int(save_interval),int(eval_interval),save_path, int(split),int(eval_iters),int(eval_batch_size),int(zero_stage), lr,int(batch_size),int(accumulation_steps)] processes = [] for i in range(NUM_WORKERS): p = mp.Process(target=start_finetuning_process, args=(gpt_option,model_args,method_type)) p.start() processes.append(p) for p in processes: p.join() return 'OK' #具体参数待修复调整 def start_finetuning_process(gpt_option,model_args,method_type): print('fine subprocess start') script_path = os.path.abspath(__file__) script_dir = os.path.dirname(script_path) print(script_dir+'/'+VisualGLM_dir) main_dir = os.path.dirname(script_dir) model_args = f'--max_source_length {model_args[0]} --max_target_length {model_args[1]} --lora_rank {model_args[2]} --layer_range {model_args[3]} {model_args[4]} --pre_seq_len {model_args[5]}' options_nccl = 'NCCL_DEBUG=info NCCL_IB_DISABLE=0 NCCL_NET_GDR_LEVEL=2' host_file_path = 'hostfile_single' 
gpt_option_prefix=f" \ --experiment-name finetune-{gpt_option[0]} \ --model-parallel-size {gpt_option[1]} \ --mode {gpt_option[2]} \ --train-iters {gpt_option[3]} \ --resume-dataloader \ {model_args} \ --train-data {gpt_option[5]} \ --valid-data {gpt_option[6]} \ --distributed-backend {gpt_option[7]} \ --lr-decay-style {gpt_option[8]}\ --warmup {gpt_option[9]} \ --checkpoint-activations \ --save-interval {gpt_option[11]} \ --eval-interval {gpt_option[12]} \ --save {gpt_option[13]} \ --split {gpt_option[14]}\ --eval-iters {gpt_option[15]} \ --eval-batch-size {gpt_option[16]}\ --zero-stage {gpt_option[17]} \ --lr {gpt_option[18]} \ --batch-size {gpt_option[19]} " lora=f" \ --skip-init \ --fp16 \ --use_lora " qlora=f"--gradient-accumulation-steps {gpt_option[20]} \ --skip-init \ --fp16 \ --use_qlora" ptune=f" \ --skip-init \ --fp16 \ --use_ptuning" if method_type=='use_qlora': gpt_options=gpt_option_prefix+qlora elif method_type=='use_lora': gpt_options=gpt_option_prefix+lora elif method_type=='use_ptuning': gpt_options=gpt_option_prefix+ptune else: LOGGER.info("没有选择训练方法!!!") return run_cmd = f'{options_nccl} deepspeed --master_port 16666 --hostfile {host_file_path} {VisualGLM_dir}/finetune_visualglm.py {gpt_options} ' os.system(run_cmd) async def load_speech_model(asr_method,tts_method): import whisper global speech_AI if asr_method=='whisper' : speech_AI['asr']['whisper']= whisper.load_model("small",download_root="weights") LOGGER.info('loads whisper') elif not asr_method and speech_AI['asr']['whisper']: LOGGER.info('free memory') speech_AI['asr']['whisper']=None else: LOGGER.info('pass') if tts_method =="VITS": print('调试中,很快更新') # speech_AI['tts']['VITS'] = # LOGGER.info('loads whisper') elif not tts_method: LOGGER.info('pass') return '语音识别记载完成' def save_text2img_data(prompt,label,img_name,zh_select): global JSON_DATASETS if not prompt : prompt=f"这张图片的背景里有什么内容?" if not zh_select: prompt=f'What contents are present in the background of this picture?' 
example = { "img": f"{img_name}", "prompt": prompt, "label": label } JSON_DATASETS.append(example) async def load_auto_backend_models(lama, sam, det,tag2text,ram, trans_zh, visual_glm,device=0, quant=4, bar=None): try: with concurrent.futures.ThreadPoolExecutor() as pool: wait_coros = asyncio.get_event_loop().run_in_executor(pool, load_auto_backend_model, lama, sam, det, tag2text,ram,trans_zh, visual_glm,device, quant, bar) await asyncio.wait([wait_coros]) await asyncio.sleep(0.01) except Exception as e: LOGGER.info("An error occurred: ", e) return 'windows可能会出现问题,请再次点击加载按钮,也可以检查后台' return 'Loads Done !' def load_auto_backend_model(lama,sam,det,tag2text,ram,trans_zh,visual_glm,device,quant,bar): """ 加载模型库 """ # Load model global models_config if visual_glm and not models_config['visual_glm']: from VisualGLM_6B.chatglm import VisualGLM models_config['visual_glm']=VisualGLM(gpu_device=int(device),quant=int(quant)) LOGGER.info(f'GPU{int(device)}———量化VisualGLM模型:int{int(quant)}') elif not visual_glm: LOGGER.info('no select visualGLM') models_config['visual_glm']=None else: LOGGER.info('free or no visual_glm') device = select_device(device) if tag2text and not models_config['tag2text']: models_config['tag2text'] = AutoBackend("tag2text",weights=Tag2Text_Model_Path,device=device) elif not tag2text : LOGGER.info('no tag2text') models_config['tag2text'] =None else : LOGGER.info('free or tag2text pass') if det and not models_config['grounded']: models_config['grounded'] = AutoBackend("grounded-DINO",weights=GROUNED_MODEL_TYPE['S'], device=device, args_config= 'model_cards/groundingdino/config/GroundingDINO_SwinT_OGC.py') elif not det : models_config['grounded'] =None else : LOGGER.info('free or grounded pass') if sam and not models_config['sam']: models_config['sam']= AutoBackend("segment-anything",weights=SAM_MODEL_TYPE['vit_h'] ,device=device) elif not sam : models_config['sam'] =None else: LOGGER.info("PASS SAM") if ram and not models_config['ram']: LOGGER.info("ram 
loads") models_config['ram']= AutoBackend('ram',weights=Ram_Model_Path ,device=device) elif not ram : models_config['ram'] =None else: LOGGER.info("PASS ram") if trans_zh and not models_config['trans_zh']: from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM cn_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-zh",cache_dir='weights') cn_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-zh",cache_dir='weights') translator = pipeline("text2text-generation", model=cn_model, tokenizer=cn_tokenizer) models_config['trans_zh']= translator elif not trans_zh : models_config['trans_zh'] =None else : LOGGER.info('zh model pass') if lama and not models_config['lama']: models_config['lama']= AutoBackend("lama",weights=None,args_config='model_cards/lama/configs/prediction/default.yaml',device=device) elif not lama : models_config['lama'] =None else : LOGGER.info('free or lama pass') return 'OK' def Auto_run( source= 'data/images', # file/dir/URL/glob, 0 for webcam img_input='', input_prompt="Anything in this image", conf_thres=0.3, # confidence threshold iou_thres=0.5, # NMS IOU threshold text_thres=0.2, device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu quant=4, save_conf=False, # save confidences in --save-txt labels img_save=False, # do not save images/videos visualize=False, # visualize features project=ROOT / 'runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment lama=False, # use lama models sam=True, # use segment-anythings det=True, # use grounded detect model with text tag2text=False, ram=False, save_txt=False, # save results to *.txt save_xml=False, # save results to *.xml save_mask=False, save_caption=False, batch_process=False, color_flag=False, zh_select=False, record_audio=None, up_audio=None, process_name=0, ): global models_config global category_colors global JSON_DATASETS cls_index = -1 # 设置默认值为 -1 if img_input: source =img_input source = str(source) img_paths=None if os.path.isdir(source): img_paths = [os.path.join(source, f) for f in os.listdir(source) if Path(f).suffix[1:] in (IMG_FORMATS + VID_FORMATS)] else: img_paths = [source] # Directories is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) # save_img = img_save and not source.endswith('.txt') # save inference images is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) #webcam = source.isnumeric() or source.endswith('.streams') or (is_url ) if is_url and is_file: source = check_file(source) # download save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'xmls' if save_xml else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'masks' if save_mask else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'captions' if save_caption else save_dir).mkdir(parents=True, exist_ok=True) # make dir p = Path(str(save_dir) ) # to Path seen=0 # loda data and inference caption=None for source in (img_paths): im = cv2.imread(source) 
name_p= source.split('/')[-1].split('.')[0] img_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) preds=None masks=[] prompt=input_prompt if tag2text: LOGGER.info(f'text_prompt:{prompt}') preds = models_config['tag2text'](im = img_rgb ,prompt=prompt,box_threshold=conf_thres,text_threshold=text_thres,iou_threshold=iou_thres) # Currently ", " is better for detecting single tags # while ". " is a little worse in some case prompt=preds[0].replace(' |', ',') caption=preds[2] LOGGER.info(f"Caption: {caption}") LOGGER.info(f"Tags: {prompt}") if zh_select and prompt : caption=models_config['trans_zh'](caption, max_length=1000, clean_up_tokenization_spaces=True)[0]["generated_text"] if save_caption: save_text2img_data(None, caption,name_p,zh_select) #save_format(label_format="txt",save_path=f'{save_dir}/captions',img_name=name_p, results=caption) if ram: LOGGER.info(f'ram No need prompt:{prompt}') en_tag,zh_tag = models_config['ram'](im = img_rgb,prompt=prompt,box_threshold=conf_thres,text_threshold=text_thres,iou_threshold=iou_thres) prompt=en_tag.replace(' |', ',') zh_tag=zh_tag.replace(' |', ', ') #LOGGER.info(preds[1]) LOGGER.info(f"en_Tags: {prompt}") print(f"zh_Tags : {zh_tag}") # if zh_select and prompt : # caption=models_config['trans_zh'](caption, max_length=1000, clean_up_tokenization_spaces=True)[0]["generated_text"] # if save_caption: # save_text2img_data(None, caption,name_p,zh_select) if det: if input_prompt: prompt=input_prompt LOGGER.info('your input prompt replace default:',prompt) preds= models_config['grounded'](im = img_rgb,prompt=prompt, box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres) if sam and det : if preds[0].numel()>0: masks= models_config['sam'](im = img_rgb, prompt=preds[0],box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres) if save_mask: save_mask_data(str(save_dir)+'/masks', caption, masks, preds[0], preds[2],name_p) # Write results if img_save: seen+=1 plt.figure(figsize=(20,18)) 
plt.imshow(img_rgb) if det: for box,label in zip(preds[0],preds[2]): show_box(box.numpy(),plt.gca(),label) if sam : for mask in masks: show_mask(mask.cpu().numpy(),plt.gca(),random_color=True) if tag2text: plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n') plt.axis('off') plt.savefig(f'{save_dir}/{seen}.jpg',bbox_iches='tight',dpi=600,pad_inches=0.0) if lama and masks is not None : masks_prompts= masks.detach().cpu().numpy().astype(np.uint8) * 255 for idx, mask in enumerate(masks_prompts): sub_mask = [dilate_mask(ma, 15) for ma in mask] img_inpainted_p= f'{save_dir}/mask_{idx}.png' idx=idx+1 img_inpainted = models_config['lama']( im=img_rgb, prompt=sub_mask[0]) Image.fromarray(img_inpainted.astype(np.uint8)).save(img_inpainted_p) img_rgb=img_inpainted for category in categories: if category not in category_colors: category_colors[category] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) gn = torch.tensor(im.shape)[[1, 0, 1, 0]] # normalization gain whwh if (color_flag or save_txt) and(det ) : seg_mask = np.zeros_like(img_rgb) # img_array 为输入图像的数组表示 category_color=[] for xyxy, conf, cls,mask in zip(preds[0],preds[1],preds[2],masks): #per im boxes xywh = (xyxy2xywh((xyxy).view(1,4)) / gn).view(-1).tolist() # normalized xywh if cls not in categories: categories.update({ str(cls): len(categories)}) write_categories(cls,f'{save_dir}/classes_id.txt') cls_index = len(categories) - 1 category_colors.update({ str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))}) category_color=category_colors[str(cls)] else: cls_index = categories[str(cls)] if str(cls) not in category_colors: category_colors.update({ str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))}) category_color=category_colors[str(cls)] line = (cls_index, xywh, conf) if save_conf else (cls_index, xywh) # label format line = str(line).replace('[', '').replace(']', '').replace("(",'').replace(")"," 
").replace(",", " " * 2) if save_mask: h, w = mask.shape[-2:] mask_color = np.array(category_color).reshape((1, 1, -1)) seg_mask = seg_mask + mask.cpu().numpy().reshape(h, w, 1) * mask_color # add if save_txt: save_format(label_format="txt",save_path=f'{save_dir}/labels', img_name=name_p, results=line) if save_mask: plt.figure(figsize=(10,10)) plt.imshow(seg_mask) #plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n') plt.axis('off') plt.savefig(os.path.join(f'{save_dir}/masks', f'{name_p}_cls.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0) if save_xml: h,w=im.shape[:2] save_format("xml",f'{save_dir}/xmls' ,name_p, Path(source).parent, preds, h, w) if det: img_rgb= Image.fromarray(np.uint8(img_rgb), mode='RGB') draw_img=ImageDraw.Draw(img_rgb) for box,label in zip(preds[0],preds[2]): Draw_img( box, draw_img,'box',label,category_colors[str(label)] if color_flag else None) if sam: img_mask=Image.new('RGBA',img_rgb.size,color=(0,0,0,0) ) draw_mask=ImageDraw.Draw(img_mask) for mask in masks: Draw_img(mask[0].cpu().numpy(),draw_mask,'mask',None,category_colors[str(label)] if color_flag else None) img_rgb.paste(img_mask, mask=img_mask) #img_rgb.save(f'{save_dir}/{seen}.jpg') if save_txt: #class_ids.append(cls) LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/labels") if save_xml: LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/xmls") if save_caption: with open(f'{save_dir}/captions/dataset.json', 'a',encoding='utf-8') as f: json.dump(JSON_DATASETS,f,ensure_ascii=False) f.write('\n') LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/captions") if save_mask: LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/masks") LOGGER.info('Done...') return [[img_rgb],caption,prompt,len(categories)] def visual_chat(prompt_input, temperature, top_p, image_prompt, result_text,record_audio,upload_audio,omniverse=False): global models_config print(f"是否连接omniverse:{omniverse}") if models_config['visual_glm']: if image_prompt 
and prompt_input: __, result_text=(models_config['visual_glm'].request_model(prompt_input, temperature, top_p, image_prompt, result_text)) if omniverse: from a2f import tts_a2f asyncio.run(tts_a2f(result_text[-1][-1])) return "",result_text else : LOGGER.info("请检查你的输入格式和glm模型的参数配置!!!") else: return result_text,"没有加载部署的VisualGLM模型!!!" def clear_fn_image(value): return [("", "Hi, What do you want to know ?或者你想从图像中知道什么?")] if __name__ == "__main__": #check_requirements(exclude=('tensorboard', 'thop')) proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \ get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') AUTO_CLEAR_TXT = get_conf('AUTO_CLEAR_TXT') # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT functional = get_core_functions() from themes.theme import adjust_theme, advanced_css, theme_declaration # 高级函数插件 from llm_cards.crazy_functional import get_crazy_functions crazy_fns = get_crazy_functions() import logging, uuid os.makedirs("gpt_log", exist_ok=True) try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S") except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S") # Disable logging output from the 'httpx' logger logging.getLogger("httpx").setLevel(logging.WARNING) print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!") # 处理markdown文本格式的转变 gr.Chatbot.postprocess = format_io # 代理与自动更新 from utils.check_proxy import check_proxy, auto_update, warm_up_modules proxy_info = check_proxy(proxies) voice_dir='voice_dir' if not os.path.exists(voice_dir): os.mkdir(voice_dir) inputxs=[] outputs=[] cancel_handles = [] with 
gr.Blocks(title="Prompt-Can-Anythings",reload=True, theme=adjust_theme(), analytics_enabled=False,full_width=True,css=advanced_css) as block: gr.HTML( f"

Prompt-Can-Anythings_v1.15 (周更迭代中)

") cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL}) with gr.Row().style(equal_height=False): with gr.Column(scale=1): with gr.Accordion('视觉模型配置', open=False): with gr.TabItem('本地模型配置'): box_threshold=gr.inputs.Number(label='Confidence Threshold', default=0.3) iou_threshold=gr.inputs.Number(label='Iou Threshold', default=0.5) text_threshold=gr.inputs.Number(label='Text Threshold', default=0.25) device_input=gr.inputs.Textbox(label='device',default='0') quant=gr.inputs.Number(label='quant levels',default=4) with gr.TabItem('其他【不需要修改】'): option_inputs = { 'Save Conf': gr.inputs.Checkbox(label='Save Conf',default=False), 'Save img': gr.inputs.Checkbox(label='Save img',default=False), 'Visualize': gr.inputs.Checkbox(label='Visualize',default=False), 'Project': gr.inputs.Textbox(label='Project:save dir_path',default='runs/detect'), 'Name': gr.inputs.Textbox(label='Name',default='exp'), 'Exist Ok': gr.inputs.Checkbox(label='Exist Ok',default=False) } inputxs.extend(list(option_inputs.values())) with gr.Accordion('Method_Options:free combo', open=True): methods_options={'Lama': gr.inputs.Checkbox(label='Lama model[近期更新测试中]',default=False), 'Sam': gr.inputs.Checkbox(label='Sam[当前仅支持检测器的BOX输入]',default=False), 'Det': gr.inputs.Checkbox(label='Grounded[可输入文本的检测器]',default=False), 'Tag2text': gr.inputs.Checkbox(label='Tag2text[图文理解]',default=False), 'ram': gr.inputs.Checkbox(label='ram[识别标签]',default=False) } visual_glm=gr.inputs.Checkbox(label='VisualGLM',default=False) chatgpt=gr.inputs.Checkbox(label='ChatGPT(目前为网络服务自动挂载)',default=True) loads_model_button=gr.Button('热重载模型',variant="primary") loads_flag=gr.inputs.Textbox(label="加载模型进度") list_methods=list(methods_options.values()) inputxs.extend(list_methods) with gr.Accordion('format Options', open=False): save_options={ 'Save txt': gr.inputs.Checkbox(label='Save txt [collect class nums]',default=False), 'Save xml': gr.inputs.Checkbox(label='Save xml',default=False), 'Save Mask': 
gr.inputs.Checkbox(label='Save Mask',default=False), 'Save Caption': gr.inputs.Checkbox(label='Save Caption',default=False), 'Batch Process': gr.inputs.Checkbox(label='Batch Process[暂不支持]',default=False), 'Color Flag': gr.inputs.Checkbox(label='Color Flag[标识语义]',default=False) } inputxs.extend(list(save_options.values())) dir_inputs =gr.inputs.Textbox(label='加载本地图像文件夹路径',default='train_imgs') with gr.Accordion('LLM模型配置', open=False): checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区") md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型源 [暂时仅支持chatgpt/glm2]").style(container=False) max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength") with gr.Row(): quant_chatglm= gr.Dropdown(MODEL_QUANTIZE,value=None,label="llm quantize[chatglm] ").style(container=False) top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="nucleus sampling",) temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",) with gr.Accordion('VisualGLM模型配置', open=False): visual_temperature = gr.Slider(maximum=1, value=0.8, minimum=0, label='VisualGLMTemperature') visual_top_p = gr.Slider(maximum=1, value=0.4, minimum=0, label='VisualGLM top_P') with gr.Accordion('语音模型配置', open=False): with gr.Row(): asr_select = gr.Dropdown(ASR_METHOD,value='whisper', label="语音识别方法").style(container=False) tts_select = gr.Dropdown(TTS_METHOD,value='VITS', label="语音合成方法").style(container=False) asr_gpt = gr.inputs.Checkbox(label='ASR gpt [无需加载按钮]',default=False).style(height=1,width=1) asr_button = gr.Button('Loads SPEECH_AI').style(height=5,width=5) with gr.Accordion('大模型对话系统配置', open=True): with gr.Row(): chat_app = gr.inputs.Checkbox(label='start system',default=False).style(height=1,width=1) chat_app_button = gr.Button('Speech_system').style(height=5,width=5) with 
gr.Accordion('ViusalGLM训练配置', open=False): with gr.Row(): train_methods=gr.Dropdown(AVAIL_METHOD_FINETUNE,value=METHOD_FINETUNE, label="微调方法").style(container=False) visualglm_args=[ gr.inputs.Textbox(label="Experiment_Name", default="visualglm-6b"), gr.inputs.Number(label="Model Parallel Size", default=1), gr.inputs.Textbox(label="mode", default='finetune'), gr.Slider(minimum=1, maximum=3000, value=300, step=1, interactive=True, label="train-iters"), gr.inputs.Checkbox(label="resume dataloader", default=True), gr.Slider(minimum=16, maximum=256, value=64, step=1, interactive=True, label="max_source_length"), gr.Slider(minimum=16, maximum=1024, value=256, step=1, interactive=True, label="max_target_length"), gr.Slider(minimum=1, maximum=100, value=10, step=1, interactive=True, label="lora_rank"), gr.Slider(minimum=0, maximum=256, value=0, step=1, interactive=True, label="layer_range_start"), gr.Slider(minimum=0, maximum=20, value=14, step=1, interactive=True, label="layer_range_end"), gr.Slider(minimum=1, maximum=60, value=4, step=1, interactive=True, label="pre_seq_len"), gr.inputs.Textbox(label="Train Data", default="fewshot-data/dataset.json"), gr.inputs.Textbox(label="Eval Data", default="fewshot-data/dataset.json"), gr.inputs.Textbox(label="distributed backend", default="nccl"), gr.inputs.Dropdown(label="lr decay style ", choices=["cosine", "linear"], default="cosine"), gr.inputs.Number(label="warmup", default=0.02), gr.inputs.Checkbox(label="checkpoint-activations", default=True) , gr.inputs.Number(label="Save Interval", default=300), gr.inputs.Number(label="Eval Interval", default=10000), gr.inputs.Textbox(label="Save Directory", default="./checkpoints"), gr.inputs.Number(label="split", default=1), gr.inputs.Number(label="Eval Iters", default=10), gr.inputs.Number(label="Eval Batch Size", default=8), gr.inputs.Textbox(label='Zero Stage',default=1), gr.inputs.Number(label="lr", default=0.0001), gr.inputs.Number(label="batch size", default=4), 
gr.inputs.Number(label="gradient accumulation steps", default=4), ] fine_tune=gr.Button('Finetune VisualGLM').style(height=5,width=5) with gr.Accordion('sadtakler配置', open=False): with gr.Tabs(elem_id="sadtalker_checkbox"): with gr.TabItem('Settings'): gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials") with gr.Column(variant='panel'): # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width with gr.Row(): pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) # exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1) # with gr.Row(): sadtalker_path=gr.inputs.Textbox(label="checkpoint path", default="checkpoints") sadtalker_config=gr.inputs.Textbox(label="config path", default="SadTalker/src/config") with gr.Row(): size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") # preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?") with gr.Row(): is_still_mode = gr.Checkbox(label="Still Mode (fewer hand motion, works with preprocess `full`)") batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2) enhancer = gr.Checkbox(label="GFPGAN as Face enhancer") sadtalker_submit = gr.Button('Generate_video', elem_id="sadtalker_generate", variant='primary') with gr.Column(variant='panel',scale=15): with gr.Tabs(elem_id="Process_audio"): with gr.TabItem('Upload OR TTS'): with gr.Column(variant='panel'): with gr.Row(): record_audio = gr.Audio(label="record your voice", source="microphone",type='filepath') 
#Recording_audio=gr.Button('Recording_asr',elem_id="speech2text", variant='primary') with gr.Row(): upload_audio = gr.Audio(label="Input audio(./wav/.mp3)", source="upload",type='filepath').style(height=20,width=120) input_text = gr.Textbox(label="Generating audio from text", lines=2, placeholder="please enter some text here, we genreate the audio from TTS.") with gr.Row(): asr = gr.Button('Generate text',elem_id="text_generate", variant='primary') tts = gr.Button('Generate audio',elem_id="audio_generate", variant='primary') with gr.TabItem('Omniverse App'): with gr.Row(): omniverse_switch = gr.inputs.Checkbox(label='Omniverse A2F',default=False) #audio_to_face=gr.Button('send a Audio to Omniverse ', variant='primary') def t2s(text,method): from a2f import tts_send2 send_dir=f'{voice_dir}/send_a2f.wav' if method=='VITS': print('更新中,暂不支持') elif method=='edge_tts' : asyncio.run(tts_send2(text,False,send_dir)) return send_dir def s2t(speech_file,stream_mode=False): from a2f import speech_recognition speech_text, speech_language=speech_recognition(speech_file, speech_AI['asr']['whisper'],stream_mode) # return speech_text with gr.Tabs(elem_id="上传图像"): with gr.TabItem('Upload image'): with gr.Row(): image_prompt = gr.Image(label="Source image", source="upload", type="filepath").style(height=200,width=180) prompt_input=gr.inputs.Textbox(lines=2, label="prompt with image/仅与图像相关 : (Optional,注意每个功能请考虑在这个框里的TEXT提示词要不要先清空)") inputs = [dir_inputs,image_prompt,prompt_input,box_threshold,iou_threshold,text_threshold,device_input,quant] inputs.extend(inputxs) with gr.Row(): run_button = gr.Button('Run CV_Task',variant="primary"); run_button.style(size="sm") clear_button= gr.Button("清除文本", variant="secondary"); clear_button.style(size="sm") with gr.Row(): resetBtn = gr.Button("重置", variant="secondary"); resetBtn.style(size="sm") stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm") clearBtn = gr.Button("清除", variant="secondary", visible=False); 
clearBtn.style(size="sm") with gr.Row(): status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行。当前模型: {LLM_MODEL} \n {proxy_info}", elem_id="state-panel") with gr.Tabs(elem_id="Chatbox"): with gr.TabItem('对话区'): with gr.Accordion("输入区", open=True, elem_id="input-panel") as area_input_primary: with gr.Row(): chat_txt=gr.Textbox(lines=3,show_label=False, placeholder="question").style(container=False) with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary: with gr.Row(): txt = gr.Textbox(show_label=False, placeholder="Input question here.", label="输入区2").style(container=False) with gr.Row(): run_button_chat = gr.Button('Chat_Sumbit',variant="primary") run_button_2 = gr.Button('VisualGLM',variant="primary") with gr.Accordion("学术ChatGPT基础功能", open=False) as area_basic_fn: with gr.Row(): for k in functional: if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue variant = functional[k]["Color"] if "Color" in functional[k] else "secondary" functional[k]["Button"] = gr.Button(k, variant=variant) with gr.Accordion("函数插件区", open=False, elem_id="plugin-panel") as area_crazy_fn: with gr.Row(): gr.Markdown("插件可读取“输入区”文本/路径作为参数(上传文件自动修正路径)") with gr.Row(): for k in crazy_fns: if not crazy_fns[k].get("AsButton", True): continue variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary" crazy_fns[k]["Button"] = gr.Button(k, variant=variant) crazy_fns[k]["Button"].style(size="sm") with gr.Row(): with gr.Accordion("更多函数插件", open=False): # update dropdown_fn_list = crazy_fns.keys() with gr.Row(): dropdown = gr.Dropdown(dropdown_fn_list, value=r"打开插件列表", label="", show_label=False).style(container=False) with gr.Row(): plugin_advanced_arg = gr.Textbox(show_label=True, label="高级参数输入区", visible=False, placeholder="特殊函数插件的高级参数输入区").style(container=False) with gr.Row(): switchy_bt = gr.Button(r"请先从插件列表中选择", variant="secondary") with gr.Row(): with gr.Accordion("点击展开“文件上传区”。上传本地文件/压缩包供函数插件调用。", open=False) as area_file_up: 
file_upload = gr.Files(label="任何文件, 但推荐上传压缩文件(zip, tar)", file_count="multiple") with gr.Column(scale=20): with gr.Accordion('输出区', open=True): with gr.TabItem('图像输出'): gallery = gr.Gallery(label="Generated images",show_label=False,elem_id="gallery",).style(preview=True, grid=2, object_fit="scale-down") with gr.TabItem('视频输出'): video_output = gr.Video(label="Generated video", format="mp4").style(width=600) with gr.TabItem('图文理解'): with gr.Row(): output_text = gr.Textbox(label="tag2text",lines=2) with gr.Row(): output_tag= gr.outputs.Textbox(label="Tag").style(height=1) with gr.Row(): zh_select=gr.inputs.Checkbox(label='英译中 Tag2Text【选后需重载模型】',default=False).style(width=1) with gr.Row(): output_classes= gr.Textbox(label="Class Numbers ",lines=1, placeholder="generate classes numbers,color flag or save_txt must be ture/你必须启动存储txt的功能,这个是全局的").style(conatiner=False,width=1) with gr.Row(): with gr.Accordion("备选输入区", open=True, visible=False) as area_input_secondary: system_prompt = gr.Textbox(show_label=True, placeholder=f"Chat Prompt", label="下方输入对话支持图像和文本", value="AI assistant.") #stopBtn2 = gr.Button("停止", variant="secondary"); stopBtn2.style(size="sm") clearBtn2 = gr.Button("清除", variant="secondary", visible=False); clearBtn2.style(size="sm") with gr.Row(): with gr.Column(scale=2): result_text = gr.Chatbot(label=f'当前模型:{LLM_MODEL}', value=[("", "Hi, What do you want to know ?")]).style(height=CHATBOT_HEIGHT) history = gr.State([]) #Recording_audio.click(fn=toggle_operation,inputs=[asr_select],outputs=[input_text]) # 将 toggle_operation 函数绑定到按钮 # 功能区显示开关与功能区的互动 def fn_area_visibility(a): ret = {} ret.update({area_basic_fn: gr.update(visible=("基础功能区" in a))}) ret.update({area_crazy_fn: gr.update(visible=("函数插件区" in a))}) ret.update({area_input_primary: gr.update(visible=("底部输入区" not in a))}) ret.update({area_input_secondary: gr.update(visible=("底部输入区" in a))}) ret.update({clearBtn: gr.update(visible=("输入清除键" in a))}) ret.update({clearBtn2: gr.update(visible=("输入清除键" in 
a))}) ret.update({plugin_advanced_arg: gr.update(visible=("插件参数区" in a))}) if "底部输入区" in a: ret.update({txt: gr.update(value="")}) return ret checkboxes.select(fn_area_visibility, [checkboxes], [area_basic_fn, area_crazy_fn, area_input_primary, area_input_secondary, chat_txt,txt , clearBtn, clearBtn2, plugin_advanced_arg] ) sadtalker_submit.click(fn=sadtalker_demo,inputs=[sadtalker_path,sadtalker_config,image_prompt,upload_audio, preprocess_type,is_still_mode,enhancer, batch_size, size_of_image, pose_style, exp_weight],outputs=[video_output]) #audio_to_face.click(fn=t2s, inputs=[result_text,input_text,gr.State(True),omniverse_switch], outputs=[upload_audio] ) asr_button.click(fn=load_speech_model,inputs=[asr_select,tts_select],outputs=[loads_flag]) asr.click(fn=s2t, inputs=[upload_audio], outputs=[input_text]) tts.click(fn=t2s, inputs=[input_text,tts_select], outputs=[upload_audio]) # fine tune VisualGLM visualglm_args.append(train_methods) fine_tune.click(fn=train_visualGLM,inputs=visualglm_args,outputs=[txt]) # visualGLM inputs cs=[] cs.extend(list_methods) cs.extend([zh_select, visual_glm,device_input, quant, loads_flag]) loads_model_button.click(fn=load_auto_backend_models,inputs=cs,outputs=[loads_flag]) inputs.append(zh_select) def on_md_dropdown_changed(k): return {result_text: gr.update(label="当前模型:"+k)} md_dropdown.select(on_md_dropdown_changed, [md_dropdown],[result_text]) outputs = [gallery, output_text, output_tag,output_classes] input_combo = [cookies, max_length_sl, md_dropdown,chat_txt,txt,top_p, temperature, result_text, history,system_prompt,plugin_advanced_arg,omniverse_switch,record_audio,asr_gpt,quant_chatglm,chat_app] output_combo = [cookies, result_text, history, status] # output_combo2=[result_text, history, status] predict_args = dict(fn=ArgsGeneralWrapper(predict_all), inputs=input_combo, outputs=output_combo) chat_args=dict(fn=ArgsGeneralWrapper(talk_all), inputs=input_combo, outputs=output_combo) run_button.click(fn=Auto_run, 
inputs=inputs, outputs=outputs) # 提交按钮、重置按钮 cancel_handles.append(chat_txt.submit(**predict_args)) cancel_handles.append(txt.submit(**predict_args)) cancel_handles.append(run_button_chat.click(**predict_args)) cancel_handles.append(run_button_2.click(**predict_args)) cancel_handles.append(chat_app_button.click(**chat_args)) resetBtn.click(lambda: ([], [], "已重置"), None, [result_text, history, status]) stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) clearBtn.click(lambda: ("",""), None, [chat_txt,txt]) clearBtn2.click(lambda: ("",""), None, [chat_txt,txt]) if AUTO_CLEAR_TXT: run_button_chat.click(lambda: ("",""), None, [chat_txt,txt]) run_button_2.click(lambda: ("",""), None, [chat_txt,txt]) chat_txt.submit(lambda: ("",""), None, [chat_txt,txt]) txt.submit(lambda: ("",""), None, [chat_txt,txt]) for k in functional: if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue dict_args=dict(fn=ArgsGeneralWrapper(predict_all), inputs=[*input_combo, gr.State(True),gr.State(k)], outputs=output_combo) cancel_handles.append(functional[k]["Button"].click(**dict_args)) # 文件上传区,接收文件后与chatbot的互动 file_upload.upload(on_file_uploaded, [file_upload, result_text, chat_txt, txt, checkboxes], [result_text, chat_txt, txt]) # 函数插件-固定按钮区 for k in crazy_fns: print(f'检查插件名字{k},是否载入') if not crazy_fns[k].get("AsButton", True): continue click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo) click_handle.then(on_report_generated, [cookies, file_upload, result_text], [cookies, file_upload, result_text]) cancel_handles.append(click_handle) # 函数插件-下拉菜单与随变按钮的互动 def on_dropdown_changed(k): variant = crazy_fns[k]["Color"] if "Color" in crazy_fns[k] else "secondary" ret = {switchy_bt: gr.update(value=k, variant=variant)} if crazy_fns[k].get("AdvancedArgs", False): # 是否唤起高级插件参数区 ret.update({plugin_advanced_arg: gr.update(visible=True, label=f"插件[{k}]的高级参数说明:" + 
crazy_fns[k].get("ArgsReminder", [f"没有提供高级参数功能说明"]))}) else: ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")}) return ret dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] ) def on_md_dropdown_changed(k): return {result_text: gr.update(label="当前模型:"+k)} md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [result_text] ) # 随变按钮的回调函数注册 def route(request: gr.Request, k, *args, **kwargs): if k in [r"打开插件列表", r"请先从插件列表中选择"]: return yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(request, *args, **kwargs) click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) click_handle.then(on_report_generated, [cookies, file_upload, result_text], [cookies, file_upload, result_text]) cancel_handles.append(click_handle) # 终止按钮的回调函数注册 # stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) #VisualGLM run run_button_2.click(fn=visual_chat,inputs=[chat_txt, visual_temperature, visual_top_p, image_prompt, result_text,record_audio,upload_audio,omniverse_switch], outputs=[txt, result_text]) prompt_input.submit(fn=visual_chat,inputs=[chat_txt, visual_temperature, visual_top_p, image_prompt, result_text,record_audio,upload_audio,omniverse_switch], outputs=[txt,result_text]) #upload_audio.upload(fn=clear_fn_image, inputs=clear_button, outputs=[result_text]) image_prompt.upload(fn=clear_fn_image, inputs=clear_button, outputs=[result_text]) clear_button.click(lambda: ("","","","",""), None, [prompt_input,result_text,txt, input_text,chat_txt]) image_prompt.clear(fn=clear_fn_image, inputs=clear_button, outputs=[result_text]) # def init_cookie(cookies, chatbot): # # 为每一位访问的用户赋予一个独一无二的uuid编码 # cookies.update({'uuid': uuid.uuid4()}) # return cookies def auto_opentab_delay(port=7586): import threading, webbrowser, time LOGGER.info(f"\n如果浏览器没有自动打开,请复制并转到以下URL:") LOGGER.info(f"\t(亮色主题): 
http://localhost:{port}") LOGGER.info(f"\t(暗色主题): http://localhost:{port}/?__theme=dark") def open(): time.sleep(2) # 打开浏览器 DARK_MODE, = get_conf('DARK_MODE') if DARK_MODE: webbrowser.open_new_tab(f"http://localhost:{port}/?__theme=dark") else: webbrowser.open_new_tab(f"http://localhost:{port}") threading.Thread(target=open, name="open-browser", daemon=True).start() #threading.Thread(target=auto_update, name="self-upgrade", daemon=True).start() auto_opentab_delay(7901) block.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name='0.0.0.0', server_port=7901,debug=True, share=False) ================================================ FILE: audio2face_pb2.py ================================================ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: audio2face.proto """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() DESCRIPTOR = _descriptor.FileDescriptor( name="audio2face.proto", package="nvidia.audio2face", syntax="proto3", serialized_options=None, create_key=_descriptor._internal_create_key, serialized_pb=b'\n\x10\x61udio2face.proto\x12\x11nvidia.audio2face"{\n\x10PushAudioRequest\x12\x15\n\rinstance_name\x18\x01 \x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12\x12\n\naudio_data\x18\x03 \x01(\x0c\x12(\n block_until_playback_is_finished\x18\x04 \x01(\x08"5\n\x11PushAudioResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t"\x85\x01\n\x16PushAudioStreamRequest\x12@\n\x0cstart_marker\x18\x01 \x01(\x0b\x32(.nvidia.audio2face.PushAudioRequestStartH\x00\x12\x14\n\naudio_data\x18\x02 \x01(\x0cH\x00\x42\x13\n\x11streaming_request"l\n\x15PushAudioRequestStart\x12\x15\n\rinstance_name\x18\x01 
\x01(\t\x12\x12\n\nsamplerate\x18\x02 \x01(\x05\x12(\n block_until_playback_is_finished\x18\x03 \x01(\x08";\n\x17PushAudioStreamResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t2\xd4\x01\n\nAudio2Face\x12X\n\tPushAudio\x12#.nvidia.audio2face.PushAudioRequest\x1a$.nvidia.audio2face.PushAudioResponse"\x00\x12l\n\x0fPushAudioStream\x12).nvidia.audio2face.PushAudioStreamRequest\x1a*.nvidia.audio2face.PushAudioStreamResponse"\x00(\x01\x62\x06proto3', ) _PUSHAUDIOREQUEST = _descriptor.Descriptor( name="PushAudioRequest", full_name="nvidia.audio2face.PushAudioRequest", filename=None, file=DESCRIPTOR, containing_type=None, create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="instance_name", full_name="nvidia.audio2face.PushAudioRequest.instance_name", index=0, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="samplerate", full_name="nvidia.audio2face.PushAudioRequest.samplerate", index=1, number=2, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="audio_data", full_name="nvidia.audio2face.PushAudioRequest.audio_data", index=2, number=3, type=12, cpp_type=9, label=1, has_default_value=False, default_value=b"", message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="block_until_playback_is_finished", 
full_name="nvidia.audio2face.PushAudioRequest.block_until_playback_is_finished", index=3, number=4, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], serialized_start=39, serialized_end=162, ) _PUSHAUDIORESPONSE = _descriptor.Descriptor( name="PushAudioResponse", full_name="nvidia.audio2face.PushAudioResponse", filename=None, file=DESCRIPTOR, containing_type=None, create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="success", full_name="nvidia.audio2face.PushAudioResponse.success", index=0, number=1, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="message", full_name="nvidia.audio2face.PushAudioResponse.message", index=1, number=2, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], serialized_start=164, serialized_end=217, ) _PUSHAUDIOSTREAMREQUEST = _descriptor.Descriptor( name="PushAudioStreamRequest", full_name="nvidia.audio2face.PushAudioStreamRequest", filename=None, file=DESCRIPTOR, containing_type=None, create_key=_descriptor._internal_create_key, fields=[ 
_descriptor.FieldDescriptor( name="start_marker", full_name="nvidia.audio2face.PushAudioStreamRequest.start_marker", index=0, number=1, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="audio_data", full_name="nvidia.audio2face.PushAudioStreamRequest.audio_data", index=1, number=2, type=12, cpp_type=9, label=1, has_default_value=False, default_value=b"", message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[ _descriptor.OneofDescriptor( name="streaming_request", full_name="nvidia.audio2face.PushAudioStreamRequest.streaming_request", index=0, containing_type=None, create_key=_descriptor._internal_create_key, fields=[], ) ], serialized_start=220, serialized_end=353, ) _PUSHAUDIOREQUESTSTART = _descriptor.Descriptor( name="PushAudioRequestStart", full_name="nvidia.audio2face.PushAudioRequestStart", filename=None, file=DESCRIPTOR, containing_type=None, create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="instance_name", full_name="nvidia.audio2face.PushAudioRequestStart.instance_name", index=0, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="samplerate", full_name="nvidia.audio2face.PushAudioRequestStart.samplerate", index=1, number=2, type=5, 
cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="block_until_playback_is_finished", full_name="nvidia.audio2face.PushAudioRequestStart.block_until_playback_is_finished", index=2, number=3, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], serialized_options=None, is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], serialized_start=355, serialized_end=463, ) _PUSHAUDIOSTREAMRESPONSE = _descriptor.Descriptor( name="PushAudioStreamResponse", full_name="nvidia.audio2face.PushAudioStreamResponse", filename=None, file=DESCRIPTOR, containing_type=None, create_key=_descriptor._internal_create_key, fields=[ _descriptor.FieldDescriptor( name="success", full_name="nvidia.audio2face.PushAudioStreamResponse.success", index=0, number=1, type=8, cpp_type=7, label=1, has_default_value=False, default_value=False, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), _descriptor.FieldDescriptor( name="message", full_name="nvidia.audio2face.PushAudioStreamResponse.message", index=1, number=2, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode("utf-8"), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key, ), ], extensions=[], nested_types=[], enum_types=[], serialized_options=None, 
is_extendable=False, syntax="proto3", extension_ranges=[], oneofs=[], serialized_start=465, serialized_end=524, ) _PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"].message_type = _PUSHAUDIOREQUESTSTART _PUSHAUDIOSTREAMREQUEST.oneofs_by_name["streaming_request"].fields.append( _PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"] ) _PUSHAUDIOSTREAMREQUEST.fields_by_name["start_marker"].containing_oneof = _PUSHAUDIOSTREAMREQUEST.oneofs_by_name[ "streaming_request" ] _PUSHAUDIOSTREAMREQUEST.oneofs_by_name["streaming_request"].fields.append( _PUSHAUDIOSTREAMREQUEST.fields_by_name["audio_data"] ) _PUSHAUDIOSTREAMREQUEST.fields_by_name["audio_data"].containing_oneof = _PUSHAUDIOSTREAMREQUEST.oneofs_by_name[ "streaming_request" ] DESCRIPTOR.message_types_by_name["PushAudioRequest"] = _PUSHAUDIOREQUEST DESCRIPTOR.message_types_by_name["PushAudioResponse"] = _PUSHAUDIORESPONSE DESCRIPTOR.message_types_by_name["PushAudioStreamRequest"] = _PUSHAUDIOSTREAMREQUEST DESCRIPTOR.message_types_by_name["PushAudioRequestStart"] = _PUSHAUDIOREQUESTSTART DESCRIPTOR.message_types_by_name["PushAudioStreamResponse"] = _PUSHAUDIOSTREAMRESPONSE _sym_db.RegisterFileDescriptor(DESCRIPTOR) PushAudioRequest = _reflection.GeneratedProtocolMessageType( "PushAudioRequest", (_message.Message,), { "DESCRIPTOR": _PUSHAUDIOREQUEST, "__module__": "audio2face_pb2" # @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioRequest) }, ) _sym_db.RegisterMessage(PushAudioRequest) PushAudioResponse = _reflection.GeneratedProtocolMessageType( "PushAudioResponse", (_message.Message,), { "DESCRIPTOR": _PUSHAUDIORESPONSE, "__module__": "audio2face_pb2" # @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioResponse) }, ) _sym_db.RegisterMessage(PushAudioResponse) PushAudioStreamRequest = _reflection.GeneratedProtocolMessageType( "PushAudioStreamRequest", (_message.Message,), { "DESCRIPTOR": _PUSHAUDIOSTREAMREQUEST, "__module__": "audio2face_pb2" # 
@@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioStreamRequest) }, ) _sym_db.RegisterMessage(PushAudioStreamRequest) PushAudioRequestStart = _reflection.GeneratedProtocolMessageType( "PushAudioRequestStart", (_message.Message,), { "DESCRIPTOR": _PUSHAUDIOREQUESTSTART, "__module__": "audio2face_pb2" # @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioRequestStart) }, ) _sym_db.RegisterMessage(PushAudioRequestStart) PushAudioStreamResponse = _reflection.GeneratedProtocolMessageType( "PushAudioStreamResponse", (_message.Message,), { "DESCRIPTOR": _PUSHAUDIOSTREAMRESPONSE, "__module__": "audio2face_pb2" # @@protoc_insertion_point(class_scope:nvidia.audio2face.PushAudioStreamResponse) }, ) _sym_db.RegisterMessage(PushAudioStreamResponse) _AUDIO2FACE = _descriptor.ServiceDescriptor( name="Audio2Face", full_name="nvidia.audio2face.Audio2Face", file=DESCRIPTOR, index=0, serialized_options=None, create_key=_descriptor._internal_create_key, serialized_start=527, serialized_end=739, methods=[ _descriptor.MethodDescriptor( name="PushAudio", full_name="nvidia.audio2face.Audio2Face.PushAudio", index=0, containing_service=None, input_type=_PUSHAUDIOREQUEST, output_type=_PUSHAUDIORESPONSE, serialized_options=None, create_key=_descriptor._internal_create_key, ), _descriptor.MethodDescriptor( name="PushAudioStream", full_name="nvidia.audio2face.Audio2Face.PushAudioStream", index=1, containing_service=None, input_type=_PUSHAUDIOSTREAMREQUEST, output_type=_PUSHAUDIOSTREAMRESPONSE, serialized_options=None, create_key=_descriptor._internal_create_key, ), ], ) _sym_db.RegisterServiceDescriptor(_AUDIO2FACE) DESCRIPTOR.services_by_name["Audio2Face"] = _AUDIO2FACE # @@protoc_insertion_point(module_scope) ================================================ FILE: audio2face_pb2_grpc.py ================================================ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

import audio2face_pb2 as audio2face__pb2


class Audio2FaceStub(object):
    """Client-side stub for the ``nvidia.audio2face.Audio2Face`` service.

    Exposes one callable attribute per RPC, both bound to the channel passed
    to the constructor.
    """

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        # Unary-unary RPC: one PushAudioRequest in, one PushAudioResponse out.
        self.PushAudio = channel.unary_unary(
            "/nvidia.audio2face.Audio2Face/PushAudio",
            request_serializer=audio2face__pb2.PushAudioRequest.SerializeToString,
            response_deserializer=audio2face__pb2.PushAudioResponse.FromString,
        )
        # Stream-unary RPC: an iterator of PushAudioStreamRequest in, one
        # PushAudioStreamResponse out.
        self.PushAudioStream = channel.stream_unary(
            "/nvidia.audio2face.Audio2Face/PushAudioStream",
            request_serializer=audio2face__pb2.PushAudioStreamRequest.SerializeToString,
            response_deserializer=audio2face__pb2.PushAudioStreamResponse.FromString,
        )


class Audio2FaceServicer(object):
    """Server-side base class for the Audio2Face service.

    Both handlers are placeholders: they mark the call UNIMPLEMENTED and
    raise NotImplementedError, so a real server overrides them.
    """

    def PushAudio(self, request, context):
        """Placeholder handler for the PushAudio RPC; always reports UNIMPLEMENTED."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details("Method not implemented!")
        raise NotImplementedError("Method not implemented!")

    def PushAudioStream(self, request_iterator, context):
        """Placeholder handler for the PushAudioStream RPC; always reports UNIMPLEMENTED."""
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details("Method not implemented!")
        raise NotImplementedError("Method not implemented!")


def add_Audio2FaceServicer_to_server(servicer, server):
    """Register ``servicer``'s two RPC handlers on ``server``.

    Args:
        servicer: object providing ``PushAudio`` and ``PushAudioStream``.
        server: object providing ``add_generic_rpc_handlers`` (a gRPC server).
    """
    rpc_method_handlers = {
        "PushAudio": grpc.unary_unary_rpc_method_handler(
            servicer.PushAudio,
            request_deserializer=audio2face__pb2.PushAudioRequest.FromString,
            response_serializer=audio2face__pb2.PushAudioResponse.SerializeToString,
        ),
        "PushAudioStream": grpc.stream_unary_rpc_method_handler(
            servicer.PushAudioStream,
            request_deserializer=audio2face__pb2.PushAudioStreamRequest.FromString,
            response_serializer=audio2face__pb2.PushAudioStreamResponse.SerializeToString,
        ),
    }
    generic_handler = grpc.method_handlers_generic_handler("nvidia.audio2face.Audio2Face", rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))


# This class is part of an EXPERIMENTAL API.
class Audio2Face(object):
    """Static convenience wrappers that forward to ``grpc.experimental``.

    Each method takes a ``target`` address instead of a channel and passes
    every argument straight through to the matching ``grpc.experimental`` call.
    """

    @staticmethod
    def PushAudio(
        request,
        target,
        options=(),
        channel_credentials=None,
        call_credentials=None,
        insecure=False,
        compression=None,
        wait_for_ready=None,
        timeout=None,
        metadata=None,
    ):
        """Invoke the PushAudio RPC on ``target`` via ``grpc.experimental.unary_unary``."""
        return grpc.experimental.unary_unary(
            request,
            target,
            "/nvidia.audio2face.Audio2Face/PushAudio",
            audio2face__pb2.PushAudioRequest.SerializeToString,
            audio2face__pb2.PushAudioResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )

    @staticmethod
    def PushAudioStream(
        request_iterator,
        target,
        options=(),
        channel_credentials=None,
        call_credentials=None,
        insecure=False,
        compression=None,
        wait_for_ready=None,
        timeout=None,
        metadata=None,
    ):
        """Invoke the PushAudioStream RPC on ``target`` via ``grpc.experimental.stream_unary``."""
        return grpc.experimental.stream_unary(
            request_iterator,
            target,
            "/nvidia.audio2face.Audio2Face/PushAudioStream",
            audio2face__pb2.PushAudioStreamRequest.SerializeToString,
            audio2face__pb2.PushAudioStreamResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
        )


================================================
FILE: audio2face_streaming_utils.py
================================================
"""
This demo script shows how to send audio data to Audio2Face Streaming Audio Player via gRPC requests.
There are two options:
 * Send the whole track at once using PushAudioRequest()
 * Send the audio chunks sequentially in a stream using PushAudioStreamRequest()
For the second option this script emulates the stream of chunks, generated by splitting an input WAV audio file.
But in a real application such stream of chunks may be aquired from some other streaming source: * streaming audio via internet, streaming Text-To-Speech, etc gRPC protocol details could be find in audio2face.proto """ import sys import grpc import time import numpy as np import soundfile import audio2face_pb2_grpc import audio2face_pb2 def push_audio_track(url, audio_data, samplerate, instance_name): """ This function pushes the whole audio track at once via PushAudioRequest() PushAudioRequest parameters: * audio_data: bytes, containing audio data for the whole track, where each sample is encoded as 4 bytes (float32) * samplerate: sampling rate for the audio data * instance_name: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished The request is passed to PushAudio() """ block_until_playback_is_finished = True # ADJUST with grpc.insecure_channel(url) as channel: stub = audio2face_pb2_grpc.Audio2FaceStub(channel) request = audio2face_pb2.PushAudioRequest() request.audio_data = audio_data.astype(np.float32).tobytes() request.samplerate = samplerate request.instance_name = instance_name request.block_until_playback_is_finished = block_until_playback_is_finished print("Sending audio data...") response = stub.PushAudio(request) if response.success: print("SUCCESS") else: print(f"ERROR: {response.message}") print("Closed channel") def push_audio_track_stream(url, audio_data, samplerate, instance_name): """ This function pushes audio chunks sequentially via PushAudioStreamRequest() The function emulates the stream of chunks, generated by splitting input audio track. But in a real application such stream of chunks may be aquired from some other streaming source. 
The first message must contain start_marker field, containing only meta information (without audio data): * samplerate: sampling rate for the audio data * instance_name: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished (after the last message) Second and other messages must contain audio_data field: * audio_data: bytes, containing audio data for an audio chunk, where each sample is encoded as 4 bytes (float32) All messages are packed into a Python generator and passed to PushAudioStream() """ #print(type(audio_data)) chunk_size = samplerate // 10 # ADJUST sleep_between_chunks = 0.01 # ADJUST block_until_playback_is_finished = True # ADJUST #print(type(audio_data)) with grpc.insecure_channel(url) as channel: stub = audio2face_pb2_grpc.Audio2FaceStub(channel) def make_generator(): start_marker = audio2face_pb2.PushAudioRequestStart( samplerate=samplerate, instance_name=instance_name, block_until_playback_is_finished=block_until_playback_is_finished, ) # At first, we send a message with start_marker yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker) # Then we send messages with audio_data for i in range(len(audio_data) // chunk_size + 1): #time.sleep(sleep_between_chunks) chunk = audio_data[i * chunk_size : i * chunk_size + chunk_size] yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes()) request_generator = make_generator() print("Sending audio data...") response = stub.PushAudioStream(request_generator) if response.success: print("SUCCESS") else: print(f"ERROR: {response.message}") print("Channel closed") def push_stream(url, audio_data, samplerate, instance_name): """ This function pushes audio chunks sequentially via PushAudioStreamRequest() The function emulates the stream of chunks, generated by splitting input audio track. 
But in a real application such stream of chunks may be aquired from some other streaming source. The first message must contain start_marker field, containing only meta information (without audio data): * samplerate: sampling rate for the audio data * instance_name: prim path of the Audio2Face Streaming Audio Player on the stage, were to push the audio data * block_until_playback_is_finished: if True, the gRPC request will be blocked until the playback of the pushed track is finished (after the last message) Second and other messages must contain audio_data field: * audio_data: bytes, containing audio data for an audio chunk, where each sample is encoded as 4 bytes (float32) All messages are packed into a Python generator and passed to PushAudioStream() """ print(len(audio_data)) chunk_size = samplerate // 10 # ADJUST sleep_between_chunks = 0.01 # ADJUST block_until_playback_is_finished = True # ADJUST print(type(audio_data)) with grpc.insecure_channel(url) as channel: print("Channel creadted") stub = audio2face_pb2_grpc.Audio2FaceStub(channel) def make_generator(): start_marker = audio2face_pb2.PushAudioRequestStart( samplerate=samplerate, instance_name=instance_name, block_until_playback_is_finished=block_until_playback_is_finished, ) # At first, we send a message with start_marker yield audio2face_pb2.PushAudioStreamRequest(start_marker=start_marker) # Then we send messages with audio_data for i in range(len(audio_data) // chunk_size + 1): #time.sleep(sleep_between_chunks) chunk = audio_data[i * chunk_size : i * chunk_size + chunk_size] yield audio2face_pb2.PushAudioStreamRequest(audio_data=chunk.astype(np.float32).tobytes()) request_generator = make_generator() print("Sending audio data...") response = stub.PushAudioStream(request_generator) if response.success: print("SUCCESS") return True else: print(f"ERROR: {response.message}") # print("Channel closed") ================================================ FILE: audio_segment.py 
================================================ import os import gradio as gr from pydub import AudioSegment # function to crop audio according to the given start and end time def crop_audio(file_path, start_time, end_time): audio = AudioSegment.from_file(file_path) cropped_audio = audio[start_time:end_time] filename = os.path.splitext(os.path.basename(file_path))[0] if not os.path.exists("cropped_audio"): os.makedirs("cropped_audio") cropped_file_path = os.path.join("cropped_audio",f"{filename}_{start_time//1000}_{end_time//1000}.wav") cropped_audio.export(cropped_file_path, format="wav") return cropped_file_path # function to split audio file into segments def split_audio_file(file_path, output_path, segment_time=3000): audio = AudioSegment.from_file(file_path) file_name = os.path.splitext(os.path.basename(file_path))[0] # Calculating total segments that will be created. total_segments = int(audio.duration_seconds // (segment_time/1000)) + 1 # Creating each segment and saving to the output folder for segment_number in range(total_segments): start_time = segment_number * segment_time end_time = start_time + segment_time segment_file_path = os.path.join(output_path, f"{file_name}_{start_time//1000}_{end_time//1000}.wav") segment = audio[start_time:end_time] segment.export(segment_file_path, format="wav") return output_path # main function def audio_processing(file_path, output_path, label): # 分割音频文件 if not os.path.exists(output_path): os.makedirs(output_path) split_audio_file(file_path, output_path) # 获取手动选择的音频段并裁剪 cropped_files_paths = [] for root, dirs, files in os.walk(output_path): for file in files: if file.endswith('.wav'): file_path = os.path.abspath(os.path.join(root, file)) cropped_file_path = crop_audio(file_path, 0, 1000) # 注意此处仅提供示例裁剪了1s的音频 cropped_files_paths.append(cropped_file_path) # 生成txt文件 txt_file = open('file_labels.txt', 'a') for index, cropped_file_path in enumerate(cropped_files_paths): segment_label = label + '_' + str(index) # 
将文件路径和标签写入txt文件 txt_file.write(f"{cropped_file_path}\t{segment_label}\n") txt_file.close() print("处理完成!") # 定义输入界面, 接收音频文件、输出文件夹和标签 iface = gr.Interface( fn=audio_processing, inputs=[gr.inputs.File(label="上传音频文件"), gr.inputs.Textbox(label="输出文件夹路径"), gr.inputs.Textbox(label="标签")], outputs="text", title="音频处理工具", description="通过鼠标点击音频的任意区间保存片段") iface.launch() ================================================ FILE: auto_label_demo.py ================================================ from model_cards.autoback import AutoBackend import argparse import os import platform import sys from pathlib import Path import numpy as np import torch import torch.backends.cudnn as cudnn import matplotlib.pyplot as plt from PIL import Image import random FILE = Path(__file__).resolve() ROOT = FILE.parents[0] # root directory if str(ROOT) not in sys.path: sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from utils.ops import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, dilate_mask, increment_path, non_max_suppression ,print_args, scale_boxes, xyxy2xywh,save_format) from utils.plot import Annotator, save_one_box,show_box,show_mask,save_mask_data from utils.torch_utils import select_device from config_private import SAM_MODEL_TYPE,GROUNED_MODEL_TYPE,Tag2Text_Model_Path,GLIGEN_META_LIST from utils import VID_FORMATS,IMG_FORMATS,write_categories import json import xml.etree.cElementTree as ET from tqdm import tqdm # 初始已知类别列表 global categories categories = {} global category_colors category_colors={} # 初始对应类别编号 class_ids = [] models_config = {'tag2text': None, 'lama': None,'sam': None,'grounded': None,'sd': None,'visual_glm': None,'trans_zh': None,'gilgen':None} JSON_DATASETS=[] def save_text2img_data(output_dir, prompt,label,img_name): global JSON_DATASETS if not prompt: prompt=f"这张图片的背景里有什么内容?" 
example = { "img": f"{img_name}", "prompt": prompt, "label": label } JSON_DATASETS.append((example)) def load_auto_backend_models(opt): """ 加载多个模型 """ # Load model device = select_device(opt.device) if opt.tag2text: models_config['tag2text'] = AutoBackend("tag2text",weights=Tag2Text_Model_Path,device=device, fp16=opt.half) if opt.det: models_config['grounded'] = AutoBackend("grounded-DINO",weights=GROUNED_MODEL_TYPE['S'], device=device, args_config= 'model_cards/groundingdino/config/GroundingDINO_SwinT_OGC.py', fp16=opt.half) if opt.sam: models_config['sam']= AutoBackend("segment-anything",weights=SAM_MODEL_TYPE['vit_h'] ,device=device, fp16=opt.half) if opt.lama: models_config['lama']= AutoBackend("lama",weights=None,args_config='model_cards/lama/configs/prediction/default.yaml',device=device) if opt.gligen: models_config['gligen']=AutoBackend("gligen",weights=GLIGEN_META_LIST[0]) print('【loads models done】') def Auto_run(weights=ROOT / '', # model.pt path(s) source= 'data/images', # file/dir/URL/glob, 0 for webcam input_prompt="Anything in this image", data=ROOT / 'data/', # dataset.yaml path imgsz=(1920, 1080), # inference size (height, width) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold text_thres=0.3, max_det=1000, # maximum detections per image device='', # cuda device, i.e. 
0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_xml=False, # save results to *.xml save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 zh_select=False, agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project=ROOT / 'runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference trace=False, # u lama=False, # use lama models sam=True, # use segment-anythings det=True, # use grounded detect model with text tag2text=True, save_mask=False, save_caption=False, batch_process=False, color_flag=False, process_name=0, gligen=False, ): global models_config global category_colors global JSON_DATASETS LOGGER.info(f'当前的进程ID:{process_name},加载的模型列表:{models_config.keys()}') cls_index = -1 # 设置默认值为 -1 source = str(source) print(f'input:{source}') img_paths=None if os.path.isdir(source): img_paths = [os.path.join(source, f) for f in os.listdir(source) if Path(f).suffix[1:] in (IMG_FORMATS + VID_FORMATS)] elif os.path.isfile(source): img_paths = [source] else: return False # 获取文件夹中的所有图像 is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS) save_img = not nosave and not source.endswith('.txt') # save inference images is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')) #webcam = source.isnumeric() or source.endswith('.streams') or (is_url ) if is_url and is_file: source = check_file(source) # download # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment 
run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'xmls' if save_xml else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'masks' if save_mask else save_dir).mkdir(parents=True, exist_ok=True) # make dir (save_dir / 'captions' if save_caption else save_dir).mkdir(parents=True, exist_ok=True) # make dir seen=0 # loda data and inference caption=None for source in tqdm(img_paths,desc="Processing"): im = cv2.imread(source) name_p= source.split('/')[-1].split('.')[0] img_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) preds=None masks=[] prompt=input_prompt if tag2text: preds = models_config['tag2text'](im = img_rgb ,prompt=prompt,box_threshold=conf_thres,text_threshold=text_thres,iou_threshold=iou_thres) # Currently ", " is better for detecting single tags # while ". " is a little worse in some case prompt=preds[0].replace(' |', ',') caption=preds[2] print(f"Caption: {caption}") print(f"Tags: {prompt}") if zh_select: caption=models_config['trans_zh'](prompt, max_length=1000, clean_up_tokenization_spaces=True)[0]["generated_text"] if save_caption: save_format(label_format="txt",save_path=f'{save_dir}/captions',img_name=name_p, results=caption) if det: if input_prompt: prompt=input_prompt print('grouned start input prompt:',prompt) preds= models_config['grounded'](im = img_rgb,prompt=prompt, box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres) if sam and det : if preds[0].numel()>0: print('sam start input prompt:',preds[0]) masks= models_config['sam'](im = img_rgb, prompt=preds[0],box_threshold=conf_thres,text_threshold=text_thres, iou_threshold=iou_thres) if save_mask: save_mask_data(str(save_dir)+'/masks', caption, masks, preds[0], preds[2],name_p) # Write results if save_img: seen+=1 plt.figure(figsize=(10,10)) plt.imshow(img_rgb) if det: for box,label in zip(preds[0],preds[2]): show_box(box.numpy(),plt.gca(),label) for mask in masks: 
show_mask(mask.cpu().numpy(),plt.gca(),random_color=True) if tag2text: plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n') plt.axis('off') plt.savefig(f'{save_dir}/{seen}.png',bbox_iches='tight',dpi=300,pad_inches=0.0) if lama and masks is not None : masks_prompts= masks.detach().cpu().numpy().astype(np.uint8) * 255 for idx, mask in enumerate(masks_prompts): sub_mask = [dilate_mask(ma, 15) for ma in mask] img_inpainted_p= f'{save_dir}/mask_{idx}.png' idx=idx+1 img_inpainted = models_config['lama']( im=img_rgb, prompt=sub_mask[0]) Image.fromarray(img_inpainted.astype(np.uint8)).save(img_inpainted_p) img_rgb=img_inpainted for category in categories: if category not in category_colors: category_colors[category] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) gn = torch.tensor(im.shape)[[1, 0, 1, 0]] # normalization gain whwh if color_flag or save_txt: seg_mask = np.zeros_like(img_rgb) # img_array category_color=[] for xyxy, conf, cls,mask in zip(preds[0],preds[1],preds[2],masks): #per im boxes xywh = (xyxy2xywh((xyxy).view(1,4)) / gn).view(-1).tolist() # normalized xywh if cls not in categories: # print(f'Add {cls} to categories: {categories}') categories.update({ str(cls): len(categories)}) write_categories(cls,f'{save_dir}/classes_id.txt') cls_index = len(categories) - 1 category_colors.update({ str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))}) category_color=category_colors[str(cls)] else: cls_index = categories[str(cls)] if str(cls) not in category_colors: category_colors.update({ str(cls): (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))}) category_color=category_colors[str(cls)] line = (cls_index, xywh, conf) if save_conf else (cls_index, xywh) # label format line = str(line).replace('[', '').replace(']', '').replace("(",'').replace(")"," ").replace(",", " " * 2) if save_mask: h, w = mask.shape[-2:] mask_color = np.array(category_color).reshape((1, 1, -1)) 
seg_mask = seg_mask + mask.cpu().numpy().reshape(h, w, 1) * mask_color # add if save_txt: save_format(label_format="txt",save_path=f'{save_dir}/labels', img_name=name_p, results=line) if color_flag and save_mask: plt.figure(figsize=(10,10)) plt.imshow(seg_mask) plt.title('Captioning: ' + caption + '\n' + 'Tagging:' + prompt + '\n') plt.axis('off') plt.savefig(os.path.join(f'{save_dir}/masks', f'{name_p}_cls.jpg'), bbox_inches="tight", dpi=300, pad_inches=0.0) if save_xml: h,w=im.shape[:2] save_format("xml",f'{save_dir}/xmls' ,name_p, Path(source).parent, preds, h,w) if save_txt: #class_ids.append(cls) LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/labels") if save_xml: LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/xmls") if save_caption: with open(f'{save_dir}/dataset.json', 'a',encoding='utf-8') as f: json.dump(JSON_DATASETS,f,ensure_ascii=False) f.write('\n') LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/captions") LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/captions") if save_mask: LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}/masks") def run_do(shared_args,process_name=0): Auto_run(**vars(shared_args), process_name=process_name) def parse_opt(): parser = argparse.ArgumentParser() parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'your model path', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'train_imgs', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--input_prompt', type=str, default='', help='provide prompt words') parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') parser.add_argument('--text-thres', type=float, default=0.3, help='confidence threshold') 
parser.add_argument('--iou-thres', type=float, default=0.5, help='NMS IoU threshold') parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image') parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--view-img', action='store_true', help='show results') parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') parser.add_argument('--save-xml', action='store_true', help='save results to *.xml') parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes') parser.add_argument('--nosave', action='store_true', help='do not save images/videos') parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3') parser.add_argument('--zh_select', action='store_true', default=False) parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') parser.add_argument('--augment', action='store_true', help='augmented inference') parser.add_argument('--visualize', action='store_true', help='visualize features') parser.add_argument('--update', action='store_true', help='update all models') parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name') parser.add_argument('--name', default='exp', help='save results to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)') parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels') parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') 
parser.add_argument('--trace', action='store_true', help='trace model') parser.add_argument('--lama',default=False, action='store_true', help='lama model') parser.add_argument('--sam', default=False,action='store_true', help='seg model') parser.add_argument('--det',default=False, action='store_true', help='det model') parser.add_argument('--tag2text', default=True,action='store_true', help='tag2text model ') parser.add_argument('--save-mask', default=False,action='store_true', help='mask save json') parser.add_argument('--save-caption', default=True,action='store_true', help='caption ') parser.add_argument('--batch-process', action='store_true', help='therads process file') parser.add_argument('--color-flag', action='store_true', help='class-color ') parser.add_argument('--gligen', action='store_true', help='class-color ') opt = parser.parse_args() print_args(vars(opt)) return opt import threading import concurrent.futures def main(opt): check_requirements(exclude=('tensorboard', 'thop')) global models_config # if not opt.input_prompt and opt.input_prompt=='': # LOGGER.info(' input prompt') # words_name= input("please your prompt words: ") # opt.input_prompt=words_name load_auto_backend_models(opt) LOGGER.info(f"模型加载成功{models_config.keys()}") if opt.batch_process and os.path.isdir(opt.source): #检查目录是否存在以及检查是否为目录的操作 if not os.path.exists(opt.source): LOGGER.info(f"Error: Input directory {opt.source} does not exist.") return seen=0 # output_dir=f'{opt.source}_subs{seen}' segment_size =100 for file_name in opt.source: file_path = os.path.join(opt.source, file_name) # pass if not Path(file_path).suffix[1:] in (IMG_FORMATS + VID_FORMATS): continue # 使用Pillow库读取图像文件并将其转换为NumPy数组 img = Image.open(file_path) img_array = np.asarray(img) # 多线程处理每个图像段 with concurrent.futures.ThreadPoolExecutor() as executor: futures = [] # 用于保存每个线程的未来对象 # 分段并发读取并进行处理 for i in range(0, img_array.shape[0], segment_size): start_row = i end_row = min(i + segment_size, img_array.shape[0]) future = 
executor.submit(run_do, img_array, start_row, end_row) futures.append(future) # 获取所有未来对象的结果 for future in concurrent.futures.as_completed(futures): segment = future.result() else: Auto_run(**vars(opt)) if __name__ == "__main__": opt = parse_opt() main(opt) ================================================ FILE: batch_clean_gpu.txt ================================================ sudo fuser -v /dev/nvidia* |awk '{for(i=1;i<=NF;i++)print "kill -9 " $i;}' | sudo sh ================================================ FILE: crazy_functions/Langchain知识库.py ================================================ from utils.toolbox import CatchException, update_ui, ProxyNetworkActivate from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything @CatchException def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): """ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 plugin_kwargs 插件模型的参数,暂时没有用武之地 chatbot 聊天显示框的句柄,用于显示给用户 history 聊天历史,前情提要 system_prompt 给gpt的静默提醒 web_port 当前软件运行的端口号 """ history = [] # 清空历史,以免输入溢出 chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。")) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # resolve deps try: from zh_langchain import construct_vector_store from langchain.embeddings.huggingface import HuggingFaceEmbeddings from .crazy_utils import knowledge_archive_interface except Exception as e: chatbot.append( ["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."] ) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps try_install_deps(['zh_langchain==0.2.1', 'pypinyin']) # < --------------------读取参数--------------- > if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") kai_id = plugin_kwargs.get("advanced_arg", 'default') # < --------------------读取文件--------------- > file_manifest = [] spl = ["txt", "doc", 
"docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"] for sp in spl: _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}') file_manifest += file_manifest_tmp if len(file_manifest) == 0: chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return # < -------------------预热文本向量化模组--------------- > chatbot.append(['
'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 print('Checking Text2vec ...') from langchain.embeddings.huggingface import HuggingFaceEmbeddings with ProxyNetworkActivate(): # 临时地激活代理网络 HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") # < -------------------构建知识库--------------- > chatbot.append(['
'.join(file_manifest), "正在构建知识库..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 print('Establishing knowledge archive ...') with ProxyNetworkActivate(): # 临时地激活代理网络 kai = knowledge_archive_interface() kai.feed_archive(file_manifest=file_manifest, id=kai_id) kai_files = kai.get_loaded_file() kai_files = '
'.join(kai_files) # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答' # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 @CatchException def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1): # resolve deps try: from zh_langchain import construct_vector_store from langchain.embeddings.huggingface import HuggingFaceEmbeddings from .crazy_utils import knowledge_archive_interface except Exception as e: chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps try_install_deps(['zh_langchain==0.2.1']) # < ------------------- --------------- > kai = knowledge_archive_interface() if 'langchain_plugin_embedding' in chatbot._cookies: resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding']) else: if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") kai_id = plugin_kwargs.get("advanced_arg", 'default') resp, prompt = kai.answer_with_archive_by_id(txt, kai_id) chatbot.append((txt, '[Local Message] ' + prompt)) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( inputs=prompt, inputs_show_user=txt, llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], sys_prompt=system_prompt ) history.extend((prompt, gpt_say)) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 
class PaperFileGroup():
    """
    Holds a group of LaTeX files, splits over-long files into token-limited
    fragments, and reassembles / writes the processed fragments.
    """

    def __init__(self):
        self.file_paths = []        # path of every source file
        self.file_contents = []     # text of every source file (same order as file_paths)
        self.sp_file_contents = []  # text fragments produced by run_file_split
        self.sp_file_index = []     # for each fragment: index of the file it came from
        self.sp_file_tag = []       # for each fragment: display tag (path, plus part suffix)
        # Created here so merge_result()/write_result() work on a fresh instance
        # instead of raising AttributeError when the caller has not set them yet.
        self.sp_file_result = []    # processed text for each fragment
        self.file_result = []       # processed text for each file, filled by merge_result

        # count_token
        from llm_cards.bridge_all import model_info
        enc = model_info["gpt-3.5-turbo"]['tokenizer']
        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """
        Split long texts into fragments.

        A file whose token count is below `max_token_limit` is kept as a single
        fragment; any other file is broken down by
        `breakdown_txt_to_satisfy_token_limit_for_pdf`.
        """
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")

        print('Segmentation: done')

    def merge_result(self):
        """Concatenate the fragment results back into one result string per file."""
        self.file_result = ["" for _ in range(len(self.file_paths))]
        for r, k in zip(self.sp_file_result, self.sp_file_index):
            self.file_result[k] += r

    def write_result(self):
        """Write each file's result next to the source as `<path>.polish.tex`; return the written paths."""
        manifest = []
        for path, res in zip(self.file_paths, self.file_result):
            with open(path + '.polish.tex', 'w', encoding='utf8') as f:
                manifest.append(path + '.polish.tex')
                f.write(res)
        return manifest

    def zip_result(self):
        """Zip the folder of the first file into `./gpt_log/` under a time-stamped name."""
        import os, time
        folder = os.path.dirname(self.file_paths[0])
        t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        zip_folder(folder, './gpt_log/', f'{t}-polished.zip')
project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='polish'): import time, os, re from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency # <-------- 读取Latex文件,删除其中的所有注释 ----------> pfg = PaperFileGroup() for index, fp in enumerate(file_manifest): with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() # 定义注释的正则表达式 comment_pattern = r'(? pfg.run_file_split(max_token_limit=1024) n_split = len(pfg.sp_file_contents) # <-------- 多线程润色开始 ----------> if language == 'en': if mode == 'polish': inputs_array = ["Below is a section from an academic paper, polish this section to meet the academic standard, " + "improve the grammar, clarity and overall readability, do not modify any latex command such as \section, \cite and equations:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] else: inputs_array = [r"Below is a section from an academic paper, proofread this section." + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + r"Answer me only with the revised text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag] sys_prompt_array = ["You are a professional academic paper writer." 
for _ in range(n_split)] elif language == 'zh': if mode == 'polish': inputs_array = [f"以下是一篇学术论文中的一段内容,请将此部分润色以满足学术标准,提高语法、清晰度和整体可读性,不要修改任何LaTeX命令,例如\section,\cite和方程式:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] else: inputs_array = [f"以下是一篇学术论文中的一段内容,请对这部分内容进行语法矫正。不要修改任何LaTeX命令,例如\section,\cite和方程式:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag] sys_prompt_array=["你是一位专业的中文学术论文作家。" for _ in range(n_split)] gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array=inputs_array, inputs_show_user_array=inputs_show_user_array, llm_kwargs=llm_kwargs, chatbot=chatbot, history_array=[[""] for _ in range(n_split)], sys_prompt_array=sys_prompt_array, # max_workers=5, # 并行任务数量限制,最多同时执行5个,其他的排队等待 scroller_max_len = 80 ) # <-------- 文本碎片重组为完整的tex文件,整理结果为压缩包 ----------> try: pfg.sp_file_result = [] for i_say, gpt_say in zip(gpt_response_collection[0::2], gpt_response_collection[1::2]): pfg.sp_file_result.append(gpt_say) pfg.merge_result() pfg.write_result() pfg.zip_result() except: print(trimmed_format_exc()) # <-------- 整理结果,退出 ----------> create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md" res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name) history = gpt_response_collection chatbot.append((f"{fp}完成了吗?", res)) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @CatchException def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): # 基本信息:功能、贡献者 chatbot.append([ "函数插件功能?", "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 尝试导入依赖,如果缺少依赖,则给出安装建议 try: import tiktoken except: report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 
@CatchException
def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: proofread every .tex file found under the folder `txt`.

    Reports an error in the chat and stops when tiktoken is unavailable, when
    `txt` is not an existing path, or when no .tex file is found; otherwise
    delegates to 多文件润色 with language='en' and mode='proofread'.
    """
    # Plugin banner: what it does and who contributed it
    banner = ["函数插件功能?", "对整个Latex项目进行纠错。函数插件贡献者: Binary-Husky"]
    chatbot.append(banner)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency and suggest how to install it when missing
    try:
        import tiktoken
    except:
        report_execption(
            chatbot, history,
            a=f"解析项目: {txt}",
            b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。",
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []  # start from an empty history to keep the input short
    import glob, os

    # Guard: the input must point at something that exists on disk
    if not os.path.exists(txt):
        shown = '空空如也的输入栏' if txt == "" else txt
        report_execption(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Guard: there must be at least one .tex file below the folder
    file_manifest = list(glob.glob(f'{project_folder}/**/*.tex', recursive=True))
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 多文件润色(
        file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        chatbot, history, system_prompt, language='en', mode='proofread',
    )
.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") print('Segmentation: done') def 多文件翻译(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'): import time, os, re from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency # <-------- 读取Latex文件,删除其中的所有注释 ----------> pfg = PaperFileGroup() for index, fp in enumerate(file_manifest): with open(fp, 'r', encoding='utf-8', errors='replace') as f: file_content = f.read() # 定义注释的正则表达式 comment_pattern = r'(? pfg.run_file_split(max_token_limit=1024) n_split = len(pfg.sp_file_contents) # <-------- 抽取摘要 ----------> # if language == 'en': # abs_extract_inputs = f"Please write an abstract for this paper" # # 单线,获取文章meta信息 # paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive( # inputs=abs_extract_inputs, # inputs_show_user=f"正在抽取摘要信息。", # llm_kwargs=llm_kwargs, # chatbot=chatbot, history=[], # sys_prompt="Your job is to collect information from materials。", # ) # <-------- 多线程润色开始 ----------> if language == 'en->zh': inputs_array = ["Below is a section from an English academic paper, translate it into Chinese, do not modify any latex command such as \section, \cite and equations:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag] sys_prompt_array = ["You are a professional academic paper translator." 
for _ in range(n_split)] elif language == 'zh->en': inputs_array = [f"Below is a section from a Chinese academic paper, translate it into English, do not modify any latex command such as \section, \cite and equations:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] inputs_show_user_array = [f"翻译 {f}" for f in pfg.sp_file_tag] sys_prompt_array = ["You are a professional academic paper translator." for _ in range(n_split)] gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array=inputs_array, inputs_show_user_array=inputs_show_user_array, llm_kwargs=llm_kwargs, chatbot=chatbot, history_array=[[""] for _ in range(n_split)], sys_prompt_array=sys_prompt_array, # max_workers=5, # OpenAI所允许的最大并行过载 scroller_max_len = 80 ) # <-------- 整理结果,退出 ----------> create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + f"-chatgpt.polish.md" res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name) history = gpt_response_collection chatbot.append((f"{fp}完成了吗?", res)) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @CatchException def Latex英译中(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): # 基本信息:功能、贡献者 chatbot.append([ "函数插件功能?", "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 尝试导入依赖,如果缺少依赖,则给出安装建议 try: import tiktoken except: report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return history = [] # 清空历史,以免输入溢出 import glob, os if os.path.exists(txt): project_folder = txt else: if txt == "": txt = '空空如也的输入栏' report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', 
@CatchException
def Latex中译英(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: translate every .tex file found under the folder `txt`
    from Chinese to English.

    Reports an error in the chat and stops when tiktoken is unavailable, when
    `txt` is not an existing path, or when no .tex file is found; otherwise
    delegates to 多文件翻译 with language='zh->en'.
    """
    # Plugin banner: what it does and who contributed it
    banner = ["函数插件功能?", "对整个Latex项目进行翻译。函数插件贡献者: Binary-Husky"]
    chatbot.append(banner)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Probe the optional dependency and suggest how to install it when missing
    try:
        import tiktoken
    except:
        report_execption(
            chatbot, history,
            a=f"解析项目: {txt}",
            b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。",
        )
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    history = []  # start from an empty history to keep the input short
    import glob, os

    # Guard: the input must point at something that exists on disk
    if not os.path.exists(txt):
        shown = '空空如也的输入栏' if txt == "" else txt
        report_execption(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    project_folder = txt

    # Guard: there must be at least one .tex file below the folder
    file_manifest = list(glob.glob(f'{project_folder}/**/*.tex', recursive=True))
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    yield from 多文件翻译(
        file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        chatbot, history, system_prompt, language='zh->en',
    )
def switch_prompt(pfg, mode, more_requirement):
    """
    Generate prompts and system prompts based on the mode for proofreading or translating.

    Args:
    - pfg: object exposing `sp_file_contents`, the list of text fragments to process.
    - mode: A string specifying the mode, either 'proofread_en' or 'translate_zh'.
    - more_requirement: extra instruction text spliced into every prompt (may be empty).

    Returns:
    - inputs_array: A list of strings, one user prompt per fragment.
    - sys_prompt_array: A list of strings, one system prompt per fragment.

    Raises:
    - AssertionError: if `mode` is not one of the supported values.
    """
    n_split = len(pfg.sp_file_contents)
    if mode == 'proofread_en':
        inputs_array = [r"Below is a section from an academic paper, proofread this section." +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                        r"Answer me only with the revised text:" +
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif mode == 'translate_zh':
        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
                        r"Answer me only with the translated text:" +
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
    else:
        # Raised explicitly (rather than `assert False`) so the check survives `python -O`,
        # which would otherwise fall through to an UnboundLocalError on the return below.
        raise AssertionError("未知指令")
    return inputs_array, sys_prompt_array
def move_project(project_folder, arxiv_id=None):
    """
    Create a new work folder and copy the project folder to it.

    Args:
    - project_folder: A string specifying the folder path of the project.
    - arxiv_id: optional arxiv id; when given, the work folder is
      `<ARXIV_CACHE_DIR>/<arxiv_id>/workfolder`, otherwise a time-stamped
      folder under `gpt_log/`.

    Returns:
    - A string specifying the path to the new work folder.
    """
    import shutil, time
    time.sleep(2)  # avoid time string conflict
    if arxiv_id is not None:
        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
    else:
        new_workfolder = f'gpt_log/{gen_time_str()}'

    # Best-effort removal of a stale work folder. `ignore_errors` keeps the
    # "missing folder is fine" behaviour without a bare `except`, which would
    # also swallow KeyboardInterrupt/SystemExit.
    shutil.rmtree(new_workfolder, ignore_errors=True)

    # align subfolder if there is a folder wrapper
    items = glob.glob(pj(project_folder, '*'))
    if len(glob.glob(pj(project_folder, '*.tex'))) == 0 and len(items) == 1:
        if os.path.isdir(items[0]):
            project_folder = items[0]

    shutil.copytree(src=project_folder, dst=new_workfolder)
    return new_workfolder
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: proofread a whole LaTeX project and compile a PDF of the result.

    Steps: check that `pdflatex` and the latex helpers are available, validate
    the input folder `txt`, copy the project into a fresh work folder, run the
    proofreading pass unless `merge_proofread_en.tex` already exists there,
    compile, then zip the work folder and offer it for download.

    Returns (as the generator's return value) the success flag reported by 编译Latex;
    returns None when a precondition fails.
    """
    # <-------------- information about this plugin ------------->
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # <-------------- more requirements ------------->
    # An empty advanced argument means "not given": drop it so the default applies
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    more_req = plugin_kwargs.get("advanced_arg", "")
    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
        import glob, os, time, subprocess
        # Popen raises if the executable cannot be started; wait() reaps the
        # child so the probe does not leave an un-collected process behind.
        subprocess.Popen(['pdflatex', '-version']).wait()
        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([
            f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- clear history and read input ------------->
    history = []
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # <-------------- if is a zip/tar file ------------->
    project_folder = desend_to_extracted_folder_if_exist(project_folder)

    # <-------------- move latex project away from temp folder ------------->
    project_folder = move_project(project_folder, arxiv_id=None)

    # <-------------- if merge_proofread_en is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)

    # <-------------- compile PDF ------------->
    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
                                 work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
    zip_res = zip_result(project_folder)
    # Both outcomes hand the archive to the user; only the chat message differs
    if success:
        outcome = ("成功啦", '请查收结果(压缩包)...')
    else:
        outcome = ("失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')
    chatbot.append(outcome)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    time.sleep(1)
    promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return # <-------------- clear history and read input -------------> history = [] txt, arxiv_id = yield from arxiv_download(chatbot, history, txt) if txt.endswith('.pdf'): report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return if os.path.exists(txt): project_folder = txt else: if txt == "": txt = '空空如也的输入栏' report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] if len(file_manifest) == 0: report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return # <-------------- if is a zip/tar file -------------> project_folder = desend_to_extracted_folder_if_exist(project_folder) # <-------------- move latex project away from temp folder -------------> project_folder = move_project(project_folder, arxiv_id) # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> if not os.path.exists(project_folder + '/merge_translate_zh.tex'): yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_) # <-------------- compile PDF -------------> success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) # <-------------- zip PDF -------------> zip_res = 
def fetch_items(list_of_items, batch_size):
    """Yield consecutive slices of `list_of_items`, each holding at most `batch_size` items."""
    total = len(list_of_items)
    batch_starts = range(0, total, batch_size)
    yield from (list_of_items[start:start + batch_size] for start in batch_starts)
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Generate a fine-tuning dataset by asking an LLM to answer each record of a JSON-lines file.

    txt             text typed into the input box; here the path of a JSON-lines file
                    whose records carry a "content" field
    llm_kwargs      LLM parameters such as temperature and top_p; forwarded to the request
    plugin_kwargs   plugin parameters; "advanced_arg" holds the command-line style options
                    parsed by string_to_options
    chatbot         handle of the chat display box, used to show messages to the user
    history         chat history
    system_prompt   silent system prompt for the LLM
    web_port        port the application is running on

    Results are appended to `<txt>.generated.json`, one {"content", "summary"} object per line.
    """
    history = []    # start from an empty history to keep the input short
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    # An empty advanced argument means "not given"
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history)
        return
    arguments = string_to_options(arguments=args)

    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing empty line), which json.loads rejects
            if not line.strip():
                continue
            json_dat = json.loads(line)
            dat.append(json_dat["content"])

    # Work on a copy so the caller's llm_kwargs is not silently switched to another model
    llm_kwargs = dict(llm_kwargs)
    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in (batch)],
            inputs_show_user_array=[f"Show Nothing" for _ in (batch)],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in (batch)],
            sys_prompt_array=[arguments.system_prompt for _ in (batch)],
            max_workers=10  # upper bound on parallel requests
        )

        # Every second entry of `res` (odd indices) is paired with the inputs of this batch;
        # append after each batch so finished work is kept.
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
插件模型的参数 chatbot 聊天显示框的句柄,用于显示给用户 history 聊天历史,前情提要 system_prompt 给gpt的静默提醒 web_port 当前软件运行的端口号 """ import subprocess history = [] # 清空历史,以免输入溢出 chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成")) if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") args = plugin_kwargs.get("advanced_arg", None) if args is None: chatbot.append(("没给定指令", "退出")) yield from update_ui(chatbot=chatbot, history=history); return else: arguments = string_to_options(arguments=args) pre_seq_len = arguments.pre_seq_len # 128 learning_rate = arguments.learning_rate # 2e-2 num_gpus = arguments.num_gpus # 1 json_dataset = arguments.json_dataset # 't_code.json' ptuning_directory = arguments.ptuning_directory # '/home/hmp/ChatGLM2-6B/ptuning' command = f"torchrun --standalone --nnodes=1 --nproc-per-node={num_gpus} main.py \ --do_train \ --train_file AdvertiseGen/{json_dataset} \ --validation_file AdvertiseGen/{json_dataset} \ --preprocessing_num_workers 20 \ --prompt_column content \ --response_column summary \ --overwrite_cache \ --model_name_or_path THUDM/chatglm2-6b \ --output_dir output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate} \ --overwrite_output_dir \ --max_source_length 256 \ --max_target_length 256 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ --gradient_accumulation_steps 16 \ --predict_with_generate \ --max_steps 100 \ --logging_steps 10 \ --save_steps 20 \ --learning_rate {learning_rate} \ --pre_seq_len {pre_seq_len} \ --quantization_bit 4" process = subprocess.Popen(command, shell=True, cwd=ptuning_directory) try: process.communicate(timeout=3600*24) except subprocess.TimeoutExpired: process.kill() return ================================================ FILE: crazy_functions/crazy_functions_test.py ================================================ """ 这是什么? 
def validate_path():
    """
    Make the repository root the working directory and append it to ``sys.path``.

    The root is taken to be the parent of the directory that contains this file.
    ``__file__`` is made absolute first: when it has no directory component
    (e.g. the script is started from inside its own folder), concatenating
    '/..' onto an empty dirname would resolve to the filesystem root.
    """
    import os, sys
    this_dir = os.path.dirname(os.path.abspath(__file__))
    root_dir_assume = os.path.abspath(os.path.join(this_dir, '..'))
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)
def silence_stdout(func):
    """
    Wrap a generator function so that anything it prints is discarded.

    While the wrapped generator is running, ``sys.stdout`` points at
    ``os.devnull``; each time a value is handed to the consumer the original
    stdout is put back, so the consumer can still print.

    A single devnull handle is used and closed on exit, and the original
    stdout is restored even when the wrapped generator raises or the consumer
    stops iterating early.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        original_stdout = sys.stdout
        with open(os.devnull, 'w') as devnull:
            sys.stdout = devnull
            try:
                for item in func(*args, **kwargs):
                    sys.stdout = original_stdout    # consumer runs with real stdout
                    yield item
                    sys.stdout = devnull            # back to silence for the plugin
            finally:
                sys.stdout = original_stdout
    return wrapper
def test_谷歌检索小助手():
    """Run the Google Scholar helper plugin on a fixed search URL and print each chat update."""
    from crazy_functions.谷歌检索小助手 import 谷歌检索小助手 as plugin
    scholar_url = "https://scholar.google.com/scholar?hl=en&as_sdt=0%2C5&q=auto+reinforcement+learning&btnG="
    updates = plugin(scholar_url, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    # Each update is (cookies, chatbot, history, msg); only the chatbot state is shown.
    for _cookies, chat_state, _hist, _msg in updates:
        print(chat_state)
def test_Markdown多语言():
    """Translate README.md into several languages, one plugin run per language, printing each chat update."""
    from crazy_functions.批量Markdown翻译 import Markdown翻译指定语言 as translate
    source_file = "README.md"
    shared_history = []     # the same list object is handed to every run
    target_languages = (
        "English", "French", "Japanese", "Korean", "Russian",
        "Italian", "German", "Portuguese", "Arabic",
    )
    for language in target_languages:
        per_run_kwargs = {"advanced_arg": language}
        updates = translate(source_file, llm_kwargs, per_run_kwargs, chatbot,
                            shared_history, system_prompt, web_port)
        for _cookies, chat_state, _hist, _msg in updates:
            print(chat_state)
def test_Latex():
    """Run the "translate LaTeX to Chinese and recompile the PDF" plugin on one arXiv id."""
    from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF

    # Inputs used in earlier manual runs (arXiv URLs, bare ids, or a local work folder):
    #   https://arxiv.org/abs/1706.03762, 1902.03185, 2305.18290, 2305.17608,
    #   2211.16068 (ACE), 2002.09253, 2306.07831, 2212.10156, 2211.11559,
    #   2303.08774, 2303.12712, 2205.14135
    #   2306.17157  (this paper has an \input command with wrong filename case)
    #   2210.03629
    #   C:\Users\<user>\arxiv_cache\<id>\workfolder
    arxiv_id = r"2307.04964"

    updates = Latex翻译中文并重新编译PDF(arxiv_id, llm_kwargs, plugin_kwargs, chatbot,
                                 history, system_prompt, web_port)
    for _cookies, chat_state, _hist, _msg in updates:
        cli_printer.print(chat_state)   # incremental console output instead of print(cb)
def input_clipping(inputs, history, max_token_limit):
    """
    Trim `inputs` and `history` until their joined token count fits `max_token_limit`.

    Tokens are counted with the "gpt-3.5-turbo" tokenizer from
    ``llm_cards.bridge_all.model_info``. When the input alone uses less than
    half of the limit, only the history is trimmed and the input is returned
    unchanged; otherwise both take part. Each round shortens the entry that
    currently has the most tokens.

    Returns:
        (inputs, history): the possibly shortened input string and history list.
    """
    from llm_cards.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # When the input takes less than half of the budget, clip the history only.
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num

    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    delta = max(everything_token) // 16     # clipping granularity, in tokens

    while n_token > max_token_limit:
        largest = max(everything_token)
        if largest == 0:
            # Nothing left to clip: only the join separators remain. Without this
            # guard the loop would spin forever (e.g. when the limit is <= 0).
            break
        where = everything_token.index(largest)     # first entry with the most tokens
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        everything[where] = enc.decode(clipped_encoded)[:-1]    # drop the last char, it may be an illegal partial character
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    history = everything[1:]
    return inputs, history
def can_multi_process(llm):
    """Return True when the model name starts with 'gpt-', 'api2d-' or 'azure-', else False."""
    multi_thread_prefixes = ('gpt-', 'api2d-', 'azure-')
    return llm.startswith(multi_thread_prefixes)
具备以下功能: 实时在UI上反馈远程数据流 使用线程池,可调节线程池的大小避免openai的流量限制错误 处理中途中止的情况 网络等出问题时,会把traceback和已经接收的数据转入输出 输入参数 Args (以_array结尾的输入变量都是列表,列表长度为子任务的数量,执行时,会把列表拆解,放到每个子线程中分别执行): inputs_array (list): List of inputs (每个子任务的输入) inputs_show_user_array (list): List of inputs to show user(每个子任务展现在报告中的输入,借助此参数,在汇总报告中隐藏啰嗦的真实输入,增强报告的可读性) llm_kwargs: llm_kwargs参数 chatbot: chatbot (用户界面对话窗口句柄,用于数据流可视化) history_array (list): List of chat history (历史对话输入,双层列表,第一层列表是子任务分解,第二层列表是对话历史) sys_prompt_array (list): List of system prompts (系统输入,列表,用于输入给GPT的前提提示,比如你是翻译官怎样怎样) refresh_interval (float, optional): Refresh interval for UI (default: 0.2) (刷新时间间隔频率,建议低于1,不可高于3,仅仅服务于视觉效果) max_workers (int, optional): Maximum number of threads (default: see config.py) (最大线程数,如果子任务非常多,需要用此选项防止高频地请求openai导致错误) scroller_max_len (int, optional): Maximum length for scroller (default: 30)(数据流的显示最后收到的多少个字符,仅仅服务于视觉效果) handle_token_exceed (bool, optional): (是否在输入过长时,自动缩减文本) handle_token_exceed:是否自动处理token溢出的情况,如果选择自动处理,则会在溢出时暴力截断,默认开启 show_user_at_complete (bool, optional): (在结束时,把完整输入-输出结果显示在聊天框) retry_times_at_unknown_error:子任务失败时的重试次数 输出 Returns: list: List of GPT model responses (每个子任务的输出汇总,如果某个子任务出错,response中会携带traceback报错信息,方便调试和定位问题。) """ import time, random from concurrent.futures import ThreadPoolExecutor from llm_cards.bridge_all import predict_no_ui_long_connection assert len(inputs_array) == len(history_array) assert len(inputs_array) == len(sys_prompt_array) if max_workers == -1: # 读取配置文件 try: max_workers, = get_conf('DEFAULT_WORKER_NUM') except: max_workers = 8 if max_workers <= 0: max_workers = 3 # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿 if not can_multi_process(llm_kwargs['llm_model']): max_workers = 1 executor = ThreadPoolExecutor(max_workers=max_workers) n_frag = len(inputs_array) # 用户反馈 chatbot.append(["请开始多线程操作。", ""]) yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面 # 跨线程传递 mutable = [["", time.time(), "等待中"] for _ in range(n_frag)] # 子线程任务 def _req_gpt(index, inputs, history, sys_prompt): gpt_say = "" 
retry_op = retry_times_at_unknown_error exceeded_cnt = 0 mutable[index][2] = "执行中" while True: # watchdog error if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5: raise RuntimeError("检测到程序终止。") try: # 【第一种情况】:顺利完成 # time.sleep(10); raise RuntimeError("测试") gpt_say = predict_no_ui_long_connection( inputs=inputs, llm_kwargs=llm_kwargs, history=history, sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True ) mutable[index][2] = "已成功" return gpt_say except ConnectionAbortedError as token_exceeded_error: # 【第二种情况】:Token溢出, if handle_token_exceed: exceeded_cnt += 1 # 【选择处理】 尝试计算比例,尽可能多地保留文本 from utils.toolbox import get_reduce_token_percent p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error)) MAX_TOKEN = 4096 EXCEED_ALLO = 512 + 512 * exceeded_cnt inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO) gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n' mutable[index][2] = f"截断重试" continue # 返回重试 else: # 【选择放弃】 tb_str = '```\n' + trimmed_format_exc() + '```' gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n" if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0] mutable[index][2] = "输入过长已放弃" return gpt_say # 放弃 except: # 【第三种情况】:其他错误 tb_str = '```\n' + trimmed_format_exc() + '```' print(tb_str) gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n" if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0] if retry_op > 0: retry_op -= 1 wait = random.randint(5, 20) if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str): wait = wait * 3 fail_info = "OpenAI绑定信用卡可解除频率限制 " else: fail_info = "" # 也许等待十几秒后,情况会好转 for i in range(wait): mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1) # 开始重试 mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}" continue # 返回重试 else: mutable[index][2] = "已失败" wait = 5 
time.sleep(5) return gpt_say # 放弃 # 异步任务开始 futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip( range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)] cnt = 0 while True: # yield一次以刷新前端页面 time.sleep(refresh_interval) cnt += 1 worker_done = [h.done() for h in futures] # 更好的UI视觉效果 observe_win = [] # 每个线程都要“喂狗”(看门狗) for thread_index, _ in enumerate(worker_done): mutable[thread_index][1] = time.time() # 在前端打印些好玩的东西 for thread_index, _ in enumerate(worker_done): print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\ replace('\n', '').replace('```', '...').replace( ' ', '.').replace('
def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
    """
    Split `txt` at line boundaries into pieces of at most `limit` tokens.

    Args:
        txt: the text to split.
        get_token_fn: callable returning the token count of a string.
        limit: maximum token count per piece.

    Returns:
        list of str. Pieces are cut at '\\n' boundaries; the newline at a cut
        point is not kept.

    Raises:
        RuntimeError: when no usable line break exists (a single line is too long).

    Splitting is first tried at empty lines only; if that fails, any line
    break is accepted.
    """
    def cut(txt_tocut, must_break_at_empty_line):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        lines = txt_tocut.split('\n')
        # Guess the cut line from the token ratio, then search downwards from it.
        estimated_line_cut = int(limit / get_token_fn(txt_tocut) * len(lines))
        cnt = 0     # stays 0 when the estimate leaves no candidate line at all
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break
        if cnt == 0:
            raise RuntimeError("存在一行极长的文本!")
        # Keep the head, and split the remainder recursively.
        result = [prev]
        result.extend(cut(post, must_break_at_empty_line))
        return result

    try:
        return cut(txt, must_break_at_empty_line=True)
    except RuntimeError:
        return cut(txt, must_break_at_empty_line=False)
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
    """
    Split `txt` into pieces of at most `limit` tokens, trying progressively
    cruder split points until one works.

    Order of attempts: empty lines, any line break, English full stops,
    Chinese full stops, and finally a forced cut via `force_breakdown`.

    Args:
        txt: the text to split.
        get_token_fn: callable returning the token count of a string.
        limit: maximum token count per piece.

    Returns:
        list of str pieces.
    """
    def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):  # recursive
        if get_token_fn(txt_tocut) <= limit:
            return [txt_tocut]
        rows = txt_tocut.split('\n')
        n_candidates = int(limit / get_token_fn(txt_tocut) * len(rows))
        cnt = 0
        for cnt in reversed(range(n_candidates)):
            if must_break_at_empty_line and rows[cnt] != "":
                continue
            prev = "\n".join(rows[:cnt])
            post = "\n".join(rows[cnt:])
            if get_token_fn(prev) < limit:
                break
        if cnt == 0:
            if not break_anyway:
                raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
            prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
        # Keep the head, and split the remainder recursively.
        return [prev] + cut(post, must_break_at_empty_line, break_anyway=break_anyway)

    def cut_at_marker(original, marker):
        # Turn `original` into a marker that ends with a newline so `cut` can
        # break there, then map the marker back afterwards.
        pieces = cut(txt.replace(original, marker), must_break_at_empty_line=False)
        return [piece.replace(marker, original) for piece in pieces]

    gentle_attempts = (
        lambda: cut(txt, must_break_at_empty_line=True),    # 1: empty lines
        lambda: cut(txt, must_break_at_empty_line=False),   # 2: any line break
        lambda: cut_at_marker('.', '。\n'),                  # 3: English full stop (the Chinese stop is a deliberate marker)
        lambda: cut_at_marker('。', '。。\n'),                # 4: Chinese full stop
    )
    for attempt in gentle_attempts:
        try:
            return attempt()
        except RuntimeError:
            continue
    # 5: nothing worked, cut wherever the limit falls
    return cut(txt, must_break_at_empty_line=False, break_anyway=True)
# 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等) REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的?时,判定为不是正文(有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化) def primary_ffsize(l): """ 提取文本块主字体 """ fsize_statiscs = {} for wtf in l['spans']: if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0 fsize_statiscs[wtf['size']] += len(wtf['text']) return max(fsize_statiscs, key=fsize_statiscs.get) def ffsize_same(a,b): """ 提取字体大小是否近似相等 """ return abs((a-b)/max(a,b)) < 0.02 with fitz.open(fp) as doc: meta_txt = [] meta_font = [] meta_line = [] meta_span = [] ############################## <第 1 步,搜集初始信息> ################################## for index, page in enumerate(doc): # file_content += page.get_text() text_areas = page.get_text("dict") # 获取页面上的文本信息 for t in text_areas['blocks']: if 'lines' in t: pf = 998 for l in t['lines']: txt_line = "".join([wtf['text'] for wtf in l['spans']]) if len(txt_line) == 0: continue pf = primary_ffsize(l) meta_line.append([txt_line, pf, l['bbox'], l]) for wtf in l['spans']: # for l in t['lines']: meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])]) # meta_line.append(["NEW_BLOCK", pf]) # 块元提取 for each word segment with in line for each line cross-line words for each block meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace( '- ', '') for t in text_areas['blocks'] if 'lines' in t]) meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']]) for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t]) if index == 0: page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace( '- ', '') for t in text_areas['blocks'] if 'lines' in t] ############################## <第 2 步,获取正文主字体> ################################## fsize_statiscs = {} for span in meta_span: if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0 fsize_statiscs[span[1]] += span[2] main_fsize = max(fsize_statiscs, key=fsize_statiscs.get) if REMOVE_FOOT_NOTE: 
give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT ############################## <第 3 步,切分和重新整合> ################################## mega_sec = [] sec = [] for index, line in enumerate(meta_line): if index == 0: sec.append(line[fc]) continue if REMOVE_FOOT_NOTE: if meta_line[index][fs] <= give_up_fize_threshold: continue if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]): # 尝试识别段落 if meta_line[index][fc].endswith('.') and\ (meta_line[index-1][fc] != 'NEW_BLOCK') and \ (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7: sec[-1] += line[fc] sec[-1] += "\n\n" else: sec[-1] += " " sec[-1] += line[fc] else: if (index+1 < len(meta_line)) and \ meta_line[index][fs] > main_fsize: # 单行 + 字体大 mega_sec.append(copy.deepcopy(sec)) sec = [] sec.append("# " + line[fc]) else: # 尝试识别section if meta_line[index-1][fs] > meta_line[index][fs]: sec.append("\n" + line[fc]) else: sec.append(line[fc]) mega_sec.append(copy.deepcopy(sec)) finals = [] for ms in mega_sec: final = " ".join(ms) final = final.replace('- ', ' ') finals.append(final) meta_txt = finals ############################## <第 4 步,乱七八糟的后处理> ################################## def 把字符太少的块清除为回车(meta_txt): for index, block_txt in enumerate(meta_txt): if len(block_txt) < 100: meta_txt[index] = '\n' return meta_txt meta_txt = 把字符太少的块清除为回车(meta_txt) def 清理多余的空行(meta_txt): for index in reversed(range(1, len(meta_txt))): if meta_txt[index] == '\n' and meta_txt[index-1] == '\n': meta_txt.pop(index) return meta_txt meta_txt = 清理多余的空行(meta_txt) def 合并小写开头的段落块(meta_txt): def starts_with_lowercase_word(s): pattern = r"^[a-z]+" match = re.match(pattern, s) if match: return True else: return False for _ in range(100): for index, block_txt in enumerate(meta_txt): if starts_with_lowercase_word(block_txt): if meta_txt[index-1] != '\n': meta_txt[index-1] += ' ' else: meta_txt[index-1] = '' meta_txt[index-1] += meta_txt[index] meta_txt[index] = '\n' return 
def Singleton(cls):
    """Class decorator: the first call builds the instance, later calls return that same instance."""
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton


@Singleton
class knowledge_archive_interface():
    """
    Process-wide handle on a vector-store knowledge archive built by ``zh_langchain``.

    Building and querying are serialised by ``threadLock``. The lock is held
    through ``with`` so that it is released even when building or querying
    raises; otherwise a single failure would block every later call.
    """

    def __init__(self) -> None:
        self.threadLock = threading.Lock()
        self.current_id = ""                    # id of the archive currently loaded
        self.kai_path = None                    # second value returned by construct_vector_store
        self.qa_handle = None                   # first value returned by construct_vector_store
        self.text2vec_large_chinese = None      # embedding model, created on first use

    def get_chinese_text2vec(self):
        """Return the text2vec embedding model, creating it on first use."""
        if self.text2vec_large_chinese is None:
            # Warm up the text-vectorisation module.
            from utils.toolbox import ProxyNetworkActivate
            print('Checking Text2vec ...')
            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
            with ProxyNetworkActivate():    # temporarily enable the proxy network
                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

        return self.text2vec_large_chinese

    def feed_archive(self, file_manifest, id="default"):
        """Build (or extend) the archive `id` from the files in `file_manifest` and make it current."""
        with self.threadLock:
            self.current_id = id
            from zh_langchain import construct_vector_store
            self.qa_handle, self.kai_path = construct_vector_store(
                vs_id=self.current_id,
                files=file_manifest,
                sentence_size=100,
                history=[],
                one_conent="",
                one_content_segmentation="",
                text2vec = self.get_chinese_text2vec(),
            )

    def get_current_archive_id(self):
        """Return the id of the archive currently loaded ("" before any archive is loaded)."""
        return self.current_id

    def get_loaded_file(self):
        """Return whatever ``qa_handle.get_loaded_file()`` reports for the current archive."""
        return self.qa_handle.get_loaded_file()

    def answer_with_archive_by_id(self, txt, id):
        """
        Query archive `id` with `txt`, switching to that archive first if it is not current.

        Returns:
            (resp, prompt) as produced by ``qa_handle.get_knowledge_based_conent_test``.
        """
        VECTOR_SEARCH_SCORE_THRESHOLD = 0
        VECTOR_SEARCH_TOP_K = 4
        CHUNK_SIZE = 512
        with self.threadLock:
            if not self.current_id == id:
                self.current_id = id
                from zh_langchain import construct_vector_store
                self.qa_handle, self.kai_path = construct_vector_store(
                    vs_id=self.current_id,
                    files=[],
                    sentence_size=100,
                    history=[],
                    one_conent="",
                    one_content_segmentation="",
                    text2vec = self.get_chinese_text2vec(),
                )
            resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
                query = txt,
                vs_path = self.kai_path,
                score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
                vector_search_top_k=VECTOR_SEARCH_TOP_K,
                chunk_conent=True,
                chunk_size=CHUNK_SIZE,
                text2vec = self.get_chinese_text2vec(),
            )
        return resp, prompt
def __init__(self) -> None: self.css = """ .row { display: flex; flex-wrap: wrap; } .column { flex: 1; padding: 10px; } .table-header { font-weight: bold; border-bottom: 1px solid black; } .table-row { border-bottom: 1px solid lightgray; } .table-cell { padding: 5px; } """ self.html_string = f'翻译结果' def add_row(self, a, b): tmp = """
REPLACE_A
REPLACE_B
""" from utils.toolbox import markdown_convertion tmp = tmp.replace('REPLACE_A', markdown_convertion(a)) tmp = tmp.replace('REPLACE_B', markdown_convertion(b)) self.html_string += tmp def save_file(self, file_name): with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f: f.write(self.html_string.encode('utf-8', 'ignore').decode()) # from typing import List, Tuple, Any, Union # from langchain.schema import AgentAction, AgentFinish # from langchain.agents import BaseSingleActionAgent # from langchain import LLMChain, PromptTemplate # from langchain.base_language import BaseLanguageModel # class IntentAgent(BaseSingleActionAgent): # tools: List # llm: BaseLanguageModel # intent_template: str = """ # 现在有一些意图,类别为{intents},你的任务是理解用户问题的意图,并判断该问题属于哪一类意图。 # 回复的意图类别必须在提供的类别中,并且必须按格式回复:“意图类别:<>”。 # 举例: # 问题:什么是游戏角色皮卡丘? # 意图类别:游戏角色信息查询 # 问题:“{query}” # """ # prompt = PromptTemplate.from_template(intent_template) # llm_chain: LLMChain = None # def get_llm_chain(self): # if not self.llm_chain: # self.llm_chain = LLMChain(llm=self.llm, prompt=self.prompt) # def choose_tools(self, query) -> List[str]: # self.get_llm_chain() # tool_names = [tool.name for tool in self.tools] # resp = self.llm_chain.predict(intents=tool_names, query=query) # select_tools = [(name, resp.index(name)) for name in tool_names if name in resp] # select_tools.sort(key=lambda x:x[1]) # return [x[0] for x in select_tools] # @property # def input_keys(self): # return ["input"] # def plan( # self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any # ) -> Union[AgentAction, AgentFinish]: # # only for single tool # tool_name = self.choose_tools(kwargs["input"])[0] # return AgentAction(tool=tool_name, tool_input=kwargs["input"], log="") # async def aplan( # self, intermediate_steps: List[Tuple[AgentAction, str]], **kwargs: Any # ) -> Union[List[AgentAction], AgentFinish]: # raise NotImplementedError("IntentAgent does not support async") ================================================ FILE: 
crazy_functions/latex_fns/latex_actions.py ================================================ from utils.toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面 from utils.toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone from .latex_toolbox import PRESERVE, TRANSFORM from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout import os, shutil import re import numpy as np pj = os.path.join def split_subprocess(txt, project_folder, return_dict, opts): """ break down latex file to a linked list, each node use a preserve flag to indicate whether it should be proccessed by GPT. """ text = txt mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM # 吸收title与作者以上的部分 text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL) text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL) # 吸收iffalse注释 text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) # 吸收在42行以内的begin-end组合 text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42) # 吸收匿名公式 text, mask = set_forbidden_text(text, mask, [ r"\$\$([^$]+)\$\$", r"\\\[.*?\\\]" ], re.DOTALL) # 吸收其他杂项 text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ]) text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ]) text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL) text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) text, mask = 
set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"]) text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "]) text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL) # reverse 操作必须放在最后 text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True) text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True) text, mask = reverse_forbidden_text(text, mask, 
r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True) root = convert_to_linklist(text, mask) # 最后一步处理,增强稳健性 root = post_process(root) # 输出html调试文件,用红色标注处保留区(PRESERVE),用黑色标注转换区(TRANSFORM) with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: segment_parts_for_gpt = [] nodes = [] node = root while True: nodes.append(node) show_html = node.string.replace('\n','
') if not node.preserve: segment_parts_for_gpt.append(node.string) f.write(f'

#{node.range}{show_html}#

') else: f.write(f'

{show_html}

') node = node.next if node is None: break for n in nodes: n.next = None # break return_dict['nodes'] = nodes return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt return return_dict class LatexPaperSplit(): """ break down latex file to a linked list, each node use a preserve flag to indicate whether it should be proccessed by GPT. """ def __init__(self) -> None: self.nodes = None self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10): """ Merge the result after the GPT process completed """ result_string = "" node_cnt = 0 line_cnt = 0 for node in self.nodes: if node.preserve: line_cnt += node.string.count('\n') result_string += node.string else: translated_txt = fix_content(arr[node_cnt], node.string) begin_line = line_cnt end_line = line_cnt + translated_txt.count('\n') # reverse translation if any error if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]): translated_txt = node.string result_string += translated_txt node_cnt += 1 line_cnt += translated_txt.count('\n') if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) if not match: # match \abstract{xxxx} pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL) match = pattern_compile.search(result_string) position = match.regs[1][0] else: # match \begin{abstract}xxxx\end{abstract} position = match.end() result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string def split(self, txt, project_folder, opts): """ break down latex file to a linked list, each node use 
a preserve flag to indicate whether it should be proccessed by GPT. P.S. use multiprocessing to avoid timeout error """ import multiprocessing manager = multiprocessing.Manager() return_dict = manager.dict() p = multiprocessing.Process( target=split_subprocess, args=(txt, project_folder, return_dict, opts)) p.start() p.join() p.close() self.nodes = return_dict['nodes'] self.sp = return_dict['segment_parts_for_gpt'] return self.sp class LatexPaperFileGroup(): """ use tokenizer to break down text according to max_token_limit """ def __init__(self): self.file_paths = [] self.file_contents = [] self.sp_file_contents = [] self.sp_file_index = [] self.sp_file_tag = [] # count_token from request_llm.bridge_all import model_info enc = model_info["gpt-3.5-turbo"]['tokenizer'] def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) self.get_token_num = get_token_num def run_file_split(self, max_token_limit=1900): """ use tokenizer to break down text according to max_token_limit """ for index, file_content in enumerate(self.file_contents): if self.get_token_num(file_content) < max_token_limit: self.sp_file_contents.append(file_content) self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index]) else: from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) for j, segment in enumerate(segments): self.sp_file_contents.append(segment) self.sp_file_index.append(index) self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") print('Segmentation: done') def merge_result(self): self.file_result = ["" for _ in range(len(self.file_paths))] for r, k in zip(self.sp_file_result, self.sp_file_index): self.file_result[k] += r def write_result(self): manifest = [] for path, res in zip(self.file_paths, self.file_result): with open(path + '.polish.tex', 'w', encoding='utf8') as f: manifest.append(path + '.polish.tex') 
f.write(res) return manifest def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]): import time, os, re from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from .latex_actions import LatexPaperFileGroup, LatexPaperSplit # <-------- 寻找主tex文件 ----------> maintex = find_main_tex_file(file_manifest, mode) chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。')) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 time.sleep(3) # <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ----------> main_tex_basename = os.path.basename(maintex) assert main_tex_basename.endswith('.tex') main_tex_basename_bare = main_tex_basename[:-4] may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl') if os.path.exists(may_exist_bbl): shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl')) shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl')) shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl')) with open(maintex, 'r', encoding='utf-8', errors='replace') as f: content = f.read() merged_content = merge_tex_files(project_folder, content, mode) with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f: f.write(merged_content) # <-------- 精细切分latex文件 ----------> chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。')) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 lps = LatexPaperSplit() res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 # <-------- 拆分过长的latex片段 ----------> pfg = LatexPaperFileGroup() for index, r in enumerate(res): pfg.file_paths.append('segment-' + str(index)) pfg.file_contents.append(r) pfg.run_file_split(max_token_limit=1024) n_split = len(pfg.sp_file_contents) # <-------- 根据需要切换prompt ----------> inputs_array, 
sys_prompt_array = switch_prompt(pfg, mode) inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag] if os.path.exists(pj(project_folder,'temp.pkl')): # <-------- 【仅调试】如果存在调试缓存文件,则跳过GPT请求环节 ----------> pfg = objload(file=pj(project_folder,'temp.pkl')) else: # <-------- gpt 多线程请求 ----------> gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( inputs_array=inputs_array, inputs_show_user_array=inputs_show_user_array, llm_kwargs=llm_kwargs, chatbot=chatbot, history_array=[[""] for _ in range(n_split)], sys_prompt_array=sys_prompt_array, # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待 scroller_max_len = 40 ) # <-------- 文本碎片重组为完整的tex片段 ----------> pfg.sp_file_result = [] for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents): pfg.sp_file_result.append(gpt_say) pfg.merge_result() # <-------- 临时存储用于调试 ----------> pfg.get_token_num = None objdump(pfg, file=pj(project_folder,'temp.pkl')) write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder) # <-------- 写出文件 ----------> msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" final_tex = lps.merge_result(pfg.file_result, mode, msg) objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl')) with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f: if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex) # <-------- 整理结果, 退出 ----------> chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF')) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # <-------- 返回 ----------> return project_folder + f'/merge_{mode}.tex' def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]): try: with open(log_path, 'r', encoding='utf-8', errors='replace') as f: log = f.read() import re buggy_lines = 
re.findall(tex_name+':([0-9]{1,5}):', log) buggy_lines = [int(l) for l in buggy_lines] buggy_lines = sorted(buggy_lines) buggy_line = buggy_lines[0]-1 print("reversing tex line that has errors", buggy_line) # 重组,逆转出错的段落 if buggy_line not in fixed_line: fixed_line.append(buggy_line) lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl')) final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix) with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f: f.write(final_tex) return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines except: print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") return False, -1, [-1] def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'): import os, time n_fix = 1 fixed_line = [] max_try = 32 chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面 yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面 while True: import os may_exist_bbl = pj(work_folder_modified, f'merge.bbl') target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl') if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl): shutil.copyfile(may_exist_bbl, target_bbl) # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error 
{main_file_original}.tex', work_folder_original) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): # 只有第二步成功,才能继续下面的步骤 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面 if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')): ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original) if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')): ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) if mode!='translate_zh': yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex 
{work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) # <---------- 检查结果 -----------> results_ = "" original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf')) modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')) diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf')) results_ += f"原始PDF编译是否成功: {original_pdf_success};" results_ += f"转化PDF编译是否成功: {modified_pdf_success};" results_ += f"对比PDF编译是否成功: {diff_pdf_success};" yield from update_ui_lastest_msg(f'第{n_fix}编译结束:
{results_}...', chatbot, history) # 刷新Gradio前端界面 if diff_pdf_success: result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI if modified_pdf_success: yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path if os.path.exists(pj(work_folder, '..', 'translation')): shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI # 将两个PDF拼接 if original_pdf_success: try: from .latex_toolbox import merge_pdfs concat_pdf = pj(work_folder_modified, f'comparison.pdf') merge_pdfs(origin_pdf, result_pdf, concat_pdf) promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI except Exception as e: pass return True # 成功啦 else: if n_fix>=max_try: break n_fix += 1 can_retry, main_file_modified, buggy_lines = remove_buggy_lines( file_path=pj(work_folder_modified, f'{main_file_modified}.tex'), log_path=pj(work_folder_modified, f'{main_file_modified}.log'), tex_name=f'{main_file_modified}.tex', tex_name_pure=f'{main_file_modified}', n_fix=n_fix, work_folder_modified=work_folder_modified, fixed_line=fixed_line ) yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面 if not can_retry: break return False # 失败啦 def write_html(sp_file_contents, sp_file_result, chatbot, project_folder): # write html try: import shutil from ..crazy_utils import construct_html from toolbox import gen_time_str ch = construct_html() orig = "" trans = "" final = [] for c,r in zip(sp_file_contents, sp_file_result): final.append(c) final.append(r) for i, 
k in enumerate(final): if i%2==0: orig = k if i%2==1: trans = k ch.add_row(a=orig, b=trans) create_report_file_name = f"{gen_time_str()}.trans.html" ch.save_file(create_report_file_name) shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name)) promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) except: from toolbox import trimmed_format_exc print('writing html result failed:', trimmed_format_exc()) ================================================ FILE: crazy_functions/latex_fns/latex_toolbox.py ================================================ import os, shutil import re import numpy as np PRESERVE = 0 TRANSFORM = 1 pj = os.path.join class LinkedListNode(): """ Linked List Node """ def __init__(self, string, preserve=True) -> None: self.string = string self.preserve = preserve self.next = None self.range = None # self.begin_line = 0 # self.begin_char = 0 def convert_to_linklist(text, mask): root = LinkedListNode("", preserve=True) current_node = root for c, m, i in zip(text, mask, range(len(text))): if (m==PRESERVE and current_node.preserve) \ or (m==TRANSFORM and not current_node.preserve): # add current_node.string += c else: current_node.next = LinkedListNode(c, preserve=(m==PRESERVE)) current_node = current_node.next return root def post_process(root): # 修复括号 node = root while True: string = node.string if node.preserve: node = node.next if node is None: break continue def break_check(string): str_stack = [""] # (lv, index) for i, c in enumerate(string): if c == '{': str_stack.append('{') elif c == '}': if len(str_stack) == 1: print('stack fix') return i str_stack.pop(-1) else: str_stack[-1] += c return -1 bp = break_check(string) if bp == -1: pass elif bp == 0: node.string = string[:1] q = LinkedListNode(string[1:], False) q.next = node.next node.next = q else: node.string = string[:bp] q = LinkedListNode(string[bp:], False) q.next = node.next node.next = q node = node.next if 
node is None: break # 屏蔽空行和太短的句子 node = root while True: if len(node.string.strip('\n').strip(''))==0: node.preserve = True if len(node.string.strip('\n').strip(''))<42: node.preserve = True node = node.next if node is None: break node = root while True: if node.next and node.preserve and node.next.preserve: node.string += node.next.string node.next = node.next.next node = node.next if node is None: break # 将前后断行符脱离 node = root prev_node = None while True: if not node.preserve: lstriped_ = node.string.lstrip().lstrip('\n') if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): prev_node.string += node.string[:-len(lstriped_)] node.string = lstriped_ rstriped_ = node.string.rstrip().rstrip('\n') if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): node.next.string = node.string[len(rstriped_):] + node.next.string node.string = rstriped_ # ===== prev_node = node node = node.next if node is None: break # 标注节点的行数范围 node = root n_line = 0 expansion = 2 while True: n_l = node.string.count('\n') node.range = [n_line-expansion, n_line+n_l+expansion] # 失败时,扭转的范围 n_line = n_line+n_l node = node.next if node is None: break return root """ =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1) =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= """ def set_forbidden_text(text, mask, pattern, flags=0): """ Add a preserve text area in this paper e.g. 
with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" you can mask out (mask = PRESERVE so that text become untouchable for GPT) everything between "\begin{equation}" and "\end{equation}" """ if isinstance(pattern, list): pattern = '|'.join(pattern) pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True): """ Move area out of preserve area (make text editable for GPT) count the number of the braces so as to catch compelete text area. e.g. \begin{abstract} blablablablablabla. \end{abstract} """ if isinstance(pattern, list): pattern = '|'.join(pattern) pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): if not forbid_wrapper: mask[res.span()[0]:res.span()[1]] = TRANSFORM else: mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}' mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract return text, mask def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): """ Add a preserve text area in this paper (text become untouchable for GPT). count the number of the braces so as to catch compelete text area. e.g. \caption{blablablablabla\texbf{blablabla}blablabla.} """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): brace_level = -1 p = begin = end = res.regs[0][0] for _ in range(1024*16): if text[p] == '}' and brace_level == 0: break elif text[p] == '}': brace_level -= 1 elif text[p] == '{': brace_level += 1 p += 1 end = p+1 mask[begin:end] = PRESERVE return text, mask def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True): """ Move area out of preserve area (make text editable for GPT) count the number of the braces so as to catch compelete text area. e.g. 
\caption{blablablablabla\texbf{blablabla}blablabla.} """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): brace_level = 0 p = begin = end = res.regs[1][0] for _ in range(1024*16): if text[p] == '}' and brace_level == 0: break elif text[p] == '}': brace_level -= 1 elif text[p] == '{': brace_level += 1 p += 1 end = p mask[begin:end] = TRANSFORM if forbid_wrapper: mask[res.regs[0][0]:begin] = PRESERVE mask[end:res.regs[0][1]] = PRESERVE return text, mask def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): """ Find all \begin{} ... \end{} text block that with less than limit_n_lines lines. Add it to preserve area """ pattern_compile = re.compile(pattern, flags) def search_with_line_limit(text, mask): for res in pattern_compile.finditer(text): cmd = res.group(1) # begin{what} this = res.group(2) # content between begin and end this_mask = mask[res.regs[2][0]:res.regs[2][1]] white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate'] if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42 this, this_mask = search_with_line_limit(this, this_mask) mask[res.regs[2][0]:res.regs[2][1]] = this_mask else: mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE return text, mask return search_with_line_limit(text, mask) """ =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Latex Merge File =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= """ def find_main_tex_file(file_manifest, mode): """ 在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。 P.S. 
但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码) """ canidates = [] for texf in file_manifest: if os.path.basename(texf).startswith('merge'): continue with open(texf, 'r', encoding='utf8', errors='ignore') as f: file_content = f.read() if r'\documentclass' in file_content: canidates.append(texf) else: continue if len(canidates) == 0: raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') elif len(canidates) == 1: return canidates[0] else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回 canidates_score = [] # 给出一些判定模板文档的词作为扣分项 unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers'] expected_words = ['\input', '\ref', '\cite'] for texf in canidates: canidates_score.append(0) with open(texf, 'r', encoding='utf8', errors='ignore') as f: file_content = f.read() for uw in unexpected_words: if uw in file_content: canidates_score[-1] -= 1 for uw in expected_words: if uw in file_content: canidates_score[-1] += 1 select = np.argmax(canidates_score) # 取评分最高者返回 return canidates[select] def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): # 删除整行的空注释 if l.lstrip().startswith("%"): pass else: new_file_remove_comment_lines.append(l) main_file = '\n'.join(new_file_remove_comment_lines) # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令 main_file = re.sub(r'(? 
0 and node_string.count('\_') > final_tex.count('\_'): # walk and replace any _ without \ final_tex = re.sub(r"(?{}".format(args)) pass def test_on_close(self, *args): self.aliyun_service_ok = False pass def test_on_result_chg(self, message, *args): # print("test_on_chg:{}".format(message)) message = json.loads(message) self.parsed_text = message['payload']['result'] self.event_on_result_chg.set() def test_on_completed(self, message, *args): # print("on_completed:args=>{} message=>{}".format(args, message)) pass def audio_convertion_thread(self, uuid): # 在一个异步线程中采集音频 import nls # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git import tempfile from scipy import io from toolbox import get_conf from .audio_io import change_sample_rate from .audio_io import RealtimeAudioDistribution NEW_SAMPLERATE = 16000 rad = RealtimeAudioDistribution() rad.clean_up() temp_folder = tempfile.gettempdir() TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY') if len(TOKEN) == 0: TOKEN = self.get_token() self.aliyun_service_ok = True URL="wss://nls-gateway.aliyuncs.com/ws/v1" sr = nls.NlsSpeechTranscriber( url=URL, token=TOKEN, appkey=APPKEY, on_sentence_begin=self.test_on_sentence_begin, on_sentence_end=self.test_on_sentence_end, on_start=self.test_on_start, on_result_changed=self.test_on_result_chg, on_completed=self.test_on_completed, on_error=self.test_on_error, on_close=self.test_on_close, callback_args=[uuid.hex] ) r = sr.start(aformat="pcm", enable_intermediate_result=True, enable_punctuation_prediction=True, enable_inverse_text_normalization=True) while not self.stop: # time.sleep(self.capture_interval) audio = rad.read(uuid.hex) if audio is not None: # convert to pcm file temp_file = f'{temp_folder}/{uuid.hex}.pcm' # dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000 io.wavfile.write(temp_file, NEW_SAMPLERATE, dsdata) # read pcm binary with open(temp_file, "rb") as f: data = f.read() # print('audio len:', len(audio), '\t 
ds len:', len(dsdata), '\t need n send:', len(data)//640) slices = zip(*(iter(data),) * 640) # 640个字节为一组 for i in slices: sr.send_audio(bytes(i)) else: time.sleep(0.1) if not self.aliyun_service_ok: self.stop = True self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。' r = sr.stop() def get_token(self): from toolbox import get_conf import json from aliyunsdkcore.request import CommonRequest from aliyunsdkcore.client import AcsClient AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET') # 创建AcsClient实例 client = AcsClient( AccessKey_ID, AccessKey_secret, "cn-shanghai" ) # 创建request,并设置参数。 request = CommonRequest() request.set_method('POST') request.set_domain('nls-meta.cn-shanghai.aliyuncs.com') request.set_version('2019-02-28') request.set_action_name('CreateToken') try: response = client.do_action_with_exception(request) print(response) jss = json.loads(response) if 'Token' in jss and 'Id' in jss['Token']: token = jss['Token']['Id'] expireTime = jss['Token']['ExpireTime'] print("token = " + token) print("expireTime = " + str(expireTime)) except Exception as e: print(e) return token ================================================ FILE: crazy_functions/live_audio/audio_io.py ================================================ import numpy as np from scipy import interpolate def Singleton(cls): _instance = {} def _singleton(*args, **kargs): if cls not in _instance: _instance[cls] = cls(*args, **kargs) return _instance[cls] return _singleton @Singleton class RealtimeAudioDistribution(): def __init__(self) -> None: self.data = {} self.max_len = 1024*1024 self.rate = 48000 # 只读,每秒采样数量 def clean_up(self): self.data = {} def feed(self, uuid, audio): self.rate, audio_ = audio # print('feed', len(audio_), audio_[-25:]) if uuid not in self.data: self.data[uuid] = audio_ else: new_arr = np.concatenate((self.data[uuid], audio_)) if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:] self.data[uuid] = new_arr def read(self, uuid): if 
uuid in self.data: res = self.data.pop(uuid) print('\r read-', len(res), '-', max(res), end='', flush=True) else: res = None return res def change_sample_rate(audio, old_sr, new_sr): duration = audio.shape[0] / old_sr time_old = np.linspace(0, duration, audio.shape[0]) time_new = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr)) interpolator = interpolate.interp1d(time_old, audio.T) new_audio = interpolator(time_new).T return new_audio.astype(np.int16) ================================================ FILE: crazy_functions/test_project/cpp/cppipc/buffer.cpp ================================================ #include "libipc/buffer.h" #include "libipc/utility/pimpl.h" #include namespace ipc { bool operator==(buffer const & b1, buffer const & b2) { return (b1.size() == b2.size()) && (std::memcmp(b1.data(), b2.data(), b1.size()) == 0); } bool operator!=(buffer const & b1, buffer const & b2) { return !(b1 == b2); } class buffer::buffer_ : public pimpl { public: void* p_; std::size_t s_; void* a_; buffer::destructor_t d_; buffer_(void* p, std::size_t s, buffer::destructor_t d, void* a) : p_(p), s_(s), a_(a), d_(d) { } ~buffer_() { if (d_ == nullptr) return; d_((a_ == nullptr) ? 
p_ : a_, s_); } }; buffer::buffer() : buffer(nullptr, 0, nullptr, nullptr) { } buffer::buffer(void* p, std::size_t s, destructor_t d) : p_(p_->make(p, s, d, nullptr)) { } buffer::buffer(void* p, std::size_t s, destructor_t d, void* additional) : p_(p_->make(p, s, d, additional)) { } buffer::buffer(void* p, std::size_t s) : buffer(p, s, nullptr) { } buffer::buffer(char const & c) : buffer(const_cast(&c), 1) { } buffer::buffer(buffer&& rhs) : buffer() { swap(rhs); } buffer::~buffer() { p_->clear(); } void buffer::swap(buffer& rhs) { std::swap(p_, rhs.p_); } buffer& buffer::operator=(buffer rhs) { swap(rhs); return *this; } bool buffer::empty() const noexcept { return (impl(p_)->p_ == nullptr) || (impl(p_)->s_ == 0); } void* buffer::data() noexcept { return impl(p_)->p_; } void const * buffer::data() const noexcept { return impl(p_)->p_; } std::size_t buffer::size() const noexcept { return impl(p_)->s_; } } // namespace ipc ================================================ FILE: crazy_functions/test_project/cpp/cppipc/ipc.cpp ================================================ #include #include #include #include // std::pair, std::move, std::forward #include #include // aligned_storage_t #include #include #include #include #include "libipc/ipc.h" #include "libipc/def.h" #include "libipc/shm.h" #include "libipc/pool_alloc.h" #include "libipc/queue.h" #include "libipc/policy.h" #include "libipc/rw_lock.h" #include "libipc/waiter.h" #include "libipc/utility/log.h" #include "libipc/utility/id_pool.h" #include "libipc/utility/scope_guard.h" #include "libipc/utility/utility.h" #include "libipc/memory/resource.h" #include "libipc/platform/detail.h" #include "libipc/circ/elem_array.h" namespace { using msg_id_t = std::uint32_t; using acc_t = std::atomic; template struct msg_t; template struct msg_t<0, AlignSize> { msg_id_t cc_id_; msg_id_t id_; std::int32_t remain_; bool storage_; }; template struct msg_t : msg_t<0, AlignSize> { std::aligned_storage_t data_ {}; msg_t() = default; 
msg_t(msg_id_t cc_id, msg_id_t id, std::int32_t remain, void const * data, std::size_t size) : msg_t<0, AlignSize> {cc_id, id, remain, (data == nullptr) || (size == 0)} { if (this->storage_) { if (data != nullptr) { // copy storage-id *reinterpret_cast(&data_) = *static_cast(data); } } else std::memcpy(&data_, data, size); } }; template ipc::buff_t make_cache(T& data, std::size_t size) { auto ptr = ipc::mem::alloc(size); std::memcpy(ptr, &data, (ipc::detail::min)(sizeof(data), size)); return { ptr, size, ipc::mem::free }; } struct cache_t { std::size_t fill_; ipc::buff_t buff_; cache_t(std::size_t f, ipc::buff_t && b) : fill_(f), buff_(std::move(b)) {} void append(void const * data, std::size_t size) { if (fill_ >= buff_.size() || data == nullptr || size == 0) return; auto new_fill = (ipc::detail::min)(fill_ + size, buff_.size()); std::memcpy(static_cast(buff_.data()) + fill_, data, new_fill - fill_); fill_ = new_fill; } }; auto cc_acc() { static ipc::shm::handle acc_h("__CA_CONN__", sizeof(acc_t)); return static_cast(acc_h.get()); } IPC_CONSTEXPR_ std::size_t align_chunk_size(std::size_t size) noexcept { return (((size - 1) / ipc::large_msg_align) + 1) * ipc::large_msg_align; } IPC_CONSTEXPR_ std::size_t calc_chunk_size(std::size_t size) noexcept { return ipc::make_align(alignof(std::max_align_t), align_chunk_size( ipc::make_align(alignof(std::max_align_t), sizeof(std::atomic)) + size)); } struct chunk_t { std::atomic &conns() noexcept { return *reinterpret_cast *>(this); } void *data() noexcept { return reinterpret_cast(this) + ipc::make_align(alignof(std::max_align_t), sizeof(std::atomic)); } }; struct chunk_info_t { ipc::id_pool<> pool_; ipc::spin_lock lock_; IPC_CONSTEXPR_ static std::size_t chunks_mem_size(std::size_t chunk_size) noexcept { return ipc::id_pool<>::max_count * chunk_size; } ipc::byte_t *chunks_mem() noexcept { return reinterpret_cast(this + 1); } chunk_t *at(std::size_t chunk_size, ipc::storage_id_t id) noexcept { if (id < 0) return nullptr; 
return reinterpret_cast(chunks_mem() + (chunk_size * id)); } }; auto& chunk_storages() { class chunk_handle_t { ipc::shm::handle handle_; public: chunk_info_t *get_info(std::size_t chunk_size) { if (!handle_.valid() && !handle_.acquire( ("__CHUNK_INFO__" + ipc::to_string(chunk_size)).c_str(), sizeof(chunk_info_t) + chunk_info_t::chunks_mem_size(chunk_size) )) { ipc::error("[chunk_storages] chunk_shm.id_info_.acquire failed: chunk_size = %zd\n", chunk_size); return nullptr; } auto info = static_cast(handle_.get()); if (info == nullptr) { ipc::error("[chunk_storages] chunk_shm.id_info_.get failed: chunk_size = %zd\n", chunk_size); return nullptr; } return info; } }; static ipc::map chunk_hs; return chunk_hs; } chunk_info_t *chunk_storage_info(std::size_t chunk_size) { auto &storages = chunk_storages(); std::decay_t::iterator it; { static ipc::rw_lock lock; IPC_UNUSED_ std::shared_lock guard {lock}; if ((it = storages.find(chunk_size)) == storages.end()) { using chunk_handle_t = std::decay_t::value_type::second_type; guard.unlock(); IPC_UNUSED_ std::lock_guard guard {lock}; it = storages.emplace(chunk_size, chunk_handle_t{}).first; } } return it->second.get_info(chunk_size); } std::pair acquire_storage(std::size_t size, ipc::circ::cc_t conns) { std::size_t chunk_size = calc_chunk_size(size); auto info = chunk_storage_info(chunk_size); if (info == nullptr) return {}; info->lock_.lock(); info->pool_.prepare(); // got an unique id auto id = info->pool_.acquire(); info->lock_.unlock(); auto chunk = info->at(chunk_size, id); if (chunk == nullptr) return {}; chunk->conns().store(conns, std::memory_order_relaxed); return { id, chunk->data() }; } void *find_storage(ipc::storage_id_t id, std::size_t size) { if (id < 0) { ipc::error("[find_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size); return nullptr; } std::size_t chunk_size = calc_chunk_size(size); auto info = chunk_storage_info(chunk_size); if (info == nullptr) return nullptr; return info->at(chunk_size, 
id)->data(); } void release_storage(ipc::storage_id_t id, std::size_t size) { if (id < 0) { ipc::error("[release_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size); return; } std::size_t chunk_size = calc_chunk_size(size); auto info = chunk_storage_info(chunk_size); if (info == nullptr) return; info->lock_.lock(); info->pool_.release(id); info->lock_.unlock(); } template bool sub_rc(ipc::wr, std::atomic &/*conns*/, ipc::circ::cc_t /*curr_conns*/, ipc::circ::cc_t /*conn_id*/) noexcept { return true; } template bool sub_rc(ipc::wr, std::atomic &conns, ipc::circ::cc_t curr_conns, ipc::circ::cc_t conn_id) noexcept { auto last_conns = curr_conns & ~conn_id; for (unsigned k = 0;;) { auto chunk_conns = conns.load(std::memory_order_acquire); if (conns.compare_exchange_weak(chunk_conns, chunk_conns & last_conns, std::memory_order_release)) { return (chunk_conns & last_conns) == 0; } ipc::yield(k); } } template void recycle_storage(ipc::storage_id_t id, std::size_t size, ipc::circ::cc_t curr_conns, ipc::circ::cc_t conn_id) { if (id < 0) { ipc::error("[recycle_storage] id is invalid: id = %ld, size = %zd\n", (long)id, size); return; } std::size_t chunk_size = calc_chunk_size(size); auto info = chunk_storage_info(chunk_size); if (info == nullptr) return; auto chunk = info->at(chunk_size, id); if (chunk == nullptr) return; if (!sub_rc(Flag{}, chunk->conns(), curr_conns, conn_id)) { return; } info->lock_.lock(); info->pool_.release(id); info->lock_.unlock(); } template bool clear_message(void* p) { auto msg = static_cast(p); if (msg->storage_) { std::int32_t r_size = static_cast(ipc::data_length) + msg->remain_; if (r_size <= 0) { ipc::error("[clear_message] invalid msg size: %d\n", (int)r_size); return true; } release_storage( *reinterpret_cast(&msg->data_), static_cast(r_size)); } return true; } struct conn_info_head { ipc::string name_; msg_id_t cc_id_; // connection-info id ipc::detail::waiter cc_waiter_, wt_waiter_, rd_waiter_; ipc::shm::handle acc_h_; 
conn_info_head(char const * name) : name_ {name} , cc_id_ {(cc_acc() == nullptr) ? 0 : cc_acc()->fetch_add(1, std::memory_order_relaxed)} , cc_waiter_{("__CC_CONN__" + name_).c_str()} , wt_waiter_{("__WT_CONN__" + name_).c_str()} , rd_waiter_{("__RD_CONN__" + name_).c_str()} , acc_h_ {("__AC_CONN__" + name_).c_str(), sizeof(acc_t)} { } void quit_waiting() { cc_waiter_.quit_waiting(); wt_waiter_.quit_waiting(); rd_waiter_.quit_waiting(); } auto acc() { return static_cast(acc_h_.get()); } auto& recv_cache() { thread_local ipc::unordered_map tls; return tls; } }; template bool wait_for(W& waiter, F&& pred, std::uint64_t tm) { if (tm == 0) return !pred(); for (unsigned k = 0; pred();) { bool ret = true; ipc::sleep(k, [&k, &ret, &waiter, &pred, tm] { ret = waiter.wait_if(std::forward(pred), tm); k = 0; }); if (!ret) return false; // timeout or fail if (k == 0) break; // k has been reset } return true; } template struct queue_generator { using queue_t = ipc::queue, Policy>; struct conn_info_t : conn_info_head { queue_t que_; conn_info_t(char const * name) : conn_info_head{name} , que_{("__QU_CONN__" + ipc::to_string(DataSize) + "__" + ipc::to_string(AlignSize) + "__" + name).c_str()} { } void disconnect_receiver() { bool dis = que_.disconnect(); this->quit_waiting(); if (dis) { this->recv_cache().clear(); } } }; }; template struct detail_impl { using policy_t = Policy; using flag_t = typename policy_t::flag_t; using queue_t = typename queue_generator::queue_t; using conn_info_t = typename queue_generator::conn_info_t; constexpr static conn_info_t* info_of(ipc::handle_t h) noexcept { return static_cast(h); } constexpr static queue_t* queue_of(ipc::handle_t h) noexcept { return (info_of(h) == nullptr) ? 
nullptr : &(info_of(h)->que_); } /* API implementations */ static void disconnect(ipc::handle_t h) { auto que = queue_of(h); if (que == nullptr) { return; } que->shut_sending(); assert(info_of(h) != nullptr); info_of(h)->disconnect_receiver(); } static bool reconnect(ipc::handle_t * ph, bool start_to_recv) { assert(ph != nullptr); assert(*ph != nullptr); auto que = queue_of(*ph); if (que == nullptr) { return false; } if (start_to_recv) { que->shut_sending(); if (que->connect()) { // wouldn't connect twice info_of(*ph)->cc_waiter_.broadcast(); return true; } return false; } // start_to_recv == false if (que->connected()) { info_of(*ph)->disconnect_receiver(); } return que->ready_sending(); } static bool connect(ipc::handle_t * ph, char const * name, bool start_to_recv) { assert(ph != nullptr); if (*ph == nullptr) { *ph = ipc::mem::alloc(name); } return reconnect(ph, start_to_recv); } static void destroy(ipc::handle_t h) { disconnect(h); ipc::mem::free(info_of(h)); } static std::size_t recv_count(ipc::handle_t h) noexcept { auto que = queue_of(h); if (que == nullptr) { return ipc::invalid_value; } return que->conn_count(); } static bool wait_for_recv(ipc::handle_t h, std::size_t r_count, std::uint64_t tm) { auto que = queue_of(h); if (que == nullptr) { return false; } return wait_for(info_of(h)->cc_waiter_, [que, r_count] { return que->conn_count() < r_count; }, tm); } template static bool send(F&& gen_push, ipc::handle_t h, void const * data, std::size_t size) { if (data == nullptr || size == 0) { ipc::error("fail: send(%p, %zd)\n", data, size); return false; } auto que = queue_of(h); if (que == nullptr) { ipc::error("fail: send, queue_of(h) == nullptr\n"); return false; } if (que->elems() == nullptr) { ipc::error("fail: send, queue_of(h)->elems() == nullptr\n"); return false; } if (!que->ready_sending()) { ipc::error("fail: send, que->ready_sending() == false\n"); return false; } ipc::circ::cc_t conns = que->elems()->connections(std::memory_order_relaxed); if 
(conns == 0) { ipc::error("fail: send, there is no receiver on this connection.\n"); return false; } // calc a new message id auto acc = info_of(h)->acc(); if (acc == nullptr) { ipc::error("fail: send, info_of(h)->acc() == nullptr\n"); return false; } auto msg_id = acc->fetch_add(1, std::memory_order_relaxed); auto try_push = std::forward(gen_push)(info_of(h), que, msg_id); if (size > ipc::large_msg_limit) { auto dat = acquire_storage(size, conns); void * buf = dat.second; if (buf != nullptr) { std::memcpy(buf, data, size); return try_push(static_cast(size) - static_cast(ipc::data_length), &(dat.first), 0); } // try using message fragment //ipc::log("fail: shm::handle for big message. msg_id: %zd, size: %zd\n", msg_id, size); } // push message fragment std::int32_t offset = 0; for (std::int32_t i = 0; i < static_cast(size / ipc::data_length); ++i, offset += ipc::data_length) { if (!try_push(static_cast(size) - offset - static_cast(ipc::data_length), static_cast(data) + offset, ipc::data_length)) { return false; } } // if remain > 0, this is the last message fragment std::int32_t remain = static_cast(size) - offset; if (remain > 0) { if (!try_push(remain - static_cast(ipc::data_length), static_cast(data) + offset, static_cast(remain))) { return false; } } return true; } static bool send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) { return send([tm](auto info, auto que, auto msg_id) { return [tm, info, que, msg_id](std::int32_t remain, void const * data, std::size_t size) { if (!wait_for(info->wt_waiter_, [&] { return !que->push( [](void*) { return true; }, info->cc_id_, msg_id, remain, data, size); }, tm)) { ipc::log("force_push: msg_id = %zd, remain = %d, size = %zd\n", msg_id, remain, size); if (!que->force_push( clear_message, info->cc_id_, msg_id, remain, data, size)) { return false; } } info->rd_waiter_.broadcast(); return true; }; }, h, data, size); } static bool try_send(ipc::handle_t h, void const * data, std::size_t size, 
std::uint64_t tm) { return send([tm](auto info, auto que, auto msg_id) { return [tm, info, que, msg_id](std::int32_t remain, void const * data, std::size_t size) { if (!wait_for(info->wt_waiter_, [&] { return !que->push( [](void*) { return true; }, info->cc_id_, msg_id, remain, data, size); }, tm)) { return false; } info->rd_waiter_.broadcast(); return true; }; }, h, data, size); } static ipc::buff_t recv(ipc::handle_t h, std::uint64_t tm) { auto que = queue_of(h); if (que == nullptr) { ipc::error("fail: recv, queue_of(h) == nullptr\n"); return {}; } if (!que->connected()) { // hasn't connected yet, just return. return {}; } auto& rc = info_of(h)->recv_cache(); for (;;) { // pop a new message typename queue_t::value_t msg; if (!wait_for(info_of(h)->rd_waiter_, [que, &msg] { return !que->pop(msg); }, tm)) { // pop failed, just return. return {}; } info_of(h)->wt_waiter_.broadcast(); if ((info_of(h)->acc() != nullptr) && (msg.cc_id_ == info_of(h)->cc_id_)) { continue; // ignore message to self } // msg.remain_ may minus & abs(msg.remain_) < data_length std::int32_t r_size = static_cast(ipc::data_length) + msg.remain_; if (r_size <= 0) { ipc::error("fail: recv, r_size = %d\n", (int)r_size); return {}; } std::size_t msg_size = static_cast(r_size); // large message if (msg.storage_) { ipc::storage_id_t buf_id = *reinterpret_cast(&msg.data_); void* buf = find_storage(buf_id, msg_size); if (buf != nullptr) { struct recycle_t { ipc::storage_id_t storage_id; ipc::circ::cc_t curr_conns; ipc::circ::cc_t conn_id; } *r_info = ipc::mem::alloc(recycle_t{ buf_id, que->elems()->connections(std::memory_order_relaxed), que->connected_id() }); if (r_info == nullptr) { ipc::log("fail: ipc::mem::alloc.\n"); return ipc::buff_t{buf, msg_size}; // no recycle } else { return ipc::buff_t{buf, msg_size, [](void* p_info, std::size_t size) { auto r_info = static_cast(p_info); IPC_UNUSED_ auto finally = ipc::guard([r_info] { ipc::mem::free(r_info); }); recycle_storage(r_info->storage_id, size, 
r_info->curr_conns, r_info->conn_id); }, r_info}; } } else { ipc::log("fail: shm::handle for large message. msg_id: %zd, buf_id: %zd, size: %zd\n", msg.id_, buf_id, msg_size); continue; } } // find cache with msg.id_ auto cac_it = rc.find(msg.id_); if (cac_it == rc.end()) { if (msg_size <= ipc::data_length) { return make_cache(msg.data_, msg_size); } // gc if (rc.size() > 1024) { std::vector need_del; for (auto const & pair : rc) { auto cmp = std::minmax(msg.id_, pair.first); if (cmp.second - cmp.first > 8192) { need_del.push_back(pair.first); } } for (auto id : need_del) rc.erase(id); } // cache the first message fragment rc.emplace(msg.id_, cache_t { ipc::data_length, make_cache(msg.data_, msg_size) }); } // has cached before this message else { auto& cac = cac_it->second; // this is the last message fragment if (msg.remain_ <= 0) { cac.append(&(msg.data_), msg_size); // finish this message, erase it from cache auto buff = std::move(cac.buff_); rc.erase(cac_it); return buff; } // there are remain datas after this message cac.append(&(msg.data_), ipc::data_length); } } } static ipc::buff_t try_recv(ipc::handle_t h) { return recv(h, 0); } }; // detail_impl template using policy_t = ipc::policy::choose; } // internal-linkage namespace ipc { template ipc::handle_t chan_impl::inited() { ipc::detail::waiter::init(); return nullptr; } template bool chan_impl::connect(ipc::handle_t * ph, char const * name, unsigned mode) { return detail_impl>::connect(ph, name, mode & receiver); } template bool chan_impl::reconnect(ipc::handle_t * ph, unsigned mode) { return detail_impl>::reconnect(ph, mode & receiver); } template void chan_impl::disconnect(ipc::handle_t h) { detail_impl>::disconnect(h); } template void chan_impl::destroy(ipc::handle_t h) { detail_impl>::destroy(h); } template char const * chan_impl::name(ipc::handle_t h) { auto info = detail_impl>::info_of(h); return (info == nullptr) ? 
nullptr : info->name_.c_str(); } template std::size_t chan_impl::recv_count(ipc::handle_t h) { return detail_impl>::recv_count(h); } template bool chan_impl::wait_for_recv(ipc::handle_t h, std::size_t r_count, std::uint64_t tm) { return detail_impl>::wait_for_recv(h, r_count, tm); } template bool chan_impl::send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) { return detail_impl>::send(h, data, size, tm); } template buff_t chan_impl::recv(ipc::handle_t h, std::uint64_t tm) { return detail_impl>::recv(h, tm); } template bool chan_impl::try_send(ipc::handle_t h, void const * data, std::size_t size, std::uint64_t tm) { return detail_impl>::try_send(h, data, size, tm); } template buff_t chan_impl::try_recv(ipc::handle_t h) { return detail_impl>::try_recv(h); } template struct chan_impl>; // template struct chan_impl>; // TBD // template struct chan_impl>; // TBD template struct chan_impl>; template struct chan_impl>; } // namespace ipc ================================================ FILE: crazy_functions/test_project/cpp/cppipc/policy.h ================================================ #pragma once #include #include "libipc/def.h" #include "libipc/prod_cons.h" #include "libipc/circ/elem_array.h" namespace ipc { namespace policy { template