[
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright Zhengxiao Du\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License."
  },
  {
    "path": "MODEL_LICENSE",
    "content": "The CodeGeeX License\n\n1. Definitions\n\n“Licensor” means the CodeGeeX Model Team that distributes its Software.\n\n“Software” means the CodeGeeX model parameters made available under this license.\n\n2. License Grant\n\nSubject to the terms and conditions of this License, the Licensor hereby grants to you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license to use the Software solely for your non-commercial research purposes.\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\n3. Restriction\n\nYou will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any commercial, military, or illegal purposes.\n\nYou will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.\n\n4. Disclaimer\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\n5. Limitation of Liability\n\nEXCEPT TO THE EXTENT PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER BASED IN TORT, NEGLIGENCE, CONTRACT, LIABILITY, OR OTHERWISE WILL ANY LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES, OR ANY OTHER COMMERCIAL LOSSES, EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\n\n6. 
Dispute Resolution\n\nThis license shall be governed and construed in accordance with the laws of People’s Republic of China. Any dispute arising from or in connection with this License shall be submitted to Haidian District People's Court in Beijing.\n\nNote that the license is subject to update to a more comprehensive version. For any questions related to the license and copyright, please contact us at license@zhipuai.cn."
  },
  {
    "path": "README.md",
    "content": "![](resources/codegeex_logo.png)\n\n<p align=\"center\">\n    🏠 <a href=\"https://codegeex.cn\" target=\"_blank\">主页</a>｜🛠 插件 <a href=\"https://marketplace.visualstudio.com/items?itemName=aminer.codegeex\" target=\"_blank\">VS Code</a>, <a href=\"https://plugins.jetbrains.com/plugin/20587-codegeex\" target=\"_blank\">Jetbrains</a>｜🤗 <a href=\"https://huggingface.co/THUDM/codegeex2-6b\" target=\"_blank\">模型下载</a>｜📄 <a href=\"https://arxiv.org/abs/2303.17568\" target=\"_blank\">论文</a>｜👋 加入<a href=\"resources/wechat.md\"target=\"_blank\">微信开发者交流群</a>\n</p>\n\nRead this in [English](README_EN.md)<br>\n[日本語](README_JA.md)で読む<br>\nLire en [Français](README_FR.md)\n\n⭐️ 最新一代 [CodeGeeX4](https://github.com/THUDM/CodeGeeX4) 模型已经正式开源。\nThe newest [CodeGeeX4](https://github.com/THUDM/CodeGeeX4) has been released.\n\n# CodeGeeX2: 更强大的多语言代码生成模型\n\nCodeGeeX2 是多语言代码生成模型 [CodeGeeX](https://github.com/THUDM/CodeGeeX) ([KDD’23](https://arxiv.org/abs/2303.17568)) 的第二代模型。不同于一代 CodeGeeX（完全在国产华为昇腾芯片平台训练） ，CodeGeeX2 是基于 [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) 架构加入代码预训练实现，得益于 ChatGLM2 的更优性能，CodeGeeX2 在多项指标上取得性能提升（+107% > CodeGeeX；仅60亿参数即超过150亿参数的 StarCoder-15B 近10%），更多特性包括：\n\n* **更强大的代码能力**：基于 ChatGLM2-6B 基座语言模型，CodeGeeX2-6B 进一步经过了 600B 代码数据预训练，相比一代模型，在代码能力上全面提升，[HumanEval-X](https://huggingface.co/datasets/THUDM/humaneval-x) 评测集的六种编程语言均大幅提升 (Python +57%, C++ +71%, Java +54%, JavaScript +83%, Go +56%, Rust +321\\%)，在Python上达到 35.9\\% 的 Pass@1 一次通过率，超越规模更大的 StarCoder-15B。\n* **更优秀的模型特性**：继承 ChatGLM2-6B 模型特性，CodeGeeX2-6B 更好支持中英文输入，支持最大 8192 序列长度，推理速度较一代 CodeGeeX-13B 大幅提升，量化后仅需6GB显存即可运行，支持轻量级本地化部署。\n* **更全面的AI编程助手**：CodeGeeX插件（[VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)）后端升级，支持超过100种编程语言，新增上下文补全、跨文件补全等实用功能。结合 Ask CodeGeeX 交互式AI编程助手，支持中英文对话解决各种编程问题，包括且不限于代码解释、代码翻译、代码纠错、文档生成等，帮助程序员更高效开发。\n* **更开放的协议**：CodeGeeX2-6B 
权重对学术研究完全开放，填写[登记表](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B)申请商业使用。\n\n## 使用教程\n\n* [快速开始](#快速开始)\n* [推理教程（多卡推理，加速推理，多平台推理等）](docs/zh/inference_zh.md)\n\n## AI编程助手\n\n![](resources/codegeex_demo.png)\n\n我们开发了支持 VS Code、 IntelliJ IDEA、PyCharm、GoLand、WebStorm、Android Studio 等IDE的 CodeGeeX 插件。在插件中，可以更直接地体验到 CodeGeeX2 模型在代码生成与补全、添加注释、代码翻译及技术问答方面的能力为开发效率带来的提升。欢迎在IDE中下载 CodeGeeX 插件获得更加全面的AI编程体验，详情见[CodeGeeX主页](https://codegeex.cn/)。\n\n\n## 快速开始\n\n### 使用`transformers`快速调用[CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b)：\n\n```python\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\nmodel = model.eval()\n\n# remember adding a language tag for better performance\nprompt = \"# language: Python\\n# write a bubble sort function\\n\"\ninputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)\nresponse = tokenizer.decode(outputs[0])\n\n>>> print(response)\n# language: Python\n# write a bubble sort function\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 2, 1, 8, 4]))\n```\n\n### 启动 Gradio DEMO：\n```\npython ./demo/run_demo.py\n\nusage: run_demo.py [-h] [--model-path MODEL_PATH] [--example-path EXAMPLE_PATH] [--quantize QUANTIZE]\n                   [--chatglm-cpp] [--fastllm] [--n-gpus N_GPUS] [--gpu GPU] [--cpu] [--auth] [--username yourname]\n                   [--password yourpassword]\n                   [--port PORT] [--listen ADDRESS]\n\n# 若要启用身份验证，请先启用--auth，然后定义--username与--password，如：\npython run_demo.py --auth --username user --password password  # 若要监听所有地址请指定 --listen 0.0.0.0\n```\n支持使用 
[ChatGLM.cpp](https://github.com/li-plus/chatglm.cpp) 量化推理加速：\n```sh\npython ./demo/run_demo.py --quantize 4 --chatglm-cpp\n```\n### 启动FAST API:\n```\npython ./demo/fastapicpu.py\nusage: fastapicpu.py [-h] [--model-path MODEL_PATH] [--listen ADDRESS] [--port PORT] [--workders NUM] [--cpu] [--half] [--quantize QUANTIZE] [--chatglm-cpp]\n# --cpu启用cpu --half启用.half()\n```\n支持使用 [ChatGLM.cpp](https://github.com/li-plus/chatglm.cpp) 量化推理加速，同样添加 `--quantize 4 --chatglm-cpp` 参数即可。\n### API使用示例\n```\ncurl -X POST \"http://127.0.0.1:7860\" \\\n    -H 'Content-Type: application/json' \\\n    -d '{\"lang\": \"Python\", \"prompt\": \"# Write a quick sort function\"}'\n```\n\n\n❗️请注意：\n* CodeGeeX2-6B 是一个基座代码生成模型，不具备聊天能力。请前往插件中体验更全面的 Ask CodeGeeX 聊天功能。\n* 在使用 CodeGeeX2-6B 的补全功能时，输入prompt需要遵循特定的格式以获得最好的效果。比如需要在开头加入编程语言标签（`# language: Python`，请查看[完整语言列表](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14)），以注释的形式写prompt等。参考`run_demo.py`中的处理。\n* 如果显卡不支持`bfloat16`格式，将会输出错误的内容，需要将模型转换成`float16`格式：\n    ```python\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True).half().cuda()\n    ```\n* 如果需要使用多显卡加载模型,可以将以下代码：\n    ```python\n    tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\n    model = model.eval()\n    ```\n    替换为\n\n    ```python\n    def get_model():\n        tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n        from gpus import load_model_on_gpus\n        # gpus文件在demo文件夹中\n        model = load_model_on_gpus(\"THUDM/codegeex2-6b\", num_gpus=2)\n        model = model.eval()\n        return tokenizer, model\n\n    tokenizer, model = get_model()\n    ```\n\n## 代码能力评测\n\nCodeGeeX2 作为一个多语言代码生成基座模型，代码能力较上一代大幅提升，以下是在 HumanEval，HumanEval-X, DS1000 基准上的评测结果（评价指标 Pass@k 定义与[论文](https://arxiv.org/abs/2303.17568)中一致）：\n\n### HumanEval 
(Pass@1,10,100)\n\n| **Model**           | **Pass@1** | **Pass@10** | **Pass@100** |\n| :-----------------: | :--------: | :---------: | :----------: |\n| CodeGen-16B-multi   | 19\\.2      | 34\\.6       | 55\\.2        |\n| CodeGeeX-13B        | 22\\.9      | 39\\.6       | 60\\.9        |\n| Codex-12B           | 28\\.8      | 46\\.8       | 72\\.3        |\n| CodeT5Plus-16B-mono | 30\\.9      | 51\\.6       | 76\\.7        |\n| Code-Cushman-001    | 33\\.5      | 54\\.3       | 77\\.4        |\n| LLaMA-65B           | 23\\.7      | -           | 79\\.3        |\n| LLaMA2-70B          | 29\\.9      | -           | -            |\n| CodeGen2\\.5-7B-mono | 33\\.4      | 58\\.4       | 82\\.7        |\n| StarCoder-15B       | 33\\.2      | 61\\.0       | 84\\.7        |\n| **CodeGeeX2-6B**    | **35\\.9**  | **62\\.6**   | **88\\.3**    |\n> **Pass@1** 使用 `n=20, t=0.2, top_p=0.95`；**Pass@10,Pass@100** 使用 `n=200, t=0.8, top_p=0.95`。\n\n### HumanEval-X (Pass@1)\n\n| **Model**                | **Python** | **C++**   | **Java**  | **JavaScript** | **Go**    | **Rust**  | **Overall** |\n| :------------------: | :--------: | :-------: | :-------: | :------------: | :-------: | :-------: | :---------: |\n| CodeGen-16B-multi    | 19\\.2      | 18\\.1     | 15\\.0     | 18\\.4          | 13\\.0     | 1\\.8      | 14\\.2       |\n| CodeGeeX-13B         | 22\\.9      | 17\\.1     | 20\\.0     | 17\\.6          | 14\\.4     | 4\\.3      | 16\\.0       |\n| Replit-code-v1-3B    | 22\\.0      | 20\\.1     | 20\\.1     | 20\\.1          | 12\\.2     | 8\\.6      | 17\\.2       |\n| CodeGen2\\.5-7B-multi | 30\\.6      | 24\\.3     | 29\\.0     | 27\\.5          | 18\\.9     | **20\\.1** | 25\\.1       |\n| StarCoder-15B        | 35\\.5      | 28\\.2     | **31\\.5** | **33\\.2**      | 21\\.3     | 17\\.8     | 27\\.9       |\n| **CodeGeeX2-6B**         | **35\\.9**  | **29\\.3** | 30\\.8     | 32\\.2          | **22\\.5** | 18\\.1     | **28\\.1**   |\n> **Pass@1** 使用 `n=20, 
t=0.2, top_p=0.95`。\n\n以上结果可使用脚本`scripts/run_humanevalx.sh`复现。环境配置和说明参见[评测环境](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README_zh.md)。\n\n### DS1000 (Pass@1)\n\n| **Model**            | **Matplotlib** | **Numpy** | **Pandas** | **Pytorch** | **SciPy** | **Scikit-learn** | **TensorFlow** | **Overall** |\n| :--------------: | :------------: | :-------: | :--------: | :---------: | :-------: | :--------------: | :------------: | :---------: |\n| \\# Samples       | 155            | 220       | 291        | 68          | 106       | 115              | 45             | 1000        |\n| CodeGen-16B-Mono | 31\\.7          | 10\\.9     | 3\\.4       | 7\\.0        | 9\\.0      | 10\\.8            | 15\\.2          | 11\\.7       |\n| code-cushman-001 | 40\\.7          | 21\\.8     | 7\\.9       | 12\\.4       | 11\\.3     | 18\\.0            | 12\\.2          | 18\\.1       |\n| Codex-001        | 41\\.8          | 26\\.6     | 9\\.4       | 9\\.7        | 15\\.0     | 18\\.5            | 17\\.2          | 20\\.2       |\n| **CodeGeeX2-6B** | 40\\.5          | 25\\.5     | 14\\.5      | 17\\.3       | 19\\.3     | 24\\.0            | 23\\.0          | 23\\.1       |\n| StarCoder-15B    | 51\\.7          | 29\\.7     | 11\\.4      | 21\\.4       | 20\\.2     | 29\\.5            | 24\\.5          | 26\\.0       |\n| Codex-002        | **57\\.0**      | **43\\.1** | **26\\.5**  | **41\\.8**   | **31\\.8** | **44\\.8**        | **39\\.3**      | **39\\.2**   |\n> **Pass@1** 使用 `n=40, t=0.2, top_p=0.5`。\n\n以上结果可使用[DS1000评测代码](https://github.com/HKUNLP/DS-1000.git)复现。\n\n## 量化推理性能\n\nCodeGeeX2 与上一代相比，对部署更加友好。得益于使用 Multi-Query Attention 和 Flash Attention，推理速度更快，且量化后仅需6GB显存即可运行：\n\n### 量化\n\n| **Model**        | FP16/BF16 | INT8    | INT4   |\n| :--------------: | :-------: | :-----: | :----: |\n| CodeGeeX-13B     | 26\\.9 GB   | 14\\.7 GB | -      |\n| **CodeGeeX2-6B** | 13\\.1 GB  | 8\\.2 GB  | 5\\.5 GB |\n> 基于 PyTorch 2.0 
测试，利用`torch.nn.functional.scaled_dot_product_attention`实现高效的 Attention 计算。\n\n### 推理\n\n| **Model**        | **推理速度 (字符/秒)** |\n| :--------------: | :-------------: |\n| CodeGeeX-13B     | 32              |\n| **CodeGeeX2-6B** | 94              |\n> `batch_size=1, max_length=2048`，均使用加速框架，测试硬件为`GeForce RTX-3090`。\n\n## 协议\n\n本仓库的代码依照 [Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) 协议开源，模型的权重的使用则需要遵循 [Model License](MODEL_LICENSE)。CodeGeeX2-6B 权重对学术研究完全开放，填写[登记表](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B)申请商业使用。\n\n\n## 引用\n\n如果觉得我们的工作有帮助，欢迎引用以下论文：\n\n```\n@inproceedings{zheng2023codegeex,\n  title={CodeGeeX: A Pre-Trained Model for Code Generation with Multilingual Benchmarking on HumanEval-X},\n  author={Qinkai Zheng and Xiao Xia and Xu Zou and Yuxiao Dong and Shan Wang and Yufei Xue and Zihan Wang and Lei Shen and Andi Wang and Yang Li and Teng Su and Zhilin Yang and Jie Tang},\n  booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\n  pages={5673--5684},\n  year={2023}\n}\n```\n"
  },
  {
    "path": "README_EN.md",
    "content": "![](resources/codegeex_logo.png)\n\n<p align=\"center\">\n    🏠 <a href=\"https://codegeex.cn\" target=\"_blank\">Homepage</a>｜🛠 Extensions <a href=\"https://marketplace.visualstudio.com/items?itemName=aminer.codegeex\" target=\"_blank\">VS Code</a>, <a href=\"https://plugins.jetbrains.com/plugin/20587-codegeex\" target=\"_blank\">Jetbrains</a>｜🤗 <a href=\"https://huggingface.co/THUDM/codegeex2-6b\" target=\"_blank\">HF Repo</a>｜📄 <a href=\"https://arxiv.org/abs/2303.17568\" target=\"_blank\">Paper</a>\n</p>\n\n<p align=\"center\">\n    👋 Join our <a href=\"https://discord.gg/8gjHdkmAN6\" target=\"_blank\">Discord</a>, <a href=\"https://join.slack.com/t/codegeexworkspace/shared_invite/zt-1s118ffrp-mpKKhQD0tKBmzNZVCyEZLw\" target=\"_blank\">Slack</a>, <a href=\"https://t.me/+IipIayJ32B1jOTg1\" target=\"_blank\">Telegram</a>, <a href=\"resources/wechat.md\"target=\"_blank\">WeChat</a>\n</p>\n\n查看[中文版](README.md)<br>\n[日本語](README_JA.md)で読む<br>\nLire en [Français](README_FR.md)\n\n# CodeGeeX2: A More Powerful Multilingual Code Generation Model\n\nCodeGeeX2 is the second-generation model of the multilingual code generation model [CodeGeeX](https://github.com/THUDM/CodeGeeX) ([KDD’23](https://arxiv.org/abs/2303.17568)), which is implemented based on the [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) architecture trained on more code data. Due to the advantage of ChatGLM2, CodeGeeX2 has been comprehensively improved in coding capability (+107% > CodeGeeX; with only 6B parameters, surpassing larger StarCoder-15B for some tasks). It has the following features:\n\n* **More Powerful Coding Capabilities**: Based on the ChatGLM2-6B model, CodeGeeX2-6B has been further pre-trained on 600B code tokens, which has been comprehensively improved in coding capability compared to the first-generation. 
On the [HumanEval-X](https://huggingface.co/datasets/THUDM/humaneval-x) benchmark, all six languages have been significantly improved (Python +57%, C++ +71%, Java +54%, JavaScript +83%, Go +56%, Rust +321\\%), and in Python it reached a Pass@1 (one-time pass rate) of 35.9%, surpassing the larger StarCoder-15B.\n* **More Useful Features**: Inheriting the ChatGLM2-6B model features, CodeGeeX2-6B better supports both Chinese and English prompts and a maximum sequence length of 8192, and its inference speed is significantly improved compared to the first generation. After quantization, it needs only 6GB of GPU memory for inference, and thus supports lightweight local deployment.\n* **Comprehensive AI Coding Assistant**: The backend of the CodeGeeX plugin ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)) has been upgraded, supporting 100+ programming languages and adding practical functions such as infilling and cross-file completion. Combined with the \"Ask CodeGeeX\" interactive AI coding assistant, it can be used to solve various programming problems via Chinese or English dialogue, including but not limited to code summarization, code translation, debugging, and comment generation, which helps developers work more efficiently.\n* **Open License**: CodeGeeX2-6B weights are fully open to academic research; to apply for commercial use, please fill in the [registration form](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B).\n\n\n## AI Coding Assistant\n\n![](resources/codegeex_demo.png)\n\nWe have developed the CodeGeeX plugin, which supports IDEs such as VS Code, IntelliJ IDEA, PyCharm, GoLand, WebStorm, and Android Studio. The plugin allows you to experience the CodeGeeX2 model's capabilities in code generation and completion, annotation, code translation, and \"Ask CodeGeeX\" interactive programming, which can help improve your development efficiency. 
Please download the CodeGeeX plugin in your IDE to get a more comprehensive AI coding experience. You can find more details on our [homepage](https://codegeex.cn/).\n\n## Get Started\n\nUse `transformers` to quickly launch [CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b):\n\n```python\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\nmodel = model.eval()\n\n# remember adding a language tag for better performance\nprompt = \"# language: Python\\n# write a bubble sort function\\n\"\ninputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)\nresponse = tokenizer.decode(outputs[0])\n\n>>> print(response)\n# language: Python\n# write a bubble sort function\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 2, 1, 8, 4]))\n```\n\nLaunch Gradio DEMO:\n```\npython ./demo/run_demo.py\n```\n\n❗️Attention:\n* CodeGeeX2 is a base model, which is not instruction-tuned for chatting. It can do tasks like code completion/translation/explanation. Try the instruction-tuned version in the CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)).\n* Programming languages can be controlled by adding a language tag, e.g., `# language: Python`. The format should be respected to ensure performance; the full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14). 
Please write comments in the comment format of the selected programming language to achieve better results.\n* If the GPU doesn't support `bfloat16` format, it will cause incorrect output. Please convert the model to `float16` format:\n    ```python\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True).half().cuda()\n    ```\n* If you need to load the model on multiple GPUs, replace the following code:\n    ```python\n    tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\n    model = model.eval()\n    ```\n    with\n\n    ```python\n    def get_model():\n        tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n        from gpus import load_model_on_gpus\n        # The \"gpus\" file is located in the demo folder\n        model = load_model_on_gpus(\"THUDM/codegeex2-6b\", num_gpus=2)\n        model = model.eval()\n        return tokenizer, model\n\n    tokenizer, model = get_model()\n    ```\n\n## Evaluation\n\nCodeGeeX2 is a base model for multilingual code generation, which has been significantly improved in its coding ability compared to the previous generation. 
The following are the evaluation results on the HumanEval, HumanEval-X, and DS1000 benchmarks (the evaluation metric Pass@k is the same as in the [paper](https://arxiv.org/abs/2303.17568)):\n\n### HumanEval (Pass@1,10,100)\n\n| **Model**           | **Pass@1** | **Pass@10** | **Pass@100** |\n| :-----------------: | :--------: | :---------: | :----------: |\n| CodeGen-16B-multi   | 19\\.2      | 34\\.6       | 55\\.2        |\n| CodeGeeX-13B        | 22\\.9      | 39\\.6       | 60\\.9        |\n| Codex-12B           | 28\\.8      | 46\\.8       | 72\\.3        |\n| CodeT5Plus-16B-mono | 30\\.9      | 51\\.6       | 76\\.7        |\n| Code-Cushman-001    | 33\\.5      | 54\\.3       | 77\\.4        |\n| LLaMA-65B           | 23\\.7      | -           | 79\\.3        |\n| LLaMA2-70B          | 29\\.9      | -           | -            |\n| CodeGen2\\.5-7B-mono | 33\\.4      | 58\\.4       | 82\\.7        |\n| StarCoder-15B       | 33\\.2      | 61\\.0       | 84\\.7        |\n| **CodeGeeX2-6B**    | **35\\.9**  | **62\\.6**   | **88\\.3**    |\n> `n=20, t=0.2, top_p=0.95` for **Pass@1**; `n=200, t=0.8, top_p=0.95` for **Pass@10** and **Pass@100**.\n\n### HumanEval-X (Pass@1)\n\n| **Model**                | **Python** | **C++**   | **Java**  | **JavaScript** | **Go**    | **Rust**  | **Overall** |\n| :------------------: | :--------: | :-------: | :-------: | :------------: | :-------: | :-------: | :---------: |\n| CodeGen-16B-multi    | 19\\.2      | 18\\.1     | 15\\.0     | 18\\.4          | 13\\.0     | 1\\.8      | 14\\.2       |\n| CodeGeeX-13B         | 22\\.9      | 17\\.1     | 20\\.0     | 17\\.6          | 14\\.4     | 4\\.3      | 16\\.0       |\n| Replit-code-v1-3B    | 22\\.0      | 20\\.1     | 20\\.1     | 20\\.1          | 12\\.2     | 8\\.6      | 17\\.2       |\n| CodeGen2\\.5-7B-multi | 30\\.6      | 24\\.3     | 29\\.0     | 27\\.5          | 18\\.9     | **20\\.1** | 25\\.1       |\n| StarCoder-15B        | 35\\.5      | 28\\.2     | **31\\.5** | 
**33\\.2**      | 21\\.3     | 17\\.8     | 27\\.9       |\n| **CodeGeeX2-6B**         | **35\\.9**  | **29\\.3** | 30\\.8     | 32\\.2          | **22\\.5** | 18\\.1     | **28\\.1**   |\n> `n=20, t=0.2, top_p=0.95` for **Pass@1**.\n\nThe above results can be reproduced by running `scripts/run_humanevalx.sh`. Refer to [HumanEval-X environment](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README_zh.md) for the experiment setups.\n\n### DS1000 (Pass@1)\n\n| **Model**            | **Matplotlib** | **Numpy** | **Pandas** | **Pytorch** | **SciPy** | **Scikit-learn** | **TensorFlow** | **Overall** |\n| :--------------: | :------------: | :-------: | :--------: | :---------: | :-------: | :--------------: | :------------: | :---------: |\n| \\# Samples       | 155            | 220       | 291        | 68          | 106       | 115              | 45             | 1000        |\n| CodeGen-16B-Mono | 31\\.7          | 10\\.9     | 3\\.4       | 7\\.0        | 9\\.0      | 10\\.8            | 15\\.2          | 11\\.7       |\n| code-cushman-001 | 40\\.7          | 21\\.8     | 7\\.9       | 12\\.4       | 11\\.3     | 18\\.0            | 12\\.2          | 18\\.1       |\n| Codex-001        | 41\\.8          | 26\\.6     | 9\\.4       | 9\\.7        | 15\\.0     | 18\\.5            | 17\\.2          | 20\\.2       |\n| **CodeGeeX2-6B** | 40\\.5          | 25\\.5     | 14\\.5      | 17\\.3       | 19\\.3     | 24\\.0            | 23\\.0          | 23\\.1       |\n| StarCoder-15B    | 51\\.7          | 29\\.7     | 11\\.4      | 21\\.4       | 20\\.2     | 29\\.5            | 24\\.5          | 26\\.0       |\n| Codex-002        | **57\\.0**      | **43\\.1** | **26\\.5**  | **41\\.8**   | **31\\.8** | **44\\.8**        | **39\\.3**      | **39\\.2**   |\n> `n=40, t=0.2, top_p=0.5` for **Pass@1**。\n\nThe above results can be reproduced by the code in [DS1000 repo](https://github.com/HKUNLP/DS-1000.git).\n\n## Inference\n\nCodeGeeX2 is more friendly to 
deployment than the previous generation. Thanks to the use of Multi-Query Attention and Flash Attention, the inference speed is faster, and only 6GB of GPU memory is required after INT4 quantization.\n\n### Quantization\n\n| **Model**        | FP16/BF16 | INT8    | INT4   |\n| :--------------: | :-------: | :-----: | :----: |\n| CodeGeeX-13B     | 26\\.9 GB   | 14\\.7 GB | -      |\n| **CodeGeeX2-6B** | 13\\.1 GB  | 8\\.2 GB  | 5\\.5 GB |\n> Based on PyTorch 2.0, using `torch.nn.functional.scaled_dot_product_attention` for an efficient attention mechanism.\n\n### Acceleration\n\n| **Model**        | **Inference speed (token/s)** |\n| :--------------: | :-------------: |\n| CodeGeeX-13B     | 32              |\n| **CodeGeeX2-6B** | 94              |\n> `batch_size=1, max_length=2048`, both using an acceleration framework, on a `GeForce RTX-3090`.\n\n## License\n\nThe code in this repository is open source under the [Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) license. The model weights are licensed under the [Model License](MODEL_LICENSE). CodeGeeX2-6B weights are open for academic research; for commercial use, please apply by filling in the [registration form](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B).\n\n\n## Citation\n\nIf you find our work helpful, please feel free to cite the following paper:\n\n```\n@inproceedings{zheng2023codegeex,\n  title={CodeGeeX: A Pre-Trained Model for Code Generation with Multilingual Benchmarking on HumanEval-X},\n  author={Qinkai Zheng and Xiao Xia and Xu Zou and Yuxiao Dong and Shan Wang and Yufei Xue and Zihan Wang and Lei Shen and Andi Wang and Yang Li and Teng Su and Zhilin Yang and Jie Tang},\n  booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\n  pages={5673--5684},\n  year={2023}\n}\n```\n"
  },
  {
    "path": "README_FR.md",
    "content": "![](resources/codegeex_logo.png)\n\n<p align=\"center\">\n    🏠 <a href=\"https://codegeex.cn\" target=\"_blank\">Homepage</a>｜🛠 Extensions <a href=\"https://marketplace.visualstudio.com/items?itemName=aminer.codegeex\" target=\"_blank\">VS Code</a>, <a href=\"https://plugins.jetbrains.com/plugin/20587-codegeex\" target=\"_blank\">Jetbrains</a>｜🤗 <a href=\"https://huggingface.co/THUDM/codegeex2-6b\" target=\"_blank\">HF Repo</a>｜📄 <a href=\"https://arxiv.org/abs/2303.17568\" target=\"_blank\">Paper</a>\n</p>\n\n<p align=\"center\">\n    👋 Rejoignez nous sur <a href=\"https://discord.gg/8gjHdkmAN6\" target=\"_blank\">Discord</a>, <a href=\"https://join.slack.com/t/codegeexworkspace/shared_invite/zt-1s118ffrp-mpKKhQD0tKBmzNZVCyEZLw\" target=\"_blank\">Slack</a>, <a href=\"https://t.me/+IipIayJ32B1jOTg1\" target=\"_blank\">Telegram</a>, <a href=\"resources/wechat.md\"target=\"_blank\">WeChat</a>\n</p>\n\n查看[中文版](README.md)<br>\nRead this in [English](README_EN.md)<br>\n[日本語](README_JA.md)で読む\n\n# CodeGeeX2: Un Modèle de Génération de Code Plus Puissant\n\nCodeGeeX2 est la deuxième itération du modèle de génération de code multilingue [CodeGeeX](https://github.com/THUDM/CodeGeeX) ([KDD’23](https://arxiv.org/abs/2303.17568)), basé sur [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) et entrainé sur un large corpus de code. Grâce à l'architecture ChatGLM2, CodeGeeX2 excelle sur une multitude de tâches de génération de code (+107% > CodeGeeX; avec seulement 6 milliards de paramètres, dépassant StarCoder-15B pour certaines tâches). CodeGeeX2 possède les fonctionnalités suivantes:\n\n* **Capacités de Génération de Code Accrues**: Basé sur ChatGLM2-6B, CodeGeeX2-6B à été entrainé sur un dataset de 600 milliards de tokens de plus ce qui a propulsé ses capacités de génération de code par rapport à la génération précédente. 
Sur [HumanEval-X](https://huggingface.co/datasets/THUDM/humaneval-x), le modèle opère bien mieux que son prédécesseur (Python +57%, C++ +71%, Java +54%, JavaScript +83%, Go +56%, Rust +321\\%). En Python, CodeGeeX atteint un score Pass@1 de 35.9%, surpassant StarCoder-15B malgré le fait que CodeGeeX ait ~3 fois moins de paramètres.\n* **Des Fonctionnalités Plus Utiles**: Héritant des fonctionnalités de ChatGLM2-6B, CodeGeeX2-6B prend mieux en charge les prompts en chinois et en anglais, peut ingérer jusqu'à 8192 tokens, et se dotte d'une vitesse de génération en inference fortement accrue comparé à la dernière génération. Après quantisation, CodeGeeX fonctionne sur un GPU avec >6GB de mémoire, permettant un déploiement local efficace.\n* **Un Assistant Intelligent dans votre Éditeur**: Les plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), et [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)) ont été mis à jour et sont maintenant compatible avec plus de 100 langages de programmation. Le modèle, couplé à l'extension, permet désormais aux utilisateurs de générer du code pour plusieurs fichiers ainsi que de générer et modifier des sections de code. CodeGeeX2 est maintenant capable de résoudre de nombreux problèmes de programmation. Les utilisateurs peuvent profiter de la fonctionnalité \"Ask CodeGeeX\" pour discuter de manière interactive avec un AI-assistant afin de résumer et d'expliquer du code, traduire du code entre langages, rajouter des commentaires, etc. CodeGeeX permet de maximiser la productivité de ses utilisateurs.\n* **License Open-Source**: Les poids du modèle CodeGeeX2-6B sont en accès libre pour toute utilisation dans le cadre de la recherche. 
Pour toute utilisation commerciale, merci de consulter ce [formulaire](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B).\n\n\n## Assistant Intelligent\n\n![](resources/codegeex_demo.png)\n\nNous avons développé une extension pour VS Code, IntelliJ IDEA, PyCharm, GoLand, WebStorm, and Android Studio. L'extension permet de profiter des capacités du modèle CodeGeeX2 et de générer, annoter et traduire du code. La fonctionnalité \"Ask CodeGeeX\" permet de coder de manière interactive et améliore grandement votre productivité. Téléchargez l'extension CodeGeeX dans votre IDE pour une meilleure expérience de développement. Trouvez plus de détail sur notre [site]( https://codegeex.cn/).\n\n## Utilisation\n\nPour exécuter [CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b), utilisez la librairie `transformers`：\n\n```python\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\nmodel = model.eval()\n\n# TIP: Utilisez un tag pour identifier le langage dans lequel vous souhaitez générer.\nprompt = \"# language: Python\\n# write a bubble sort function\\n\"\ninputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)\nresponse = tokenizer.decode(outputs[0])\n\n>>> print(response)\n# language: Python\n# write a bubble sort function\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 2, 1, 8, 4]))\n```\n\nAccéder à la démo Gradio:\n```\npython ./demo/run_demo.py\n```\n\n❗️Attention:\n* Cette version de CodeGeeX2 est capable de compléter / expliquer / traduire du code mais n'a pas été fine-tuned pour être utilisé comme un 
chatbot. Pour accéder à la version chatbot de CodeGeeX, utilisez les extensions [VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex) et [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex).\n* Pour contrôler le langage dans lequel CodeGeeX2 opère, utilisez des tags formatés ainsi: `# language: Python`. La liste de tous les langages de programmation que CodeGeeX supporte est accessible [ici](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14).\n* Si vous avez besoin d'utiliser plusieurs GPU pour charger le modèle, vous pouvez utiliser le code suivant:\n    ```python\n    tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\n    model = model.eval()\n    ```\n    Remplacer par\n\n    ```python\n    def get_model():\n        tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n        from gpus import load_model_on_gpus\n        # Le fichier \"gpus\" se trouve dans le dossier de démonstration\n        model = load_model_on_gpus(\"THUDM/codegeex2-6b\", num_gpus=2)\n        model = model.eval()\n        return tokenizer, model\n\n    tokenizer, model = get_model()\n    ```\n\n## Evaluation\n\nCodeGeeX2 est un modèle de base capable de générer du code en plusieurs langages de programmation et qui est bien plus performant que la version précédente. 
Voici les capacités de CodeGeeX sur les benchmarks HumanEval, HumanEval-X, et DS1000 (la métrique Pass@k est la même que celle décrite dans ce [papier](https://arxiv.org/abs/2303.17568)):\n\n### HumanEval (Pass@1,10,100)\n\n| **Model**           | **Pass@1** | **Pass@10** | **Pass@100** |\n| :-----------------: | :--------: | :---------: | :----------: |\n| CodeGen-16B-multi   | 19\\.2      | 34\\.6       | 55\\.2        |\n| CodeGeeX-13B        | 22\\.9      | 39\\.6       | 60\\.9        |\n| Codex-12B           | 28\\.8      | 46\\.8       | 72\\.3        |\n| CodeT5Plus-16B-mono | 30\\.9      | 51\\.6       | 76\\.7        |\n| Code-Cushman-001    | 33\\.5      | 54\\.3       | 77\\.4        |\n| LLaMA-65B           | 23\\.7      | -           | 79\\.3        |\n| LLaMA2-70B          | 29\\.9      | -           | -            |\n| CodeGen2\\.5-7B-mono | 33\\.4      | 58\\.4       | 82\\.7        |\n| StarCoder-15B       | 33\\.2      | 61\\.0       | 84\\.7        |\n| **CodeGeeX2-6B**    | **35\\.9**  | **62\\.6**   | **88\\.3**    |\n> `n=20, t=0.2, top_p=0.95` pour **Pass@1**; `n=200, t=0.8, top_p=0.95` pour **Pass@10** et **Pass@100**.\n\n### HumanEval-X (Pass@1)\n\n| **Model**                | **Python** | **C++**   | **Java**  | **JavaScript** | **Go**    | **Rust**  | **Overall** |\n| :------------------: | :--------: | :-------: | :-------: | :------------: | :-------: | :-------: | :---------: |\n| CodeGen-16B-multi    | 19\\.2      | 18\\.1     | 15\\.0     | 18\\.4          | 13\\.0     | 1\\.8      | 14\\.2       |\n| CodeGeeX-13B         | 22\\.9      | 17\\.1     | 20\\.0     | 17\\.6          | 14\\.4     | 4\\.3      | 16\\.0       |\n| Replit-code-v1-3B    | 22\\.0      | 20\\.1     | 20\\.1     | 20\\.1          | 12\\.2     | 8\\.6      | 17\\.2       |\n| CodeGen2\\.5-7B-multi | 30\\.6      | 24\\.3     | 29\\.0     | 27\\.5          | 18\\.9     | **20\\.1** | 25\\.1       |\n| StarCoder-15B        | 35\\.5      | 28\\.2     | **31\\.5** | 
**33\\.2**      | 21\\.3     | 17\\.8     | 27\\.9       |\n| **CodeGeeX2-6B**         | **35\\.9**  | **29\\.3** | 30\\.8     | 32\\.2          | **22\\.5** | 18\\.1     | **28\\.1**   |\n> `n=20, t=0.2, top_p=0.95` for **Pass@1**.\n\nLes résultats ci-dessus peuvent être reproduits avec le script `scripts/run_humanevalx.sh`. Les environements utilisés sont renseignés [ici](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README_zh.md).\n\n### DS1000 (Pass@1)\n\n| **Model**            | **Matplotlib** | **Numpy** | **Pandas** | **Pytorch** | **SciPy** | **Scikit-learn** | **TensorFlow** | **Overall** |\n| :--------------: | :------------: | :-------: | :--------: | :---------: | :-------: | :--------------: | :------------: | :---------: |\n| \\# Samples       | 155            | 220       | 291        | 68          | 106       | 115              | 45             | 1000        |\n| CodeGen-16B-Mono | 31\\.7          | 10\\.9     | 3\\.4       | 7\\.0        | 9\\.0      | 10\\.8            | 15\\.2          | 11\\.7       |\n| code-cushman-001 | 40\\.7          | 21\\.8     | 7\\.9       | 12\\.4       | 11\\.3     | 18\\.0            | 12\\.2          | 18\\.1       |\n| Codex-001        | 41\\.8          | 26\\.6     | 9\\.4       | 9\\.7        | 15\\.0     | 18\\.5            | 17\\.2          | 20\\.2       |\n| **CodeGeeX2-6B** | 40\\.5          | 25\\.5     | 14\\.5      | 17\\.3       | 19\\.3     | 24\\.0            | 23\\.0          | 23\\.1       |\n| StarCoder-15B    | 51\\.7          | 29\\.7     | 11\\.4      | 21\\.4       | 20\\.2     | 29\\.5            | 24\\.5          | 26\\.0       |\n| Codex-002        | **57\\.0**      | **43\\.1** | **26\\.5**  | **41\\.8**   | **31\\.8** | **44\\.8**        | **39\\.3**      | **39\\.2**   |\n> `n=40, t=0.2, top_p=0.5` for **Pass@1**。\n\nLes résultats ci-dessus peuvent être reproduits avec le code présent sur le repository [HKUNLP/DS-1000](https://github.com/HKUNLP/DS-1000.git).\n\n## 
Inférence\n\nCodeGeeX2 est bien plus simple à déployer que la génération précédente. L'utilisation de \"Multi-Query Attention\" et \"Flash Attention\" accélère grandement la vitesse de génération et le modèle n'a besoin que de 6GB de mémoire après avoir été quantisé en INT4.\n\n### Quantisation\n\n| **Model**        | FP16/BF16 | INT8    | INT4   |\n| :--------------: | :-------: | :-----: | :----: |\n| CodeGeeX-13B     | 26\\.9 GB   | 14\\.7 GB | -      |\n| **CodeGeeX2-6B** | 13\\.1 GB  | 8\\.2 GB  | 5\\.5 GB |\n> Résultats obtenus avec PyTorch 2.0, avec `torch.nn.functional.scaled_dot_product_attention` qui est une version plus rapide du calcul de l'attention.\n\n### Accélération\n\n| **Model**        | **Inference speed (token/s)** |\n| :--------------: | :-------------: |\n| CodeGeeX-13B     | 32              |\n| **CodeGeeX2-6B** | 94              |\n> `batch_size=1, max_length=2048` et en utilisant l'accélération des GPUs `GeForce RTX-3090`.\n\n## Licence\n\nLe code dans ce dépôt est en libre accès selon les droits et devoirs prévus par la licence [Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0). Les poids du modèle sont régis par la [licence du modèle](MODEL_LICENSE). Les poids du modèle CodeGeeX2-6B sont en accès libre pour toute utilisation dans le cadre de la recherche. 
Pour toute utilisation commerciale, merci de consulter ce [formulaire](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B).\n\n\n## Citation\n\nSi vous trouvez ce projet utile, n'hésitez pas à citer notre article:\n\n```\n@inproceedings{zheng2023codegeex,\n  title={CodeGeeX: A Pre-Trained Model for Code Generation with Multilingual Benchmarking on HumanEval-X},\n  author={Qinkai Zheng and Xiao Xia and Xu Zou and Yuxiao Dong and Shan Wang and Yufei Xue and Zihan Wang and Lei Shen and Andi Wang and Yang Li and Teng Su and Zhilin Yang and Jie Tang},\n  booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\n  pages={5673--5684},\n  year={2023}\n}\n```\n"
  },
  {
    "path": "README_JA.md",
    "content": "![](resources/codegeex_logo.png)\n\n<p align=\"center\">\n    🏠 <a href=\"https://codegeex.cn\" target=\"_blank\">ホームページ</a>｜🛠 拡張 <a href=\"https://marketplace.visualstudio.com/items?itemName=aminer.codegeex\" target=\"_blank\">VS Code</a>, <a href=\"https://plugins.jetbrains.com/plugin/20587-codegeex\" target=\"_blank\">Jetbrains</a>｜🤗 <a href=\"https://huggingface.co/THUDM/codegeex2-6b\" target=\"_blank\">HF Repo</a>｜📄 <a href=\"https://arxiv.org/abs/2303.17568\" target=\"_blank\">論文</a>\n</p>\n\n<p align=\"center\">\n    👋 <a href=\"https://discord.gg/8gjHdkmAN6\" target=\"_blank\">Discord</a> に参加, <a href=\"https://join.slack.com/t/codegeexworkspace/shared_invite/zt-1s118ffrp-mpKKhQD0tKBmzNZVCyEZLw\" target=\"_blank\">Slack</a>, <a href=\"https://t.me/+IipIayJ32B1jOTg1\" target=\"_blank\">Telegram</a>, <a href=\"resources/wechat.md\"target=\"_blank\">WeChat</a>\n</p>\n\n查看[中文版](README.md)<br>\nRead this in [English](README_EN.md)<br>\nLire en [Français](README_FR.md)\n\n# CodeGeeX2: より強力な多言語コード生成モデル\n\nCodeGeeX2 は、多言語コード生成モデル [CodeGeeX](https://github.com/THUDM/CodeGeeX)([KDD'23](https://arxiv.org/abs/2303.17568)) の第 2 世代モデルであり、より多くのコードデータで学習された [ChatGLM2](https://github.com/THUDM/ChatGLM2-6B) アーキテクチャに基づいて実装されています。ChatGLM2 のアドバンテージにより、CodeGeeX2 のコーディング能力は包括的に向上しています(+107% > CodeGeeX; わずか 6B のパラメータで、いくつかのタスクではより大規模な StarCoder-15B を凌駕しています)。以下の特徴があります:\n\n* **より強力なコーディング機能**: CodeGeeX2-6B は、ChatGLM2-6B モデルをベースに、さらに 600B のコードトークンに対して事前学習を行っており、第一世代と比較してコーディング能力が総合的に向上しています。[HumanEval-X](https://huggingface.co/datasets/THUDM/humaneval-x) ベンチマークでは、6 言語すべてで大幅な改善が見られ（Python +57%、C++ +71%、Java +54%、JavaScript +83%、Go +56%、Rust +321%）、Python では Pass@1 一回合格率 35.9% に達し、より大規模な StarCoder-15B を上回りました。\n* **その他の便利な機能**: ChatGLM2-6B モデルの特徴を継承し、CodeGeeX2-6B は中国語と英語のプロンプト、最大 8192 シーケンス長をサポートし、推論速度は第一世代と比較して大幅に改善されています。量子化後、推論に必要な GPU メモリは 6GB のみで、軽量なローカル展開をサポートします。\n* **包括的な AI コーディングアシスタント**: CodeGeeX プラグイン（[VS 
Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex)、[Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)）のバックエンドがアップグレードされ、100 以上のプログラミング言語をサポートし、インフィルやクロスファイル補完などの実用的な機能が追加されました。対話型 AI コーディングアシスタント \"Ask CodeGeeX\" と組み合わせることで、中国語または英語の対話を通じて、コードの要約、コードの翻訳、デバッグ、コメント生成など、さまざまなプログラミング問題を解決することができ、開発者の作業効率を高めることができます。\n* **オープンライセンス**: CodeGeeX2-6B ウェイトは学術研究に全面的に開放しています。商用利用をご希望の方は、[登録フォーム](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B)にご記入の上、お申し込みください。\n\n\n## AI コーディングアシスタント\n\n![](resources/codegeex_demo.png)\n\nVS Code、IntelliJ IDEA、PyCharm、GoLand、WebStorm、Android Studio などの IDE をサポートする CodeGeeX プラグインを開発しました。このプラグインを使用することで、CodeGeeX2 モデルのコード生成と補完、アノテーション、コード変換、\"Ask CodeGeeX\" 対話型プログラミングなどの機能を体験することができ、開発効率を向上させることができます。より包括的な AI コーディング体験を得るために、IDE に CodeGeeX プラグインをダウンロードしてください。詳しくは[ホームページ](https://codegeex.cn/)をご覧ください。\n\n## 始める\n\n[CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b) を素早く起動するには、`transformers` を使用します：\n\n```python\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\nmodel = model.eval()\n\n# remember adding a language tag for better performance\nprompt = \"# language: Python\\n# write a bubble sort function\\n\"\ninputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)\nresponse = tokenizer.decode(outputs[0])\n\n>>> print(response)\n# language: Python\n# write a bubble sort function\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 2, 1, 8, 4]))\n```\n\nGradio DEMO の起動:\n```\npython ./demo/run_demo.py\n```\n\n❗️注意:\n* CodeGeeX2 
はベースモデルであり、チャット用の命令チューニングはされていません。コード補完/翻訳/説明のようなタスクは可能です。CodeGeeX のプラグイン([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex))で命令チューニングされたバージョンを試すことができます。\n* プログラミング言語は、`# language: Python` のように `language tag` を追加することで制御できます。パフォーマンスを確保するため、書式を守る必要があります。完全なリストは[こちら](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14)にあります。より良い結果を得るためには、選択したプログラミング言語のフォーマットでコメントを書いてください。\n* 複数のグラフィックカードを使用してモデルをロードする必要がある場合は、以下のコードを使用できます：\n    ```python\n    tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n    model = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')\n    model = model.eval()\n    ```\n    をに置き換えてください\n\n    ```python\n    def get_model():\n        tokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n        from gpus import load_model_on_gpus\n        # gpusファイルはdemoフォルダにあります\n        model = load_model_on_gpus(\"THUDM/codegeex2-6b\", num_gpus=2)\n        model = model.eval()\n        return tokenizer, model\n\n    tokenizer, model = get_model()\n    ```\n## 評価\n\nCodeGeeX2 は多言語コード生成のベースモデルであり、前世代と比較してコーディング能力が大幅に向上しています。HumanEval、HumanEval-X、DS1000 ベンチマークでの評価結果を以下に示します（評価指標 Pass@k は[論文](https://arxiv.org/abs/2303.17568)と同じです）:\n\n### HumanEval (Pass@1,10,100)\n\n| **Model**           | **Pass@1** | **Pass@10** | **Pass@100** |\n| :-----------------: | :--------: | :---------: | :----------: |\n| CodeGen-16B-multi   | 19\\.2      | 34\\.6       | 55\\.2        |\n| CodeGeeX-13B        | 22\\.9      | 39\\.6       | 60\\.9        |\n| Codex-12B           | 28\\.8      | 46\\.8       | 72\\.3        |\n| CodeT5Plus-16B-mono | 30\\.9      | 51\\.6       | 76\\.7        |\n| Code-Cushman-001    | 33\\.5      | 54\\.3       | 77\\.4        |\n| LLaMA-65B           | 23\\.7      | -           | 79\\.3        |\n| LLaMA2-70B          | 29\\.9     
 | -           | -            |\n| CodeGen2\\.5-7B-mono | 33\\.4      | 58\\.4       | 82\\.7        |\n| StarCoder-15B       | 33\\.2      | 61\\.0       | 84\\.7        |\n| **CodeGeeX2-6B**    | **35\\.9**  | **62\\.6**   | **88\\.3**    |\n> **Pass@1** 使用 `n=20, t=0.2, top_p=0.95`; **Pass@10** および **Pass@100** を使用 `n=200, t=0.8, top_p=0.95`。\n\n### HumanEval-X (Pass@1)\n\n| **Model**                | **Python** | **C++**   | **Java**  | **JavaScript** | **Go**    | **Rust**  | **Overall** |\n| :------------------: | :--------: | :-------: | :-------: | :------------: | :-------: | :-------: | :---------: |\n| CodeGen-16B-multi    | 19\\.2      | 18\\.1     | 15\\.0     | 18\\.4          | 13\\.0     | 1\\.8      | 14\\.2       |\n| CodeGeeX-13B         | 22\\.9      | 17\\.1     | 20\\.0     | 17\\.6          | 14\\.4     | 4\\.3      | 16\\.0       |\n| Replit-code-v1-3B    | 22\\.0      | 20\\.1     | 20\\.1     | 20\\.1          | 12\\.2     | 8\\.6      | 17\\.2       |\n| CodeGen2\\.5-7B-multi | 30\\.6      | 24\\.3     | 29\\.0     | 27\\.5          | 18\\.9     | **20\\.1** | 25\\.1       |\n| StarCoder-15B        | 35\\.5      | 28\\.2     | **31\\.5** | **33\\.2**      | 21\\.3     | 17\\.8     | 27\\.9       |\n| **CodeGeeX2-6B**         | **35\\.9**  | **29\\.3** | 30\\.8     | 32\\.2          | **22\\.5** | 18\\.1     | **28\\.1**   |\n> **Pass@1** 使用 `n=20, t=0.2, top_p=0.95`。\n\n上記の結果は `scripts/run_humanevalx.sh` を実行することで再現できる。実験の設定は [HumanEval-X 環境](https://github.com/THUDM/CodeGeeX/blob/main/codegeex/benchmark/README_zh.md)を参照してください。\n\n### DS1000 (Pass@1)\n\n| **Model**            | **Matplotlib** | **Numpy** | **Pandas** | **Pytorch** | **SciPy** | **Scikit-learn** | **TensorFlow** | **Overall** |\n| :--------------: | :------------: | :-------: | :--------: | :---------: | :-------: | :--------------: | :------------: | :---------: |\n| \\# Samples       | 155            | 220       | 291        | 68          | 106       | 115              | 
45             | 1000        |\n| CodeGen-16B-Mono | 31\\.7          | 10\\.9     | 3\\.4       | 7\\.0        | 9\\.0      | 10\\.8            | 15\\.2          | 11\\.7       |\n| code-cushman-001 | 40\\.7          | 21\\.8     | 7\\.9       | 12\\.4       | 11\\.3     | 18\\.0            | 12\\.2          | 18\\.1       |\n| Codex-001        | 41\\.8          | 26\\.6     | 9\\.4       | 9\\.7        | 15\\.0     | 18\\.5            | 17\\.2          | 20\\.2       |\n| **CodeGeeX2-6B** | 40\\.5          | 25\\.5     | 14\\.5      | 17\\.3       | 19\\.3     | 24\\.0            | 23\\.0          | 23\\.1       |\n| StarCoder-15B    | 51\\.7          | 29\\.7     | 11\\.4      | 21\\.4       | 20\\.2     | 29\\.5            | 24\\.5          | 26\\.0       |\n| Codex-002        | **57\\.0**      | **43\\.1** | **26\\.5**  | **41\\.8**   | **31\\.8** | **44\\.8**        | **39\\.3**      | **39\\.2**   |\n> **Pass@1** 使用 `n=40, t=0.2, top_p=0.5`。\n\n上記の結果は [DS1000 repo](https://github.com/HKUNLP/DS-1000.git) のコードで再現できる。\n\n## 推論\n\nCodeGeeX2 は、前世代よりも導入が容易になりました。マルチクエリーアテンションとフラッシュアテンションの使用により、推論速度が速くなり、INT4 量子化後に必要な GPU メモリは 6GB のみです。\n\n### 量子化\n\n| **Model**        | FP16/BF16 | INT8    | INT4   |\n| :--------------: | :-------: | :-----: | :----: |\n| CodeGeeX-13B     | 26\\.9 GB   | 14\\.7 GB | -      |\n| **CodeGeeX2-6B** | 13\\.1 GB  | 8\\.2 GB  | 5\\.5 GB |\n> PyTorch 2.0に基づき、`torch.nn.functional.scaled_dot_product_attention` を使用して、効率的なアテンションメカニズムを実現。\n\n### 加速\n\n| **Model**        | **推論速度 (token/秒)** |\n| :--------------: | :-------------: |\n| CodeGeeX-13B     | 32              |\n| **CodeGeeX2-6B** | 94              |\n> `batch_size=1, max_length=2048`, どちらもアクセラレーションフレームワークを使用、`GeForce RTX-3090` の場合。\n\n## ライセンス\n\nこのリポジトリのコードは、[Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) ライセンスの下でのオープンソースです。モデルのウェイトは [Model License](MODEL_LICENSE) に基づいてライセンスされています。CodeGeeX2-6B 
のウェイトは学術研究用に公開されています。商用利用を希望される方は、[登録フォーム](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B)にご記入の上、お申し込みください。\n\n\n## 引用\n\n私たちの研究がお役に立ちましたら、ぜひ以下の論文を引用してください:\n\n```\n@inproceedings{zheng2023codegeex,\n  title={CodeGeeX: A Pre-Trained Model for Code Generation with Multilingual Benchmarking on HumanEval-X},\n  author={Qinkai Zheng and Xiao Xia and Xu Zou and Yuxiao Dong and Shan Wang and Yufei Xue and Zihan Wang and Lei Shen and Andi Wang and Yang Li and Teng Su and Zhilin Yang and Jie Tang},\n  booktitle={Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},\n  pages={5673--5684},\n  year={2023}\n}\n```\n"
  },
  {
    "path": "benchmark/humanevalx/go/go.mod",
    "content": "module humanEval\n\ngo 1.18\n\nrequire (\n\tgithub.com/go-openapi/inflect v0.19.0\n\tgithub.com/stretchr/testify v1.8.0\n)\n\nrequire (\n\tgithub.com/davecgh/go-spew v1.1.1 // indirect\n\tgithub.com/pmezard/go-difflib v1.0.0 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "benchmark/humanevalx/go/go.sum",
    "content": "github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/go-openapi/inflect v0.19.0 h1:9jCH9scKIbHeV9m12SmPilScz6krDxKRasNNSNPXu/4=\ngithub.com/go-openapi/inflect v0.19.0/go.mod h1:lHpZVlpIQqLyKwJ4N+YSc9hchQy/i12fJykb83CRBH4=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=\ngithub.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=\ngithub.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=\ngithub.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "benchmark/humanevalx/rust/Cargo.toml",
    "content": "[package]\nname = \"rust\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html\n\n[dependencies]\nrand = \"0.4\"\nregex = \"1\"\nmd5 = \"0.7.0\"\n\n"
  },
  {
    "path": "demo/example_inputs.jsonl",
    "content": "{\"code\": \"# Write a quick sort function\\n\", \"langauge\": \"Python\"}\n{\"code\": \"// 写一个冒泡排序函数\\n\", \"langauge\": \"C++\"}\n{\"code\": \"// 写一个二叉树的类\\npublic class\", \"langauge\": \"Java\"}\n{\"code\": \"// 矩阵求行列式\\n\", \"langauge\": \"Matlab\"}\n{\"code\": \"<!--Write a homepage of CodeGeeX.-->\\n\", \"langauge\": \"HTML\"}\n{\"code\": \"// 写一个服务器框架， 接收浏览器发过来的请求，并返回处理后的内容\\n\", \"langauge\": \"JavaScript\"}\n{\"code\": \"// Write a binary search function\\n\", \"langauge\": \"Rust\"}\n{\"code\": \"-- 查询品类最多的三种食品，但是不包含出现时间在2014年及之后\\n\", \"langauge\": \"SQL\"}\n{\"code\": \"// Write a simple file system that allows parallel read/write in Golang\\n\", \"langauge\": \"Go\"}"
  },
  {
    "path": "demo/fastapicpu.py",
    "content": "from fastapi import FastAPI, Request\nfrom transformers import AutoTokenizer, AutoModel\nimport uvicorn, json, datetime\nimport torch\nimport argparse\n\ntry:\n    import chatglm_cpp\n    enable_chatglm_cpp = True\nexcept:\n    print(\"[WARN] chatglm-cpp not found. Install it by `pip install chatglm-cpp` for better performance. \"\n          \"Check out https://github.com/li-plus/chatglm.cpp for more details.\")\n    enable_chatglm_cpp = False\n\n\n#获取选项        \ndef add_code_generation_args(parser):\n    group = parser.add_argument_group(title=\"CodeGeeX2 DEMO\")\n    group.add_argument(\n        \"--model-path\",\n        type=str,\n        default=\"THUDM/codegeex2-6b\",\n    )\n    group.add_argument(\n        \"--listen\",\n        type=str,\n        default=\"127.0.0.1\",\n    )\n    group.add_argument(\n        \"--port\",\n        type=int,\n        default=7860,\n    )\n    group.add_argument(\n        \"--workers\",\n        type=int,\n        default=1,\n    )\n    group.add_argument(                      \n        \"--cpu\",\n        action=\"store_true\",\n    )\n    group.add_argument(                      \n        \"--half\",\n        action=\"store_true\",\n    )\n    group.add_argument(\n        \"--quantize\",\n        type=int,\n        default=None,\n    )\n    group.add_argument(\n        \"--chatglm-cpp\",\n        action=\"store_true\",\n    )\n    return parser\n\nLANGUAGE_TAG = {\n    \"Abap\"         : \"* language: Abap\",\n    \"ActionScript\" : \"// language: ActionScript\",\n    \"Ada\"          : \"-- language: Ada\",\n    \"Agda\"         : \"-- language: Agda\",\n    \"ANTLR\"        : \"// language: ANTLR\",\n    \"AppleScript\"  : \"-- language: AppleScript\",\n    \"Assembly\"     : \"; language: Assembly\",\n    \"Augeas\"       : \"// language: Augeas\",\n    \"AWK\"          : \"// language: AWK\",\n    \"Basic\"        : \"' language: Basic\",\n    \"C\"            : \"// language: C\",\n    \"C#\"           
: \"// language: C#\",\n    \"C++\"          : \"// language: C++\",\n    \"CMake\"        : \"# language: CMake\",\n    \"Cobol\"        : \"// language: Cobol\",\n    \"CSS\"          : \"/* language: CSS */\",\n    \"CUDA\"         : \"// language: Cuda\",\n    \"Dart\"         : \"// language: Dart\",\n    \"Delphi\"       : \"{language: Delphi}\",\n    \"Dockerfile\"   : \"# language: Dockerfile\",\n    \"Elixir\"       : \"# language: Elixir\",\n    \"Erlang\"       : f\"% language: Erlang\",\n    \"Excel\"        : \"' language: Excel\",\n    \"F#\"           : \"// language: F#\",\n    \"Fortran\"      : \"!language: Fortran\",\n    \"GDScript\"     : \"# language: GDScript\",\n    \"GLSL\"         : \"// language: GLSL\",\n    \"Go\"           : \"// language: Go\",\n    \"Groovy\"       : \"// language: Groovy\",\n    \"Haskell\"      : \"-- language: Haskell\",\n    \"HTML\"         : \"<!--language: HTML-->\",\n    \"Isabelle\"     : \"(*language: Isabelle*)\",\n    \"Java\"         : \"// language: Java\",\n    \"JavaScript\"   : \"// language: JavaScript\",\n    \"Julia\"        : \"# language: Julia\",\n    \"Kotlin\"       : \"// language: Kotlin\",\n    \"Lean\"         : \"-- language: Lean\",\n    \"Lisp\"         : \"; language: Lisp\",\n    \"Lua\"          : \"// language: Lua\",\n    \"Markdown\"     : \"<!--language: Markdown-->\",\n    \"Matlab\"       : f\"% language: Matlab\",\n    \"Objective-C\"  : \"// language: Objective-C\",\n    \"Objective-C++\": \"// language: Objective-C++\",\n    \"Pascal\"       : \"// language: Pascal\",\n    \"Perl\"         : \"# language: Perl\",\n    \"PHP\"          : \"// language: PHP\",\n    \"PowerShell\"   : \"# language: PowerShell\",\n    \"Prolog\"       : f\"% language: Prolog\",\n    \"Python\"       : \"# language: Python\",\n    \"R\"            : \"# language: R\",\n    \"Racket\"       : \"; language: Racket\",\n    \"RMarkdown\"    : \"# language: RMarkdown\",\n    \"Ruby\"         : \"# 
language: Ruby\",\n    \"Rust\"         : \"// language: Rust\",\n    \"Scala\"        : \"// language: Scala\",\n    \"Scheme\"       : \"; language: Scheme\",\n    \"Shell\"        : \"# language: Shell\",\n    \"Solidity\"     : \"// language: Solidity\",\n    \"SPARQL\"       : \"# language: SPARQL\",\n    \"SQL\"          : \"-- language: SQL\",\n    \"Swift\"        : \"// language: swift\",\n    \"TeX\"          : f\"% language: TeX\",\n    \"Thrift\"       : \"/* language: Thrift */\",\n    \"TypeScript\"   : \"// language: TypeScript\",\n    \"Vue\"          : \"<!--language: Vue-->\",\n    \"Verilog\"      : \"// language: Verilog\",\n    \"Visual Basic\" : \"' language: Visual Basic\",\n}\n\napp = FastAPI()\ndef device():\n    if enable_chatglm_cpp and args.chatglm_cpp:\n        print(\"Using chatglm-cpp to improve performance\")\n        dtype = \"f16\" if args.half else \"f32\"\n        if args.quantize in [4, 5, 8]:\n            dtype = f\"q{args.quantize}_0\"\n        model = chatglm_cpp.Pipeline(args.model_path, dtype=dtype)\n        return model\n\n    print(\"chatglm-cpp not enabled, falling back to transformers\")\n    if not args.cpu:\n        if not args.half:\n            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True).cuda()\n        else:\n            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True).cuda().half()\n        if args.quantize in [4, 8]:\n            print(f\"Model is quantized to INT{args.quantize} format.\")\n            model = model.half().quantize(args.quantize)\n    else:\n        model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)\n\n    return model.eval()\n\n@app.post(\"/\")\nasync def create_item(request: Request):\n    global model, tokenizer\n    json_post_raw = await request.json()\n    json_post = json.dumps(json_post_raw)\n    json_post_list = json.loads(json_post)\n    lang = json_post_list.get('lang')\n    prompt = 
json_post_list.get('prompt')\n    max_length = json_post_list.get('max_length', 128)\n    top_p = json_post_list.get('top_p', 0.95)\n    temperature = json_post_list.get('temperature', 0.2)\n    top_k = json_post_list.get('top_k', 0)\n    if lang != \"None\":\n        prompt = LANGUAGE_TAG[lang] + \"\\n\" + prompt\n    if enable_chatglm_cpp and args.chatglm_cpp:\n        response = model.generate(prompt,\n                                  max_length=max_length,\n                                  do_sample=temperature > 0,\n                                  top_p=top_p,\n                                  top_k=top_k,\n                                  temperature=temperature)\n    else:\n        response = model.chat(tokenizer,\n                              prompt,\n                              max_length=max_length,\n                              top_p=top_p,\n                              top_k=top_k,\n                              temperature=temperature)\n    now = datetime.datetime.now()\n    time = now.strftime(\"%Y-%m-%d %H:%M:%S\")\n    answer = {\n        \"response\": response,\n        \"lang\": lang,\n        \"status\": 200,\n        \"time\": time\n    }\n    log = \"[\" + time + \"] \" + '\", prompt:\"' + prompt + '\", response:\"' + repr(response) + '\"'\n    print(log)\n\n    return answer\n\n\nif __name__ == '__main__':    \n    parser = argparse.ArgumentParser()\n    parser = add_code_generation_args(parser)\n    args, _ = parser.parse_known_args()\n    tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)\n    model = device()\n    uvicorn.run(app, host=args.listen, port=args.port, workers=args.workers)\n"
  },
  {
    "path": "demo/gpus.py",
    "content": "import os\r\nfrom typing import Dict, Tuple, Union, Optional\r\n\r\nfrom torch.nn import Module\r\nfrom transformers import AutoModel\r\n\r\n\r\ndef auto_configure_device_map(num_gpus: int) -> Dict[str, int]:\r\n    # transformer.word_embeddings 占用1层\r\n    # transformer.final_layernorm 和 lm_head 占用1层\r\n    # transformer.layers 占用 28 层\r\n    # 总共30层分配到num_gpus张卡上\r\n    num_trans_layers = 28\r\n    per_gpu_layers = 30 / num_gpus\r\n\r\n    # bugfix: 在linux中调用torch.embedding传入的weight,input不在同一device上,导致RuntimeError\r\n    # windows下 model.device 会被设置成 transformer.word_embeddings.device\r\n    # linux下 model.device 会被设置成 lm_head.device\r\n    # 在调用chat或者stream_chat时,input_ids会被放到model.device上\r\n    # 如果transformer.word_embeddings.device和model.device不同,则会导致RuntimeError\r\n    # 因此这里将transformer.word_embeddings,transformer.final_layernorm,lm_head都放到第一张卡上\r\n    # 本文件来源于https://github.com/THUDM/ChatGLM-6B/blob/main/utils.py\r\n    # 仅此处做少许修改以支持ChatGLM2,CodeGeeX2\r\n    device_map = {\r\n        'transformer.embedding.word_embeddings': 0,\r\n        'transformer.encoder.final_layernorm': 0,\r\n        'transformer.output_layer': 0,\r\n        'transformer.rotary_pos_emb': 0,\r\n        'lm_head': 0\r\n    }\r\n\r\n    used = 2\r\n    gpu_target = 0\r\n    for i in range(num_trans_layers):\r\n        if used >= per_gpu_layers:\r\n            gpu_target += 1\r\n            used = 0\r\n        assert gpu_target < num_gpus\r\n        device_map[f'transformer.encoder.layers.{i}'] = gpu_target\r\n        used += 1\r\n\r\n    return device_map\r\n\r\n\r\ndef load_model_on_gpus(checkpoint_path: Union[str, os.PathLike], num_gpus: int = 2,\r\n                       device_map: Optional[Dict[str, int]] = None, **kwargs) -> Module:\r\n    if num_gpus < 2 and device_map is None:\r\n        model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True, **kwargs).half().cuda()\r\n    else:\r\n        from accelerate import dispatch_model\r\n\r\n        
model = AutoModel.from_pretrained(checkpoint_path, trust_remote_code=True, **kwargs).half()\r\n\r\n        if device_map is None:\r\n            device_map = auto_configure_device_map(num_gpus)\r\n\r\n        model = dispatch_model(model, device_map=device_map)\r\n\r\n    return model\r\n"
  },
  {
    "path": "demo/run_demo.py",
    "content": "import os\nimport json\nimport numpy\nimport torch\nimport random\nimport argparse\nimport gradio as gr\n\nfrom transformers import AutoTokenizer, AutoModel\n\ntry:\n    # Should first install fastllm (https://github.com/ztxz16/fastllm.git)\n    from fastllm_pytools import llm\n    enable_fastllm = True\nexcept:\n    print(\"fastllm disabled.\")\n    enable_fastllm = False\n\ntry:\n    from gpus import load_model_on_gpus\n    enable_multiple_gpus = True\nexcept:\n    print(\"Multiple GPUs support disabled.\")\n    enable_multiple_gpus = False\n\ntry:\n    import chatglm_cpp\n    enable_chatglm_cpp = True\nexcept:\n    print(\"[WARN] chatglm-cpp not found. Install it by `pip install chatglm-cpp` for better performance. \"\n          \"Check out https://github.com/li-plus/chatglm.cpp for more details.\")\n    enable_chatglm_cpp = False\n\n\ndef get_model(args):\n    if not args.cpu:\n        if torch.cuda.is_available():\n            device = f\"cuda:{args.gpu}\"\n        elif torch.backends.mps.is_built():\n            device = \"mps\"\n        else:\n            device = \"cpu\"\n    else:\n        device = \"cpu\"\n    \n    tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)\n\n    if args.n_gpus > 1 and enable_multiple_gpus:\n        # 如需实现多显卡模型加载,传入\"n_gpus\"为需求的显卡数量 / To enable Multiple GPUs model loading, please adjust \"n_gpus\" to the desired number of graphics cards.\n        print(f\"Runing on {args.n_gpus} GPUs.\")\n        model = load_model_on_gpus(args.model_path, num_gpus=args.n_gpus)\n        model = model.eval()\n    elif enable_chatglm_cpp and args.chatglm_cpp:\n        print(\"Using chatglm-cpp to improve performance\")\n        dtype = \"f16\"\n        if args.quantize in [4, 5, 8]:\n            dtype = f\"q{args.quantize}_0\"\n        model = chatglm_cpp.Pipeline(args.model_path, dtype=dtype)\n    else:\n        model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True)\n        
model = model.eval()\n\n        if enable_fastllm and args.fastllm:\n            print(\"fastllm enabled.\")\n            model = model.half()\n            llm.set_device_map(device)\n            if args.quantize in [4, 8]:\n                model = llm.from_hf(model, dtype=f\"int{args.quantize}\")\n            else:\n                model = llm.from_hf(model, dtype=\"float16\")\n        else:\n            print(\"chatglm-cpp and fastllm not installed, using transformers.\")\n            if args.quantize in [4, 8]:\n                print(f\"Model is quantized to INT{args.quantize} format.\")\n                model = model.half().quantize(args.quantize)\n            model = model.to(device)\n\n    return tokenizer, model\n\n\ndef add_code_generation_args(parser):\n    group = parser.add_argument_group(title=\"CodeGeeX2 DEMO\")\n    group.add_argument(\n        \"--model-path\",\n        type=str,\n        default=\"THUDM/codegeex2-6b\",\n    )\n    group.add_argument(\n        \"--example-path\",\n        type=str,\n        default=None,\n    )\n    group.add_argument(\n        \"--quantize\",\n        type=int,\n        default=None,\n    )\n    group.add_argument(\n        \"--chatglm-cpp\",\n        action=\"store_true\",\n    )\n    group.add_argument(\n        \"--fastllm\",\n        action=\"store_true\",\n    )\n    group.add_argument(\n        \"--n-gpus\",\n        type=int,\n        default=1,\n    )\n    group.add_argument(\n        \"--gpu\",\n        type=int,\n        default=0,\n    )\n    group.add_argument(\n        \"--cpu\",\n        action=\"store_true\",\n    )\n    group.add_argument(\n        \"--listen\",\n        type=str,\n        default=\"127.0.0.1\",\n    )\n    group.add_argument(\n        \"--port\",\n        type=int,\n        default=7860,\n    )\n    group.add_argument(\n        \"--username\",\n        type=str,\n        default=None,\n    )\n    group.add_argument(\n        \"--password\",\n        type=str,\n        
default=None,\n    )\n    group.add_argument(\n        \"--auth\",\n        action=\"store_true\",\n    )\n    \n    \n    return parser\n\n\n# 更完编程语言列表请查看 evaluation/utils.py / Full list of supported languages in evaluation/utils.py\nLANGUAGE_TAG = {\n    \"Abap\"         : \"* language: Abap\",\n    \"ActionScript\" : \"// language: ActionScript\",\n    \"Ada\"          : \"-- language: Ada\",\n    \"Agda\"         : \"-- language: Agda\",\n    \"ANTLR\"        : \"// language: ANTLR\",\n    \"AppleScript\"  : \"-- language: AppleScript\",\n    \"Assembly\"     : \"; language: Assembly\",\n    \"Augeas\"       : \"// language: Augeas\",\n    \"AWK\"          : \"// language: AWK\",\n    \"Basic\"        : \"' language: Basic\",\n    \"C\"            : \"// language: C\",\n    \"C#\"           : \"// language: C#\",\n    \"C++\"          : \"// language: C++\",\n    \"CMake\"        : \"# language: CMake\",\n    \"Cobol\"        : \"// language: Cobol\",\n    \"CSS\"          : \"/* language: CSS */\",\n    \"CUDA\"         : \"// language: Cuda\",\n    \"Dart\"         : \"// language: Dart\",\n    \"Delphi\"       : \"{language: Delphi}\",\n    \"Dockerfile\"   : \"# language: Dockerfile\",\n    \"Elixir\"       : \"# language: Elixir\",\n    \"Erlang\"       : f\"% language: Erlang\",\n    \"Excel\"        : \"' language: Excel\",\n    \"F#\"           : \"// language: F#\",\n    \"Fortran\"      : \"!language: Fortran\",\n    \"GDScript\"     : \"# language: GDScript\",\n    \"GLSL\"         : \"// language: GLSL\",\n    \"Go\"           : \"// language: Go\",\n    \"Groovy\"       : \"// language: Groovy\",\n    \"Haskell\"      : \"-- language: Haskell\",\n    \"HTML\"         : \"<!--language: HTML-->\",\n    \"Isabelle\"     : \"(*language: Isabelle*)\",\n    \"Java\"         : \"// language: Java\",\n    \"JavaScript\"   : \"// language: JavaScript\",\n    \"Julia\"        : \"# language: Julia\",\n    \"Kotlin\"       : \"// language: Kotlin\",\n    
\"Lean\"         : \"-- language: Lean\",\n    \"Lisp\"         : \"; language: Lisp\",\n    \"Lua\"          : \"// language: Lua\",\n    \"Markdown\"     : \"<!--language: Markdown-->\",\n    \"Matlab\"       : f\"% language: Matlab\",\n    \"Objective-C\"  : \"// language: Objective-C\",\n    \"Objective-C++\": \"// language: Objective-C++\",\n    \"Pascal\"       : \"// language: Pascal\",\n    \"Perl\"         : \"# language: Perl\",\n    \"PHP\"          : \"// language: PHP\",\n    \"PowerShell\"   : \"# language: PowerShell\",\n    \"Prolog\"       : f\"% language: Prolog\",\n    \"Python\"       : \"# language: Python\",\n    \"R\"            : \"# language: R\",\n    \"Racket\"       : \"; language: Racket\",\n    \"RMarkdown\"    : \"# language: RMarkdown\",\n    \"Ruby\"         : \"# language: Ruby\",\n    \"Rust\"         : \"// language: Rust\",\n    \"Scala\"        : \"// language: Scala\",\n    \"Scheme\"       : \"; language: Scheme\",\n    \"Shell\"        : \"# language: Shell\",\n    \"Solidity\"     : \"// language: Solidity\",\n    \"SPARQL\"       : \"# language: SPARQL\",\n    \"SQL\"          : \"-- language: SQL\",\n    \"Swift\"        : \"// language: swift\",\n    \"TeX\"          : f\"% language: TeX\",\n    \"Thrift\"       : \"/* language: Thrift */\",\n    \"TypeScript\"   : \"// language: TypeScript\",\n    \"Vue\"          : \"<!--language: Vue-->\",\n    \"Verilog\"      : \"// language: Verilog\",\n    \"Visual Basic\" : \"' language: Visual Basic\",\n}\n\n\ndef set_random_seed(seed):\n    \"\"\"Set random seed for reproducability.\"\"\"\n    random.seed(seed)\n    numpy.random.seed(seed)\n    torch.manual_seed(seed)\n\n\ndef main():\n    parser = argparse.ArgumentParser()\n    parser = add_code_generation_args(parser)\n    args, _ = parser.parse_known_args()\n\n    tokenizer, model = get_model(args)\n\n    examples = []\n    if args.example_path is None:\n        example_path = 
os.path.join(os.path.split(os.path.realpath(__file__))[0], \"example_inputs.jsonl\")\n    else:\n        example_path = args.example_path\n\n    # Load examples for gradio DEMO\n    with open(example_path, \"r\", encoding=\"utf-8\") as f:\n        for line in f:\n            examples.append(list(json.loads(line).values()))\n\n\n    def predict(\n        prompt, \n        lang,\n        seed, \n        out_seq_length, \n        temperature, \n        top_k, \n        top_p,\n    ):\n        set_random_seed(seed)\n        if lang != \"None\":\n            prompt = LANGUAGE_TAG[lang] + \"\\n\" + prompt\n        \n        if enable_fastllm and args.fastllm:\n            model.direct_query = True\n            outputs = model.chat(tokenizer, \n                                 prompt,\n                                 max_length=out_seq_length,\n                                 top_p=top_p,\n                                 top_k=top_k,\n                                 temperature=temperature)\n            response = prompt + outputs[0]\n        elif enable_chatglm_cpp and args.chatglm_cpp:\n            inputs = tokenizer([prompt], return_tensors=\"pt\")\n            pipeline = model\n            outputs = pipeline.generate(prompt,\n                                        max_length=inputs['input_ids'].shape[-1] + out_seq_length,\n                                        do_sample=temperature > 0,\n                                        top_p=top_p,\n                                        top_k=top_k,\n                                        temperature=temperature)\n            response = prompt + outputs\n        else:\n            inputs = tokenizer([prompt], return_tensors=\"pt\")\n            inputs = inputs.to(model.device)\n            outputs = model.generate(**inputs,\n                                     max_length=inputs['input_ids'].shape[-1] + out_seq_length,\n                                     do_sample=True,\n                                     
top_p=top_p,\n                                     top_k=top_k,\n                                     temperature=temperature,\n                                     pad_token_id=2,\n                                     eos_token_id=2)\n            response = tokenizer.decode(outputs[0])\n        \n        return response\n    \n    with gr.Blocks(title=\"CodeGeeX2 DEMO\") as demo:\n        gr.Markdown(\n            \"\"\"\n            <p align=\"center\">\n                <img src=\"https://raw.githubusercontent.com/THUDM/CodeGeeX2/main/resources/codegeex_logo.png\">\n            </p>\n            \"\"\")\n        gr.Markdown(\n            \"\"\"\n            <p align=\"center\">\n                🏠 <a href=\"https://codegeex.cn\" target=\"_blank\">Homepage</a>｜💻 <a href=\"https://github.com/THUDM/CodeGeeX2\" target=\"_blank\">GitHub</a>｜🛠 Tools <a href=\"https://marketplace.visualstudio.com/items?itemName=aminer.codegeex\" target=\"_blank\">VS Code</a>, <a href=\"https://plugins.jetbrains.com/plugin/20587-codegeex\" target=\"_blank\">Jetbrains</a>｜🤗 <a href=\"https://huggingface.co/THUDM/codegeex2-6b\" target=\"_blank\">Download</a>｜📄 <a href=\"https://arxiv.org/abs/2303.17568\" target=\"_blank\">Paper</a>\n            </p>\n            \"\"\")\n        gr.Markdown(\n            \"\"\"\n            这是 CodeGeeX2 的简易DEMO。请注意：\n            * CodeGeeX2 是一个基座模型，它可以完成代码补全/翻译/解释等任务，没有针对聊天进行指令微调。可以在 CodeGeeX 插件[VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex)、[Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)中体验指令微调后的版本。\n            * 可以通过添加`language tag`来控制编程语言，例如`# language: Python`，查看[完整支持语言列表](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14)。\n            * 按照所选编程语言的格式写注释可以获得更好的结果，请参照下方给出的示例。\n\n            This is the DEMO for CodeGeeX2. Please note that:\n            * CodeGeeX2 is a base model, which is not instruction-tuned for chatting. It can do tasks like code completion/translation/explaination. 
Try the instruction-tuned version in CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)).\n            * Programming languages can be controlled by adding `language tag`, e.g., `# language: Python`. The format should be respected to ensure performance; the full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14).\n            * Write comments in the format of the selected programming language to achieve better results; see examples below.\n            \"\"\")\n\n        with gr.Row():\n            with gr.Column():\n                prompt = gr.Textbox(lines=14, placeholder='Please enter the description or select an example input below.',label='Input')\n                with gr.Row():\n                    gen = gr.Button(\"Generate\")\n                    clr = gr.Button(\"Clear\")\n\n            outputs = gr.Textbox(lines=15, label='Output')\n\n        gr.Markdown(\n            \"\"\"\n            Generation Parameter\n            \"\"\")\n        \n        with gr.Row():\n            with gr.Row():\n                seed = gr.Slider(maximum=10000, value=8888, step=1, label='Seed')\n                with gr.Row():\n                    out_seq_length = gr.Slider(maximum=8192, value=128, minimum=1, step=1, label='Output Sequence Length')\n                    temperature = gr.Slider(maximum=1, value=0.2, minimum=0, label='Temperature')\n                with gr.Row():\n                    top_k = gr.Slider(maximum=100, value=0, minimum=0, step=1, label='Top K')\n                    top_p = gr.Slider(maximum=1, value=0.95, minimum=0, label='Top P')\n        with gr.Row():\n            lang = gr.Radio(\n                choices=[\"None\"] + list(LANGUAGE_TAG.keys()), value='None', label='Programming Language')\n        inputs = [prompt, lang, seed, out_seq_length, temperature, top_k, top_p]\n        gen.click(fn=predict, 
inputs=inputs, outputs=outputs)\n        clr.click(fn=lambda value: gr.update(value=\"\"), inputs=clr, outputs=prompt)\n\n        gr_examples = gr.Examples(examples=examples, inputs=[prompt, lang],\n                                  label=\"Example Inputs (Click to insert an example into the input box)\",\n                                  examples_per_page=20)\n    if not args.auth:\n        demo.launch(server_name=args.listen, server_port=args.port)\n    else:\n        demo.launch(server_name=args.listen, server_port=args.port, auth=(args.username, args.password))\n    \n    #如果需要监听0.0.0.0和其他端口 可以改成 demo.launch(server_name=\"0.0.0.0\", server_port=6666)\n    #如果需要加密码 demo.launch(server_name=\"0.0.0.0\", server_port=6666, auth=(\"admin\", \"password\"))\n\nif __name__ == '__main__':\n    with torch.no_grad():\n        main()\n\n"
  },
  {
    "path": "docs/zh/inference_zh.md",
    "content": "# CodeGeeX2推理教程\n\nCodeGeeX2 是多语言代码生成模型 [CodeGeeX](https://github.com/THUDM/CodeGeeX) ([KDD’23](https://arxiv.org/abs/2303.17568)) 的第二代模型，更强，更快，更轻量，是适合本地部署的AI代码生成助手。CodeGeeX2 支持在多种不同平台上进行推理，本教程将会介绍几种不同的推理方式，包括CPU推理，多卡推理，加速推理等。\n\n- [快速开始](#快速开始)\n- [多精度/量化推理](#多精度/量化推理)\n- [多GPU推理](#多GPU推理)\n- [Mac推理](#Mac推理)\n- [fastllm加速推理](#fastllm加速推理)\n- [ChatGLM.cpp量化推理](#chatglmcpp-量化推理)\n\n## 快速开始\n\n下载本仓库并使用`pip`安装环境依赖：\n\n```shell\ngit clone https://github.com/THUDM/CodeGeeX2\ncd CodeGeeX2\npip install -r requirements.txt\n```\n\n使用`transformers`快速调用[CodeGeeX2-6B](https://huggingface.co/THUDM/codegeex2-6b)，将自动下载权重到本地：\n\n```python\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True, device='cuda')  # 如使用CPU推理，device='cpu'\nmodel = model.eval()\n\n# CodeGeeX2支持100种编程语言，加入语言标签引导生成相应的语言\nprompt = \"# language: Python\\n# write a bubble sort function\\n\"\ninputs = tokenizer.encode(prompt, return_tensors=\"pt\").to(model.device)\noutputs = model.generate(inputs, max_length=256, top_k=1)  # 示例中使用greedy decoding，检查输出结果是否对齐\nresponse = tokenizer.decode(outputs[0])\n\n>>> print(response)\n# language: Python\n# write a bubble sort function\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 2, 1, 8, 4]))\n```\n\n亦可以手动下载权重：\n\n```shell\n# huggingface下载\ngit clone https://huggingface.co/THUDM/codegeex2-6b\n```\n\n将tokenizer和model路径改为本地路径：\n\n```python\nmodel_path = \"/path/to/codegeex2-6b\"\ntokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)\nmodel = AutoModel.from_pretrained(model_path, trust_remote_code=True)\n```\n\n## 多精度/量化推理\n\nCodeGeeX2 
使用BF16训练，推理时支持BF16/FP16/INT8/INT4，可以根据显卡显存选择合适的精度格式：\n\n|    **Model**     | FP16/BF16 |   INT8   |  INT4   |\n| :--------------: | :-------: | :------: | :-----: |\n|   CodeGeeX-13B   | 26\\.9 GB  | 14\\.7 GB |    -    |\n| **CodeGeeX2-6B** | 13\\.1 GB  | 8\\.2 GB  | 5\\.5 GB |\n\n默认使用BF16精度进行推理，如显卡不支持BF16（❗️如使用错误的格式，推理结果将出现乱码），需要转换为FP16格式：\n\n```python\nmodel = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().to(\"cuda\")\n```\n\n量化推理以INT4为例，可以下载转换好的权重（[INT4权重](https://huggingface.co/THUDM/codegeex2-6b-int4)）或手动转换，如果显卡不支持BF16，也需要先转换为FP16格式：\n\n```python\n# 下载转换好的权重\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b-int4\", trust_remote_code=True)\n\n# 手动转换权重\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True).quantize(4).to(\"cuda\")\n\n# 如果显卡不支持BF16，需要先转换为FP16格式\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True).half().quantize(4).to(\"cuda\")\n```\n\n##  多GPU推理\n\n用[gpus.py](https://github.com/THUDM/CodeGeeX2/blob/main/demo/gpus.py)实现多GPU推理：\n\n```python\nfrom gpus import load_model_on_gpus\nmodel = load_model_on_gpus(\"THUDM/codegeex2-6b\", num_gpus=2)\n```\n\n## Mac推理\n\n对于搭载了 Apple Silicon 或者 AMD GPU 的 Mac，可以使用 MPS 后端运行。参考 Apple 的 [官方说明](https://developer.apple.com/metal/pytorch) 安装 PyTorch-Nightly（正确的版本号应该是2.x.x.dev2023xxxx，如2.1.0.dev20230729）：\n\n```shell\npip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu\n```\n\n在 MacOS 上只支持从本地加载模型（提前下载权重[codegeex2-6b](https://huggingface.co/THUDM/codegeex2-6b)，[codegeex2-6b-int4](https://huggingface.co/THUDM/codegeex2-6b-int4)），支持FP16/INT8/INT4格式，并使用 mps 后端：\n\n```python\nmodel = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().to('mps')\n```\n\n## fastllm加速推理\n\n可以使用[fastllm](https://github.com/ztxz16/fastllm)对 CodeGeeX2 进行加速，fastllm是目前支持GLM架构的最快开源框架。首先安装fastllm_pytools：\n\n```shell\ngit clone https://github.com/ztxz16/fastllm\ncd fastllm\nmkdir 
build\ncd build\n# 使用GPU编译，需要添加CUDA路径：export CUDA_HOME=/usr/local/cuda/bin:$PATH，export PATH=$PATH:$CUDA_HOME/bin\ncmake .. -DUSE_CUDA=ON # 如果不使用GPU编译 cmake .. -DUSE_CUDA=OFF\nmake -j\ncd tools && python setup.py install  # 确认安装是否成功，在python中 import fastllm_pytools 不报错\n```\n\n如出现架构不支持的报错，需要调整`CMakeLists.txt`，注释掉下面一行：\n\n```shell\n# set(CMAKE_CUDA_ARCHITECTURES \"native\")\n```\n如果是E5系列的CPU可能会出现下面的编译报错\n```\n error: inlining failed in call to ‘always_inline’ ‘__m256i _mm256_add_epi32(__m256i, __m256i)’: target specific option mismatch\n```\n此时将'CmakeLists.txt'的第20行修改如下即可编译成功:\n```\nset(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -pthread --std=c++17 -O2\")\n```\n\n将huggingface转换成fastllm格式：\n\n```python\n# 原本的调用代码\nfrom transformers import AutoTokenizer, AutoModel\ntokenizer = AutoTokenizer.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\nmodel = AutoModel.from_pretrained(\"THUDM/codegeex2-6b\", trust_remote_code=True)\n\n# 加入下面这两行，将huggingface模型转换成fastllm模型\nfrom fastllm_pytools import llm\nmodel = llm.from_hf(model, tokenizer, dtype=\"float16\") # dtype支持 \"float16\", \"int8\", \"int4\"\n```\n\nfastllm中模型接口和huggingface不完全相同，可以参考[demo/run_demo.py](https://github.com/THUDM/CodeGeeX2/blob/main/demo/run_demo.py)中的相关实现：\n\n```python\nmodel.direct_query = True\noutputs = model.chat(tokenizer, \n                     prompt,\n                     max_length=out_seq_length,\n                     top_p=top_p,\n                     top_k=top_k,\n                     temperature=temperature)\nresponse = outputs[0]\n```\n\n## ChatGLM.cpp 量化推理\n\n[ChatGLM.cpp](https://github.com/li-plus/chatglm.cpp) 是类似 LLaMA.cpp 的全平台量化加速方案，支持 q4_0/q4_1/q5_0/q5_1/q8_0 多种量化精度，CPU/CUDA/Metal 多种后端，仅用一行代码实现推理加速。\n\n首先安装 chatglm-cpp。如需使用 CUDA 加速，需要添加环境变量 `CMAKE_ARGS=\"-DGGML_CUBLAS=ON\"`；如果仅使用 CPU 加速，将该环境变量去掉即可。\n```sh\nCMAKE_ARGS=\"-DGGML_CUBLAS=ON\" pip install chatglm-cpp -v\n```\n\n仅需一行代码即可量化加速 Hugging Face 模型，`dtype` 可指定 `q4_0`, `q4_1`, `q5_0`, `q5_1`, `q8_0`, 
`f16`，表示不同的量化类型。\n```python\n>>> import chatglm_cpp\n>>> \n>>> pipeline = chatglm_cpp.Pipeline(\"THUDM/codegeex2-6b\", dtype=\"q4_0\") # Load HF model and quantize it into int4\nLoading checkpoint shards: 100%|███████████████████████████████████████████████| 7/7 [00:09<00:00,  1.33s/it]\nProcessing model states: 100%|█████████████████████████████████████████████| 199/199 [00:21<00:00,  9.21it/s]\n...\n>>> print(pipeline.generate(\"# language: Python\\n# write a bubble sort function\\n\", do_sample=False))\n\n\ndef bubble_sort(list):\n    for i in range(len(list) - 1):\n        for j in range(len(list) - 1):\n            if list[j] > list[j + 1]:\n                list[j], list[j + 1] = list[j + 1], list[j]\n    return list\n\n\nprint(bubble_sort([5, 4, 3, 2, 1]))\n```\n\nChatGLM.cpp 已集成到本仓库，demo 添加选项 `--quantize 4 --chatglm-cpp` 即可开启 int4 (q4_0) 量化加速，例如：\n```sh\npython ./demo/run_demo.py --quantize 4 --chatglm-cpp\n```\n\nFast API 同样支持 ChatGLM.cpp 加速，添加同样参数启动服务：\n```sh\npython ./demo/fastapicpu.py --quantize 4 --chatglm-cpp\n```\n\n测试服务接口：\n```sh\ncurl -X POST \"http://127.0.0.1:7860\" \\\n    -H 'Content-Type: application/json' \\\n    -d '{\"lang\": \"Python\", \"prompt\": \"# Write a bubble sort function\", \"max_length\": 512}'\n```\n"
  },
  {
    "path": "evaluation/__init__.py",
    "content": ""
  },
  {
    "path": "evaluation/evaluation.py",
    "content": "import os\nimport re\nimport sys\nimport fire\nimport json\nimport gzip\nimport glob\nimport numpy as np\n\nfrom typing import *\nfrom tqdm.auto import tqdm\nfrom collections import defaultdict\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\n\nfrom execution import check_correctness\nfrom utils import Logger, IMPORT_HELPER, read_dataset, stream_jsonl_all, estimate_pass_at_k\n\n\nLANGUAGE_NAME = {\n   \"CPP\"        : \"cpp\",\n   \"Go\"         : \"go\",\n   \"Java\"       : \"java\",\n   \"JavaScript\" : \"js\",\n   \"Python\"     : \"python\",\n   \"Rust\"       : \"rust\",\n}\n\n\ndef postprocess_generation(sample, generation_mode=\"completion\"):\n    code = sample[\"generation\"]\n    if generation_mode == \"instruction\":\n        if \"```\" in code:\n            pattern = r'```(.*?)\\n(.*?)```'\n            matches = re.findall(pattern, code, re.DOTALL)\n            for match in matches:\n                code = match[1]\n                break\n    sample[\"generation\"] = code\n    \n    return sample\n\n\ndef process_test(sample, problems, dataset_type, language_type, generation_mode):\n    if dataset_type == \"humanevalx\":\n        task_id = sample[\"task_id\"]\n        prompt = problems[task_id][\"prompt\"]\n        test = problems[task_id][\"test\"]\n        code = sample[\"generation\"]\n        \n        # Pre-process for different languages\n        if language_type == \"python\":\n            test_setup = \"\\n\".join(IMPORT_HELPER[\"python\"]) + \"\\n\"\n            test_string = test_setup + prompt + code + \"\\n\" + test + \"\\n\"\n        elif language_type == \"cpp\":\n            test_set_up = \"\"\n            for s in IMPORT_HELPER[\"cpp\"]:\n                if s not in prompt:\n                    test_set_up += s + \"\\n\"\n            test_string = test_set_up + \"\\n\" + prompt + code + \"\\n\" + test\n        elif language_type == \"java\":\n            test_string = prompt + code + \"\\n\" + test\n     
   elif language_type == \"js\" or language_type == \"javascript\":\n            test_string = prompt + code + \"\\n\" + test\n        elif language_type == \"go\":\n            import_string = problems[task_id][\"import\"]\n            prompt = prompt.replace(import_string, \"\")\n            test = problems[task_id][\"test\"]\n            test_setup = problems[task_id][\"test_setup\"]\n            other_pkgs = []\n            for pkg in IMPORT_HELPER[\"go\"]:\n                if pkg not in test_setup:\n                    p = pkg.split(\"/\")[-1]\n                    if p + \".\" in code:\n                        other_pkgs.append(f\"\\\"{pkg}\\\"\")\n            if other_pkgs:\n                import_other_pkgs = \"import (\\n\" + \"    \".join([p + \"\\n\" for p in other_pkgs]) + \")\"\n                test_string = test_setup + \"\\n\" + import_other_pkgs + \"\\n\" + prompt + code + \"\\n\" + test\n            else:\n                test_string = test_setup + \"\\n\" + prompt + code + \"\\n\" + test\n        elif language_type == \"rust\":\n            main = \"\\nfn main(){ \\n } \\n\"\n            test_string = main + prompt + code + test\n    elif dataset_type == \"mbpp\":\n        task_id = sample[\"task_id\"]\n        prompt = sample[\"prompt\"]\n        test = \"\\n\".join(problems[task_id][\"test_list\"]) + \"\\n\" + \"\\n\".join(problems[task_id][\"challenge_test_list\"])\n        code = sample[\"generation\"]\n        test_setup = \"\\n\".join(IMPORT_HELPER[\"python\"]) + \"\\n\"\n        test_string = test_setup + \"\\n\" + prompt + code + \"\\n\" + problems[task_id][\"test_setup_code\"] + \"\\n\" + test + \"\\n\"\n\n    return test_string\n\n\ndef evaluate_functional_correctness(\n    input_path: str = None,\n    output_path: str = None,\n    log_path: str = None,\n    tmp_dir: str = \"./\",\n    n_workers: int = 32,\n    timeout: float = 5.0,\n    k: List[int] = [1, 10, 100],\n    model_name: str = None,\n    problem_file: str = None,\n    
language_type: str = None,\n    dataset_type: str = \"humanevalx\",\n    generation_mode: str = \"completion\",\n    test_groundtruth: bool = False,\n):\n    if log_path is None:\n        log_path = os.path.join(output_path, \"evaluation.log\")\n    logger = Logger(__name__, log_file=log_path)\n    \n    if os.path.isdir(input_path):\n        input_list = glob.glob(input_path + '/*generation*.jsonl')\n        sample_jsonl = []\n        for input_file in input_list:\n            sample_jsonl += stream_jsonl_all(input_file)\n    else:\n        input_file = input_path\n        sample_jsonl = stream_jsonl_all(input_file)\n    \n    problems = read_dataset(problem_file, dataset_type=dataset_type)\n\n    if output_path is not None:\n        os.makedirs(output_path, exist_ok=True)\n    \n    with ThreadPoolExecutor(max_workers=n_workers) as executor:\n\n        futures = []\n        completion_id = Counter()\n        n_samples = 0\n        results = defaultdict(list)\n\n        if test_groundtruth:\n            logger.info(\"Testing ground truth...\")\n        else:\n            logger.info(\"Testing generation...\")\n        for sample in sample_jsonl:\n            task_id = sample[\"task_id\"]\n            if language_type is None:\n                language_type = LANGUAGE_NAME[task_id.split(\"/\")[0]]\n            if test_groundtruth:\n                if dataset_type == \"humanevalx\":\n                    sample[\"generation\"] = sample[\"canonical_solution\"]\n                    sample[\"prompt\"] = problems[task_id][\"prompt\"]\n                if dataset_type == \"mbpp\":\n                    sample[\"generation\"] = sample[\"code\"]\n                    sample[\"prompt\"] = problems[task_id][\"prompt\"]\n            sample = postprocess_generation(sample, generation_mode)\n            sample[\"test_code\"] = process_test(sample, problems, dataset_type, language_type, generation_mode)\n            if sample[\"test_code\"] is None:\n                continue\n       
     if \"completion_id\" in sample:\n                completion_id_ = sample[\"completion_id\"]\n            else:\n                completion_id_ = completion_id[task_id]\n            args = (task_id, sample, language_type, timeout, tmp_dir, completion_id_)\n            future = executor.submit(check_correctness, *args)\n            futures.append(future)\n            completion_id[task_id] += 1\n            n_samples += 1\n\n        if len(completion_id) == len(problems):\n            evaluate_pass_at_k = True\n        else:\n            evaluate_pass_at_k = False\n\n        logger.info(\"Running test suites...\")\n        for future in tqdm(as_completed(futures), total=len(futures)):\n            result = future.result()\n            results[result[\"task_id\"]].append((result[\"completion_id\"], result))\n\n    # Calculate pass@k.\n    total, correct = [], []\n    for result in results.values():\n        passed = [r[1][\"passed\"] for r in result]\n        total.append(len(passed))\n        correct.append(sum(passed))\n    total = np.array(total)\n    correct = np.array(correct)\n    if evaluate_pass_at_k:\n        ks = k\n        pass_at_k = {f\"pass@{k}\": estimate_pass_at_k(total, correct, k).mean()\n                     for k in ks if (total >= k).all()}\n        logger.info(pass_at_k)\n    else:\n        logger.info(\"Total: {}\".format(np.sum(total)))\n        logger.info(\"Correct: {}\".format(np.sum(correct)))\n        \n    if test_groundtruth:\n        out_file = os.path.join(output_path, \"ground_truth.jsonl\")\n    else:    \n        out_file = os.path.join(output_path, \"result-\" + input_file.split(\"/\")[-2] + \".\" + input_file.split(\"/\")[-1].split(\".\")[-1])\n    \n    logger.info(\"Writing to: {}\".format(out_file))\n    if out_file.endswith(\".gz\"):\n        fp = gzip.GzipFile(fileobj=open(out_file, \"wb\"), mode=\"wb\")\n        for res in results.values():\n            for r in res:\n                fp.write((json.dumps(r[1], 
ensure_ascii=False) + \"\\n\").encode(\"utf-8\"))\n    else:\n        fp = open(out_file, 'w')\n        for res in results.values():\n            for r in res:\n                fp.write(json.dumps(r[1], ensure_ascii=False) + \"\\n\")\n    fp.close()\n\n    if test_groundtruth:\n        logger.info(\"Ground-truth test finished.\")\n    else:\n        logger.info(\"Evaluation finished.\")\n\n\ndef main():\n    fire.Fire(evaluate_functional_correctness)\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "evaluation/execution.py",
    "content": "import io\nimport os\nimport signal\nimport random\nimport gzip\nimport json\nimport tempfile\nimport platform\nimport subprocess\nimport contextlib\nimport faulthandler\nimport multiprocessing\nfrom typing import *\n\n\ndef dicts_to_jsonl(data_list: list, filename: str, compress: bool = True) -> None:\n    \"\"\"\n    Method saves list of dicts into jsonl file.\n    :param data: (list) list of dicts to be stored,\n    :param filename: (str) path to the output file. If suffix .jsonl is not given then methods appends\n        .jsonl suffix into the file.\n    :param compress: (bool) should file be compressed into a gzip archive?\n    \"\"\"\n    sjsonl = '.jsonl'\n    sgz = '.gz'\n    # Check filename\n    if not filename.endswith(sjsonl):\n        filename = filename + sjsonl\n    # Save data\n    \n    if compress:\n        filename = filename + sgz\n        with gzip.open(filename, 'w') as compressed:\n            for ddict in data_list:\n                jout = json.dumps(ddict) + '\\n'\n                jout = jout.encode('utf-8')\n                compressed.write(jout)\n    else:\n        with open(filename, 'w') as out:\n            for ddict in data_list:\n                jout = json.dumps(ddict) + '\\n'\n                out.write(jout)\n\n\ndef check_correctness(\n        task_id: str,\n        sample: dict,\n        language_type: str,\n        timeout: float = 3.0,\n        tmp_dir: str = None,\n        completion_id: Optional[int] = None,\n) -> Dict:\n    \"\"\"\n    Evaluates the functional correctness of a completion by running the test\n    suite provided in the problem.\n    \"\"\"\n\n    def unsafe_execute(tmp_dir):\n        random_id = random.uniform(1, 1000)\n        if \"python\" in language_type.lower():\n            with create_tempdir():\n\n                # These system calls are needed when cleaning up tempdir.\n                import os\n                import shutil\n                rmtree = shutil.rmtree\n                
rmdir = os.rmdir\n                chdir = os.chdir\n\n                # Disable functionalities that can make destructive changes to the test.\n                reliability_guard()\n\n                try:\n                    exec_globals = {}\n                    with swallow_io():\n                        with time_limit(timeout):\n                            # WARNING\n                            # This program exists to execute untrusted model-generated code. Although\n                            # it is highly unlikely that model-generated code will do something overtly\n                            # malicious in response to this test suite, model-generated code may act\n                            # destructively due to a lack of model capability or alignment.\n                            # Users are strongly encouraged to sandbox this evaluation suite so that it\n                            # does not perform destructive actions on their host or network.\n                            # Once you have read this disclaimer and taken appropriate precautions,\n                            # uncomment the following line and proceed at your own risk:\n                            exec(sample[\"test_code\"], exec_globals)\n                        result.append(\"passed\")\n                except TimeoutException:\n                    result.append(\"timed out\")\n                except AssertionError as e:\n                    result.append(f\"failed: AssertionError\")\n                except BaseException as e:\n                    result.append(f\"failed: {e}\")\n\n                # Needed for cleaning up.\n                shutil.rmtree = rmtree\n                os.rmdir = rmdir\n                os.chdir = chdir\n\n        elif \"go\" in language_type.lower():\n            assert tmp_dir is not None, \"Go should be evaluated in a dir where necessary module files installed.\"\n\n            import os\n            import shutil\n\n            if \"tmp\" not in 
tmp_dir:\n                tmp_dir = os.path.join(tmp_dir, \"tmp\")\n            tmp_dir = os.path.join(tmp_dir, f\"{task_id.replace('/', '-')}-{random_id}\")\n            if not os.path.exists(tmp_dir):\n                os.makedirs(tmp_dir)\n\n            os.chdir(tmp_dir)\n            open(f\"main_test.go\", 'w').write(sample[\"test_code\"])\n            try:\n                exec_result = None\n                with time_limit(timeout):\n                    # WARNING\n                    # This program exists to execute untrusted model-generated code. Although\n                    # it is highly unlikely that model-generated code will do something overtly\n                    # malicious in response to this test suite, model-generated code may act\n                    # destructively due to a lack of model capability or alignment.\n                    # Users are strongly encouraged to sandbox this evaluation suite so that it\n                    # does not perform destructive actions on their host or network.\n                    # Once you have read this disclaimer and taken appropriate precautions,\n                    # uncomment the following line and proceed at your own risk:\n                    exec_result = subprocess.run([\"go\", \"test\", f\"-timeout={timeout}s\", \"main_test.go\"], timeout=timeout, capture_output=True)\n\n                if exec_result.returncode == 0:\n                    result.append(\"passed\")\n                else:\n                    if exec_result.stderr:\n                        try:\n                            err = exec_result.stderr.decode()\n                        except:\n                            err = exec_result.stderr\n                    else:\n                        try:\n                            err = exec_result.stdout.decode()\n                        except:\n                            err = exec_result.stdout\n                    result.append(f\"failed: {err}\")\n\n            except 
TimeoutException:\n                result.append(\"timed out\")\n\n            shutil.rmtree(tmp_dir)\n        elif \"js\" in language_type.lower():\n            import os\n            import shutil\n\n            if \"tmp\" not in tmp_dir:\n                tmp_dir = os.path.join(tmp_dir, \"tmp\")\n            tmp_dir = os.path.join(tmp_dir, f\"{task_id.replace('/', '-')}-{random_id}\")\n            if not os.path.exists(tmp_dir):\n                os.makedirs(tmp_dir)\n\n            os.chdir(tmp_dir)\n            open(f\"test.js\", 'w').write(sample[\"test_code\"])\n            try:\n                exec_result = None\n                with time_limit(timeout):\n                    # WARNING\n                    # This program exists to execute untrusted model-generated code. Although\n                    # it is highly unlikely that model-generated code will do something overtly\n                    # malicious in response to this test suite, model-generated code may act\n                    # destructively due to a lack of model capability or alignment.\n                    # Users are strongly encouraged to sandbox this evaluation suite so that it\n                    # does not perform destructive actions on their host or network.\n                    # Once you have read this disclaimer and taken appropriate precautions,\n                    # uncomment the following line and proceed at your own risk:\n                    exec_result = subprocess.run([\"node\", \"test.js\"], timeout=timeout, capture_output=True)\n\n                if exec_result.stderr.decode():\n                    err = exec_result.stderr.decode()\n                    result.append(f\"failed: {err}\")\n                elif exec_result.stdout.decode():\n                    err = exec_result.stdout.decode()\n                    result.append(f\"failed: {err}\")\n                else:\n                    result.append(\"passed\")\n\n            except TimeoutException:\n                
result.append(\"timed out\")\n\n            shutil.rmtree(tmp_dir)\n        elif \"cpp\" in language_type.lower():\n            import os\n            import shutil\n\n            if \"tmp\" not in tmp_dir:\n                tmp_dir = os.path.join(tmp_dir, \"tmp\")\n            tmp_dir = os.path.join(tmp_dir, f\"{task_id.replace('/', '-')}-{random_id}\")\n            if not os.path.exists(tmp_dir):\n                os.makedirs(tmp_dir)\n\n            os.chdir(tmp_dir)\n            open(f\"test.cpp\", 'w').write(sample[\"test_code\"])\n            if \"162\" in task_id:\n                compilation_result = subprocess.run([\"/usr/bin/g++\", \"-std=c++11\", \"test.cpp\", \"-lcrypto\", \"-lssl\"],\n                                                    timeout=timeout,\n                                                    capture_output=True)\n            else:\n                compilation_result = subprocess.run([\"/usr/bin/g++\", \"-std=c++11\", \"test.cpp\"], timeout=timeout,\n                                                    capture_output=True)\n            if compilation_result.returncode != 0:\n                if compilation_result.stderr:\n                    err = compilation_result.stderr.decode()\n                else:\n                    err = compilation_result.stdout.decode()\n                result.append(f\"failed: compilation error: {err}\")\n            else:\n                try:\n                    exec_result = None\n                    with time_limit(timeout):\n                        # WARNING\n                        # This program exists to execute untrusted model-generated code. 
Although\n                        # it is highly unlikely that model-generated code will do something overtly\n                        # malicious in response to this test suite, model-generated code may act\n                        # destructively due to a lack of model capability or alignment.\n                        # Users are strongly encouraged to sandbox this evaluation suite so that it\n                        # does not perform destructive actions on their host or network.\n                        # Once you have read this disclaimer and taken appropriate precautions,\n                        # uncomment the following line and proceed at your own risk:\n                        exec_result = subprocess.run([\"./a.out\"], timeout=timeout, capture_output=True)\n\n                    if exec_result.returncode == 0:\n                        result.append(\"passed\")\n                    else:\n                        if exec_result.stderr:\n                            try:\n                                err = exec_result.stderr.decode()\n                            except:\n                                err = exec_result.stderr\n                        else:\n                            try:\n                                err = exec_result.stdout.decode()\n                            except:\n                                err = exec_result.stdout\n                        result.append(f\"failed: {err}\")\n                except TimeoutException:\n                    result.append(\"timed out\")\n\n            shutil.rmtree(tmp_dir)\n        elif \"rust\" in language_type.lower():  \n            import os\n            WD: str = os.path.dirname(tmp_dir)\n            RUST_DIR: str = os.path.join(WD, \"rust\")\n            RUST_SRC: str = os.path.join(RUST_DIR, \"src\")\n            RUST_BIN: str = os.path.join(RUST_SRC, \"bin\")\n            RUST_TMP_DIR: str = os.path.join(RUST_DIR, \"tmp\")\n            RUST_LOGS: str = os.path.join(RUST_TMP_DIR, 
\"logs\")\n            RUST_EXT: str = \".rs\"\n\n            # Create mandatory tmp directories\n            os.makedirs(RUST_TMP_DIR, exist_ok=True)\n            os.makedirs(RUST_LOGS, exist_ok=True)\n            os.makedirs(RUST_SRC, exist_ok=True)\n            os.makedirs(RUST_BIN, exist_ok=True)\n\n            with tempfile.NamedTemporaryFile(dir = RUST_BIN, delete=False) as f:\n                # temporal file name\n                file_prefix = sample[\"task_id\"].lower().replace(\"/\", \"_\")\n                file_name:str =  file_prefix +RUST_EXT\n                \n                os.rename(f.name, os.path.join(RUST_BIN, file_name))\n                \n                # Sample to pure Rust function\n                rust_code: str = sample[\"test_code\"]\n\n                # dump the rust source code in the target temporal file\n                f.write(rust_code.encode('utf-8'))\n\n            # Proceed towards Rust binaries compilation. Therefore move to Rust module root dir.\n            os.chdir(RUST_DIR)\n\n            # Two possible outcomes\n            # Pass OR Fail compilation\n            log_filename: str = file_prefix + \".jsonl\"\n            log_path: str = os.path.join(RUST_LOGS, log_filename)\n            cargo_check: str = \"cargo check --bin \" + file_prefix + \" --message-format json >> \" + log_path\n            # Compilation build status\n            returned_val_compilation: int\n            \n            # Overwrite file content\n            if os.path.exists(log_path):\n                if(file_size := os.path.getsize(log_path)) >= 0: \n                    os.remove(log_path)\n                    returned_val_compilation = os.system(cargo_check)\n\n            else: \n                returned_val_compilation = os.system(cargo_check)\n\n            # 0 means success   \n            if returned_val_compilation == 0:\n\n                #Execution pipeline\n                cargo_test: str = \"cargo test --bin \" +file_prefix+ \" 
--message-format json >> \" + log_path\n                returned_val_execution = os.system(cargo_test)\n                \n                if returned_val_execution == 0:\n                    result.append(\"passed\")\n                else:\n                   result.append(f\"failed: execution error\") \n\n            else:\n                result.append(f\"failed: compilation error\")\n\n\n        elif \"java\" in language_type.lower():\n            assert tmp_dir is not None, \"Java should be evaluated in a temporary dir.\"\n\n            import os\n            import shutil\n\n            if \"tmp\" not in tmp_dir:\n                tmp_dir = os.path.join(tmp_dir, \"tmp\")\n            tmp_dir = os.path.join(tmp_dir, f\"{task_id.replace('/', '-')}-{random_id}\")\n            if not os.path.exists(tmp_dir):\n                os.makedirs(tmp_dir)\n\n            os.chdir(tmp_dir)\n            open(os.path.join(tmp_dir, \"Main.java\"), 'w').write(sample[\"test_code\"])\n            res = \"failed: unknown error\"\n            compile_returncode = -1\n            for _ in range(5):\n                try:\n                    compilation_result = subprocess.run(['javac', os.path.join(tmp_dir, \"Main.java\")], timeout=5,\n                                                        capture_output=True)\n                    compile_returncode = compilation_result.returncode\n                    break\n                except subprocess.TimeoutExpired as e:\n                    continue\n            if compile_returncode != 0:\n                res = \"failed: compilation error\"\n            else:\n                exec_result = None\n                try:\n                    # WARNING\n                    # This program exists to execute untrusted model-generated code. 
Although\n                    # it is highly unlikely that model-generated code will do something overtly\n                    # malicious in response to this test suite, model-generated code may act\n                    # destructively due to a lack of model capability or alignment.\n                    # Users are strongly encouraged to sandbox this evaluation suite so that it\n                    # does not perform destructive actions on their host or network.\n                    # Once you have read this disclaimer and taken appropriate precautions,\n                    # uncomment the following line and proceed at your own risk:\n                    exec_result = subprocess.run([f'java', '-cp', tmp_dir, 'Main'], timeout=timeout, capture_output=True)\n                    if exec_result.returncode == 0:\n                        res = \"passed\"\n                    elif exec_result.returncode == 1:\n                        if \"AssertionError\" in exec_result.stderr.decode('unicode-escape'):\n                            res = \"failed: wrong answer\"\n                        else:\n                            res = f\"failed: {exec_result.stderr.decode()}\"\n                except subprocess.TimeoutExpired as e:\n                    res = \"time out\"\n                except BaseException as e:\n                    res = f\"failed: {e}\"\n            result.append(res)\n\n            shutil.rmtree(tmp_dir)\n        \n    manager = multiprocessing.Manager()\n    result = manager.list()\n\n    p = multiprocessing.Process(target=unsafe_execute, args=(tmp_dir,))\n    p.start()\n    p.join(timeout=timeout + 1)\n    if p.is_alive():\n        p.kill()\n\n    if not result:\n        result.append(\"timed out\")\n\n    return {\n        \"task_id\"      : task_id,\n        \"completion_id\": completion_id,\n        \"test_code\"    : sample[\"test_code\"],\n        \"prompt\"       : sample[\"prompt\"],\n        \"generation\"   : sample[\"generation\"],\n        
\"result\"       : result[0],\n        \"passed\"       : result[0] == \"passed\",\n        \"finish\"       : -1 if \"finish\" not in sample else sample[\"finish\"],\n        \"file\"         : \"\" if \"file\" not in sample else sample[\"file\"],\n        \"output\"       : [] if \"output\" not in sample else sample[\"output\"],\n    }\n\n# Copyright (c) OpenAI (https://openai.com)\n\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n# ============================================================================\n@contextlib.contextmanager\ndef time_limit(seconds: float):\n    def signal_handler(signum, frame):\n        raise TimeoutException(\"Timed out!\")\n\n    signal.setitimer(signal.ITIMER_REAL, seconds)\n    signal.signal(signal.SIGALRM, signal_handler)\n    try:\n        yield\n    finally:\n        signal.setitimer(signal.ITIMER_REAL, 0)\n\n\n@contextlib.contextmanager\ndef swallow_io():\n    stream = WriteOnlyStringIO()\n    with contextlib.redirect_stdout(stream):\n        with contextlib.redirect_stderr(stream):\n            with redirect_stdin(stream):\n                yield\n\n\n@contextlib.contextmanager\ndef create_tempdir():\n    with tempfile.TemporaryDirectory() as dirname:\n        with chdir(dirname):\n            yield dirname\n\n\nclass TimeoutException(Exception):\n    pass\n\n\nclass WriteOnlyStringIO(io.StringIO):\n    \"\"\" StringIO that throws an exception when it's read from \"\"\"\n\n    def read(self, *args, **kwargs):\n        raise IOError\n\n    def readline(self, *args, **kwargs):\n        raise IOError\n\n    def readlines(self, *args, **kwargs):\n        raise IOError\n\n    def readable(self, *args, **kwargs):\n        \"\"\" Returns True if the IO object can be read. 
\"\"\"\n        return False\n\n\nclass redirect_stdin(contextlib._RedirectStream):  # type: ignore\n    _stream = 'stdin'\n\n\n@contextlib.contextmanager\ndef chdir(root):\n    if root == \".\":\n        yield\n        return\n    cwd = os.getcwd()\n    os.chdir(root)\n    try:\n        yield\n    except BaseException as exc:\n        raise exc\n    finally:\n        os.chdir(cwd)\n\n\ndef reliability_guard(maximum_memory_bytes: Optional[int] = None):\n    \"\"\"\n    This disables various destructive functions and prevents the generated code\n    from interfering with the test (e.g. fork bomb, killing other processes,\n    removing filesystem files, etc.)\n\n    WARNING\n    This function is NOT a security sandbox. Untrusted code, including, model-\n    generated code, should not be blindly executed outside of one. See the \n    Codex paper for more information about OpenAI's code sandbox, and proceed\n    with caution.\n    \"\"\"\n\n    if maximum_memory_bytes is not None:\n        import resource\n        resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))\n        resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))\n        if not platform.uname().system == 'Darwin':\n            resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))\n\n    faulthandler.disable()\n\n    import builtins\n    builtins.exit = None\n    builtins.quit = None\n\n    import os\n    os.environ['OMP_NUM_THREADS'] = '1'\n\n    os.kill = None\n    os.system = None\n    os.putenv = None\n    os.remove = None\n    os.removedirs = None\n    os.rmdir = None\n    os.fchdir = None\n    os.setuid = None\n    os.fork = None\n    os.forkpty = None\n    os.killpg = None\n    os.rename = None\n    os.renames = None\n    os.truncate = None\n    os.replace = None\n    os.unlink = None\n    os.fchmod = None\n    os.fchown = None\n    os.chmod = None\n    os.chown = None\n    os.chroot = None\n    
os.fchdir = None\n    os.lchflags = None\n    os.lchmod = None\n    os.lchown = None\n    os.getcwd = None\n    os.chdir = None\n\n    import shutil\n    shutil.rmtree = None\n    shutil.move = None\n    shutil.chown = None\n\n    import subprocess\n    subprocess.Popen = None  # type: ignore\n\n    __builtins__['help'] = None\n\n    import sys\n    sys.modules['ipdb'] = None\n    sys.modules['joblib'] = None\n    sys.modules['resource'] = None\n    sys.modules['psutil'] = None\n    sys.modules['tkinter'] = None\n"
  },
  {
    "path": "evaluation/generation.py",
    "content": "import os\nimport zmq\nimport time\nimport json\nimport torch\nimport random\nimport socket\nimport argparse\n\nfrom typing import *\nfrom transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, StoppingCriteria\nfrom utils import Logger, read_dataset, process_extra_prompt, is_code_generation_finished, cleanup_code\n\nlogger = Logger(__name__)\n\n\ndef add_code_generation_specific_args(parser):\n    group = parser.add_argument_group(\"Code Generation\")\n    group.add_argument(\n        \"--hostfile\",\n        type=str,\n        default=\"./hostfile\",\n    )\n    group.add_argument(\n        \"--channel-ip\",\n        type=str,\n        default=None,\n        help=\"IP for ZeroMQ channel\",\n    )\n    group.add_argument(\n        \"--channel-port\",\n        type=int,\n        default=5555,\n        help=\"Port for ZeroMQ channel\",\n    )\n    group.add_argument(\n        \"--master-port\",\n        type=int,\n        default=6007,\n        help=\"Port for distributed channel\",\n    )\n    group.add_argument(\n        \"--model-per-device\",\n        type=int,\n        default=1,\n        help=\"Number of models per device\",\n    )\n    group.add_argument(\n        \"--max-length\",\n        type=int,\n        default=8192,\n        help=\"Max sequence length\",\n    )\n    group.add_argument(\n        \"--top-p\",\n        type=float,\n        default=1.0,\n        help=\"Top-p Probability for sampling\",\n    )\n    group.add_argument(\n        \"--top-k\",\n        type=int,\n        default=0,\n        help=\"Top-k for sampling\",\n    )\n    group.add_argument(\n        \"--temperature\",\n        type=float,\n        default=1.0,\n        help=\"Temperature for sampling\",\n    )\n    group.add_argument(\n        \"--greedy\",\n        type=int,\n        default=0,\n        help=\"Use greedy decoding instead of sampling\",\n    )\n    group.add_argument(\n        \"--seed\",\n        type=int,\n        default=42,\n        
help=\"Random seed\",\n    )\n    group.add_argument(\n        \"--micro-batch-size\",\n        type=int,\n        default=1,\n        help=\"Micro batch size for each GPU\",\n    )\n    group.add_argument(\n        \"--samples-per-problem\",\n        type=int,\n        default=200,\n        help=\"Number of samples to generate for each problem\",\n    )\n    group.add_argument(\n        \"--gen-node-world-size\",\n        type=int,\n        default=1,\n        help=\"Number of machines to use for generation\",\n    )\n    group.add_argument(\n        '--task-name',\n        default=\"generation\",\n        help='Name of task',\n    )\n    group.add_argument(\n        '--model-name',\n        default=\"codegeex2-6b\",\n        help='Name of model, support [\"codegeex2-6b\", \"starcoder\", \"replit-code-v1-3b\", \"codegen25-7b-multi\", \"codegen25-7b-mono\", \"codegen-16B-multi\"]',\n    )\n    group.add_argument(\n        '--data-path',\n        required=True,\n    )\n    group.add_argument(\n        '--output-path',\n        required=True,\n    )\n    group.add_argument(\n        '--log-path',\n        default=None,\n        help='Path to log output',\n    )\n    group.add_argument(\n        '--model-path',\n        required=True,\n    )\n    group.add_argument(\n        '--dataset-type',\n        default=\"humanevalx\",\n        help='Identify the evaluation dataset [humanevalx]',\n    )\n    group.add_argument(\n        '--language-type',\n        default=\"python\",\n        help='Identify the type of programming language to generate',\n    )\n    group.add_argument(\n        '--generation-mode',\n        default=\"instruction\",\n    )\n\n\nclass CodeStoppingCriteria(StoppingCriteria):\n    \"\"\"\n    This class can be used to stop generation whenever the full generated number of tokens exceeds `max_length` or meet the code generation stopping criteria.\n    \"\"\"\n\n    def __init__(\n        self, \n        max_length: int, \n        micro_batch_size: int, 
\n        tokenizer,\n        dataset_type: str, \n        language_type: str, \n        prompt: str,\n    ):\n        self.max_length = max_length\n        self.tokenizer = tokenizer\n        self.dataset_type = dataset_type\n        self.language_type = language_type\n        self.prompt = prompt\n        self.stop_index = [-1 for _ in range(micro_batch_size)]\n\n    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:\n        for i, input_id in enumerate(input_ids):\n            if self.stop_index[i] > -1:\n                continue\n            code = self.tokenizer.decode(input_id)\n            code = code[len(self.prompt):]\n            if is_code_generation_finished(\n                code,\n                dataset_type=self.dataset_type,\n                language_type=self.language_type) or input_id.shape[-1] >= self.max_length:\n                self.stop_index[i] = len(code) + len(self.prompt)\n        if all([s != -1 for s in self.stop_index]):\n            return True\n        \n        return False\n\n\ndef run_generation_distributed(args, model, tokenizer):\n    logger.info(f\"Connecting to tcp://{args.channel_ip}:{args.channel_port}\")\n    context = zmq.Context()\n    socket = context.socket(zmq.REQ)\n    socket.connect(f\"tcp://{args.channel_ip}:{args.channel_port}\")\n    \n    os.makedirs(args.output_path, exist_ok=True)\n    output_path = os.path.join(\n        args.output_path,\n        f\"{args.task_name}-t{args.temperature}-topp{args.top_p}-ns{args.samples_per_problem}-rank{args.rank}.jsonl\",\n    )\n    \n    def process(obj):\n        results = []\n        prompt = obj[\"prompt\"]\n        if args.generation_mode == \"instruction\":\n            inputs = tokenizer([prompt] * args.micro_batch_size, return_tensors=\"pt\")\n            inputs = inputs.to(model.device)\n            outputs = model.generate(**inputs,\n                                    max_length=args.max_length,\n                         
           do_sample=True if not args.greedy else False,\n                                    use_cache=True,\n                                    top_p=args.top_p,\n                                    top_k=args.top_k,\n                                    temperature=args.temperature,\n                                    pad_token_id=tokenizer.eos_token_id)\n            for i, output in enumerate(outputs):\n                response = tokenizer.decode(output)\n                res = obj.copy()\n                res[\"generation\"] = response[len(prompt):].strip()\n                results.append(res)\n        elif args.generation_mode == \"completion\":\n            inputs = tokenizer([prompt for _ in range(args.micro_batch_size)], return_tensors=\"pt\")\n            inputs = inputs.to(model.device)\n            stop_criteria = CodeStoppingCriteria(\n                max_length=args.max_length,\n                micro_batch_size=args.micro_batch_size,\n                tokenizer=tokenizer,\n                dataset_type=args.dataset_type,\n                language_type=args.language_type,\n                prompt=prompt)\n            outputs = model.generate(**inputs,\n                                    max_length=args.max_length,\n                                    do_sample=True if not args.greedy else False,\n                                    use_cache=True,\n                                    stopping_criteria=[stop_criteria],\n                                    top_p=args.top_p,\n                                    top_k=args.top_k,\n                                    temperature=args.temperature,\n                                    pad_token_id=tokenizer.eos_token_id)\n            for i, output in enumerate(outputs):\n                response = tokenizer.decode(output)\n                res = obj.copy()\n                res[\"generation_raw\"] = response\n                res[\"generation\"] = cleanup_code(\n                    response[len(prompt):], \n        
            dataset_type=args.dataset_type,\n                    language_type=args.language_type)\n                results.append(res)\n        \n        return results\n    \n    fout = open(output_path, \"w\", encoding=\"utf-8\")\n    while True:\n        socket.send_json({\"rank\": args.rank, \"action\": \"pull\"})\n        resp = socket.recv_json()\n        try:\n            if resp[\"task_id\"] is None:\n                break\n\n            current_spec = resp[\"task_id\"]\n            results = process(current_spec)\n            \n            for res in results:\n                fout.write(json.dumps(res, ensure_ascii=False) + \"\\n\")\n                fout.flush()\n\n            socket.send_json(\n                {\n                    \"rank\"   : args.rank,\n                    \"action\" : \"success\",\n                    \"task_id\": current_spec['task_id']\n                }\n            )\n            socket.recv()\n\n        except Exception as e:\n            logger.error(f\"*** (rank={args.rank}) crashed.\")\n            logger.error(f\"    error: {repr(e)}\")\n            socket.send_json(\n                {\n                    \"rank\"   : args.rank,\n                    \"action\" : \"fail\",\n                    \"task_id\": current_spec['task_id']\n                }\n            )\n            socket.recv()\n            continue\n\n\ndef main(args, node_rank: int, local_rank: int, master_port: int, num_devices: int):\n    world_size = args.gen_node_world_size * num_devices\n    args.rank = num_devices * node_rank + local_rank\n    args.world_size = world_size\n    logger.info(f\"Generating on rank {args.rank} of {args.world_size}\")\n    \n    try:\n        if args.model_name in [\"codegeex2-6b\"]:\n            tokenizer = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)\n        else:\n            tokenizer = AutoTokenizer.from_pretrained(args.model_path, clean_up_tokenization_spaces=False, trust_remote_code=True)\n    
    if args.model_name in [\"codegeex2-6b\"]:\n            model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True).to(\"cuda:{}\".format(local_rank % torch.cuda.device_count()))\n        elif args.model_name in [\"starcoder\", \"replit-code-v1-3b\", \"codegen25-7b-multi\", \"codegen25-7b-mono\", \"codegen-16B-multi\"]:\n            model = AutoModelForCausalLM.from_pretrained(args.model_path, trust_remote_code=True).to(\"cuda:{}\".format(local_rank % torch.cuda.device_count()))\n        else:\n            try:\n                model = AutoModel.from_pretrained(args.model_path, trust_remote_code=True).to(\"cuda:{}\".format(local_rank % torch.cuda.device_count()))\n            except:\n                logger.error(f\"Model {args.model_name} not supported.\")\n                raise NotImplementedError\n    except Exception as e:\n        logger.error(e)\n    \n    model = model.eval()\n    # Generate samples.\n    run_generation_distributed(args, model, tokenizer)\n\n    logger.info(f\"rank={args.rank} worker finished, waiting ...\")\n    exit(0)\n\n\ndef server(args):\n    logger.info(f\"[ server ] starting ...\")\n    entries = read_dataset(args.data_path, dataset_type=args.dataset_type)\n\n    assert args.samples_per_problem % args.micro_batch_size == 0, \"samples_per_problem should be divisible by batch_size\"\n\n    for entry in entries.values():\n        entry[\"prompt\"] = process_extra_prompt(\n            entry[\"prompt\"], \n            language_type=args.language_type, \n            dataset_type=args.dataset_type, \n            generation_mode=args.generation_mode,\n        )\n\n    res = []\n    for entry in entries.values():\n        res.extend([entry] * (args.samples_per_problem // args.micro_batch_size))\n    random.shuffle(res)\n    all_entries = res\n\n    # setup zeromq channel\n    logger.info(f\"[ server ] starting up on port {args.channel_port}\")\n    context = zmq.Context()\n    logger.info(f\"[ server ] creating socket\")\n   
 socket = context.socket(zmq.REP)\n    logger.info(f\"[ server ] binding to port {args.channel_port}\")\n    socket.bind(f\"tcp://*:{args.channel_port}\")\n\n    logger.info(\n        f\"[ server ] loaded {len(entries)} entries, generating {len(entries) * args.samples_per_problem} samples\",\n    )\n\n    remaining_entries = all_entries.copy()\n    running_workers = args.gen_node_world_size * torch.cuda.device_count()\n    num_finished = 0\n\n    logger.info(f\"[ server ] listening for requests ...\")\n    start_time = time.perf_counter()\n    while True:\n        # Wait for next request from client\n        msg = socket.recv_json()\n        rank = msg[\"rank\"]\n        action = msg[\"action\"]\n\n        if action == \"pull\":\n            if len(remaining_entries) == 0:\n                socket.send_json({\"task_id\": None})\n                running_workers -= 1\n                logger.info(f\"[ server ] Shutting down worker {rank}, remaining {running_workers} workers\")\n                if running_workers == 0 and num_finished == len(all_entries):\n                    logger.info(f\"[ server ] All workers finished\")\n                    break\n            else:\n                entry = remaining_entries.pop()\n                time_elapsed = time.perf_counter() - start_time\n                logger.info(f\"[ server ] Sending entry {entry['task_id']} to worker {rank}\")\n                remaining = (\n                        len(remaining_entries)\n                        / (len(all_entries) - len(remaining_entries))\n                        * time_elapsed\n                )\n                time_per_sampple = 0.0 if num_finished == 0 else time_elapsed / num_finished / args.micro_batch_size\n                logger.info(\n                    f\"[ server ] total {len(all_entries)}, assigned {len(all_entries) - len(remaining_entries)}, finished {num_finished}, elapsed {time_elapsed:.4f}, speed {time_per_sampple:.4f}s/sample, remaining {remaining:.4f}\",\n             
   )\n                socket.send_json({\"task_id\": entry})\n        else:\n            if action == \"success\":\n                logger.info(f\"[ server ] {msg['task_id']} is finished\")\n                socket.send_json({\"pong\": 1})\n            else:\n                logger.info(f\"[ server ] {msg['task_id']} is not finished\")\n                remaining_entries.append(msg['task_id'])\n                socket.send_json({\"pong\": 1})\n                break\n\n            num_finished += 1\n\n\nif __name__ == \"__main__\":\n    torch.multiprocessing.set_start_method(\"spawn\")\n    parser = argparse.ArgumentParser()\n    add_code_generation_specific_args(parser)\n    args = parser.parse_args()\n    \n    if args.log_path is None:\n        args.log_path = os.path.join(args.output_path, \"generation.log\")\n\n    logger.info(\"start method: \" + torch.multiprocessing.get_start_method())\n    \n    processes = []\n    num_devices = torch.cuda.device_count()\n    hosts = open(args.hostfile, \"r\").readlines()\n    hosts = [host.strip() for host in hosts]\n    master_port = args.master_port\n\n    node_rank = None\n    for i in range(len(hosts)):\n        if hosts[i] == socket.gethostbyname(socket.gethostname()):\n            node_rank = i\n            break\n    assert (\n            node_rank is not None\n    ), f\"Could not find hostname ({socket.gethostbyname(socket.gethostname())}) in hostlist\"\n\n    # launch server\n    if socket.gethostbyname(socket.gethostname()) == hosts[0]:\n        server_process = torch.multiprocessing.Process(target=server, args=(args,))\n        logger.info(f\"Launching server ...\")\n        server_process.start()\n        processes.append(server_process)\n\n    for i in range(num_devices):\n        local_rank = i\n        logger.info(f\"launching local rank {i}\")\n\n        p = torch.multiprocessing.Process(\n            target=main,\n            args=(args, node_rank, local_rank, master_port, num_devices),\n        )\n        
p.start()\n        processes.append(p)\n\n    for p in processes:\n        p.join()\n"
  },
  {
    "path": "evaluation/inspect_jsonl.py",
    "content": "import fire\nimport json\nimport numpy as np\n\nfrom typing import *\nfrom utils import Logger\n\n\ndef main(\n    data_path: str = \"./test.jsonl\",\n    threshold: int = -1,\n    random: int = 0,\n    log_path: str = 'inspect_jsonl.txt',\n    random_rate: float = 0.5,\n):\n    logger = Logger(__name__, log_file=log_path, log_mode=\"file\", disable_formatter=True)\n    \n    n = 0\n    with open(data_path, \"r\") as f:\n        for i, line in enumerate(f):\n            if i == 0:\n                logger.info(\"Data has the following keys\")\n                obj = json.loads(line)\n                logger.info(obj.keys())\n            if threshold > 0 and n > threshold:\n                break\n            if random and np.random.randint(10) > 10 * random_rate:\n                continue\n            \n            obj = json.loads(line)\n            n += 1\n            logger.info(f\"========== Sample {i} ==========\")\n            if 'code' in obj:\n                try:\n                    code_splits = obj['code'].split(\"\\n\")\n                    logger.info(f\"Length of chars: {len(obj['code'])}, length of lines: {len(code_splits)}.\")\n                except:\n                    pass\n            for j, k in enumerate(obj.keys()):\n                logger.info(f\"** Key {j}: {k} **\")\n                logger.info(obj[k])\n    print(f\"Log saved in {log_path}\")\n\n\nif __name__ == \"__main__\":\n    fire.Fire(main)"
  },
  {
    "path": "evaluation/utils.py",
    "content": "import re\nimport json\nimport gzip\nimport torch\nimport numpy\nimport random\nimport logging\nimport itertools\nimport numpy as np\n\nfrom typing import *\n\n\nLANGUAGE_TAG = {\n    \"c\"            : \"// language: C\",\n    \"c++\"          : \"// language: C++\",\n    \"cpp\"          : \"// language: C++\",\n    \"c#\"           : \"// language: C#\",\n    \"csharp\"       : \"// language: C#\",\n    \"c-sharp\"      : \"// language: C#\",\n    \"css\"          : \"/* language: CSS */\",\n    \"cuda\"         : \"// language: Cuda\",\n    \"dart\"         : \"// language: Dart\",\n    \"lua\"          : \"// language: Lua\",\n    \"objectivec\"   : \"// language: Objective-C\",\n    \"objective-c\"  : \"// language: Objective-C\",\n    \"objective-c++\": \"// language: Objective-C++\",\n    \"python\"       : \"# language: Python\",\n    \"perl\"         : \"# language: Perl\",\n    \"prolog\"       : f\"% language: Prolog\",\n    \"swift\"        : \"// language: swift\",\n    \"lisp\"         : \"; language: Lisp\",\n    \"java\"         : \"// language: Java\",\n    \"scala\"        : \"// language: Scala\",\n    \"tex\"          : f\"% language: TeX\",\n    \"vue\"          : \"<!--language: Vue-->\",\n    \"markdown\"     : \"<!--language: Markdown-->\",\n    \"html\"         : \"<!--language: HTML-->\",\n    \"php\"          : \"// language: PHP\",\n    \"js\"           : \"// language: JavaScript\",\n    \"javascript\"   : \"// language: JavaScript\",\n    \"typescript\"   : \"// language: TypeScript\",\n    \"go\"           : \"// language: Go\",\n    \"shell\"        : \"# language: Shell\",\n    \"rust\"         : \"// language: Rust\",\n    \"sql\"          : \"-- language: SQL\",\n    \"kotlin\"       : \"// language: Kotlin\",\n    \"vb\"           : \"' language: Visual Basic\",\n    \"ruby\"         : \"# language: Ruby\",\n    \"pascal\"       : \"// language: Pascal\",\n    \"r\"            : \"# language: R\",\n    
\"fortran\"      : \"!language: Fortran\",\n    \"lean\"         : \"-- language: Lean\",\n    \"matlab\"       : f\"% language: Matlab\",\n    \"delphi\"       : \"{language: Delphi}\",\n    \"scheme\"       : \"; language: Scheme\",\n    \"basic\"        : \"' language: Basic\",\n    \"assembly\"     : \"; language: Assembly\",\n    \"groovy\"       : \"// language: Groovy\",\n    \"abap\"         : \"* language: Abap\",\n    \"gdscript\"     : \"# language: GDScript\",\n    \"haskell\"      : \"-- language: Haskell\",\n    \"julia\"        : \"# language: Julia\",\n    \"elixir\"       : \"# language: Elixir\",\n    \"excel\"        : \"' language: Excel\",\n    \"clojure\"      : \"; language: Clojure\",\n    \"actionscript\" : \"// language: ActionScript\",\n    \"solidity\"     : \"// language: Solidity\",\n    \"powershell\"   : \"# language: PowerShell\",\n    \"erlang\"       : f\"% language: Erlang\",\n    \"cobol\"        : \"// language: Cobol\",\n    \"alloy\"        : \"/* language: Alloy */\",\n    \"awk\"          : \"// language: AWK\",\n    \"thrift\"       : \"/* language: Thrift */\",\n    \"sparql\"       : \"# language: SPARQL\",\n    \"augeas\"       : \"// language: Augeas\",\n    \"cmake\"        : \"# language: CMake\",\n    \"f-sharp\"      : \"// language: F#\",\n    \"stan\"         : \"// language: Stan\",\n    \"isabelle\"     : \"(*language: Isabelle*)\",\n    \"dockerfile\"   : \"# language: Dockerfile\",\n    \"rmarkdown\"    : \"# language: RMarkdown\",\n    \"literate-agda\": \"-- language: Literate Agda\",\n    \"tcl\"          : \"// language: Augeas\",\n    \"glsl\"         : \"// language: GLSL\",\n    \"antlr\"        : \"// language: ANTLR\",\n    \"verilog\"      : \"// language: Verilog\",\n    \"racket\"       : \"; language: Racket\",\n    \"standard-ml\"  : \"(*language:Standard ML*)\",\n    \"elm\"          : \"-- language: Elm\",\n    \"yaml\"         : \"# language: YAML\",\n    \"smalltalk\"    : \"'' language: 
Smalltalk\",\n    \"ocaml\"        : \"(*language: OCaml*)\",\n    \"idris\"        : \"-- language: Idris\",\n    \"visual-basic\" : \"' language: Visual Basic\",\n    \"protocol-buffer\": \"// language: Protocol Buffer\",\n    \"bluespec\"     : \"// language: Bluespec\",\n    \"applescript\"  : \"-- language: AppleScript\",\n    \"makefile\"     : \"# language: Makefile\",\n    \"tcsh\"         : \"# language: TCSH\",\n    \"maple\"        : \"# language: Maple\",\n    \"systemverilog\": \"// language: SystemVerilog\",\n    \"literate-coffeescript\": \"# language: Literate CoffeeScript\",\n    \"vhdl\"         : \"-- language: VHDL\",\n    \"restructuredtext\": \".. language: reStructuredText\",\n    \"sas\"          : \"* language: SAS\",\n    \"literate-haskell\": \"> language: Literate Haskell\",\n    \"java-server-pages\": \"// language: Java Server Pages\",\n    \"coffeescript\" : \"# language: CoffeeScript\",\n    \"emacs-lisp\"   : \"; language: Emacs Lisp\",\n    \"mathematica\"  : \"// language: Mathematica\",\n    \"xslt\"         : \"<!--language: XSLT-->\",\n    \"zig\"          : \"// language: Zig\",\n    \"common-lisp\"  : \"; language: Common Lisp\",\n    \"stata\"        : \"* language: Stata\",\n    \"agda\"         : \"-- language: Agda\",\n    \"ada\"          : \"-- language: Ada\",\n}\n\n\nLANGUAGE_COMMENT_SIGN = {}\nfor lang in LANGUAGE_TAG:\n    LANGUAGE_COMMENT_SIGN[lang] = LANGUAGE_TAG[lang].split(\"language:\")[0].strip()\n\n\nIMPORT_HELPER = {\n    \"python\": [\n        \"import math\",\n        \"import re\",\n        \"import sys\",\n        \"import copy\",\n        \"import datetime\",\n        \"import itertools\",\n        \"import collections\",\n        \"import heapq\",\n        \"import statistics\",\n        \"import functools\",\n        \"import hashlib\",\n        \"import numpy\",\n        \"import numpy as np\",\n        \"import string\",\n        \"from typing import *\",\n        \"from collections import *\",\n    
],\n    \"go\"    : [\n        \"math\",\n        \"strings\",\n        \"fmt\",\n        \"strconv\",\n        \"time\",\n        \"bytes\",\n        \"regexp\",\n        \"sort\",\n        \"math/rand\",\n        \"crypto/md5\",\n    ],\n    \"cpp\"   : [\n        \"#include<stdlib.h>\",\n        \"#include<algorithm>\",\n        \"#include<math.h>\",\n        \"#include<stdio.h>\",\n        \"#include<vector>\",\n        \"#include<string>\",\n        \"#include<climits>\",\n        \"#include<cstring>\",\n        \"#include<iostream>\",\n    ],\n}\n\n\n\ndef set_random_seed(seed):\n    \"\"\"Set random seed for reproducability.\"\"\"\n    random.seed(seed)\n    numpy.random.seed(seed)\n    torch.manual_seed(seed)\n    \n    \ndef stream_jsonl(filename: str) -> Iterable[Dict]:\n    \"\"\"\n    Parses each jsonl line and yields it as a dictionary\n    \"\"\"\n    if filename.endswith(\".gz\"):\n        with open(filename, \"rb\") as gzfp:\n            with gzip.open(gzfp, \"rt\") as fp:\n                for line in fp:\n                    if any(not x.isspace() for x in line):\n                        yield json.loads(line)\n    else:\n        with open(filename, \"r\") as fp:\n            for line in fp:\n                if any(not x.isspace() for x in line):\n                    yield json.loads(line)\n                    \n\ndef stream_jsonl_all(filename: str) -> Iterable[Dict]:\n    results = []\n    if filename.endswith(\".gz\"):\n        fp = gzip.open(open(filename, \"rb\"), \"rt\")\n    else:\n        fp = open(filename, \"r\")\n    for line in fp:\n        if any(not x.isspace() for x in line):\n            results.append(json.loads(line))\n    fp.close()\n\n    return results\n\n\ndef read_dataset(\n    data_file: str = None,\n    dataset_type: str = \"humanevalx\",\n) -> Dict:\n    if \"humanevalx\" in dataset_type.lower():\n        dataset = {task[\"task_id\"]: task for task in stream_jsonl(data_file)}\n    elif \"mbpp\" in dataset_type.lower():\n    
    problems = {task[\"task_id\"]: task for task in stream_jsonl(data_file)}\n        task_ids = sorted(problems.keys())[10:510]\n        dataset = {}\n        for task_id in task_ids:\n            sample = problems[task_id]\n            description = sample[\"text\"]\n            test_example = sample[\"test_list\"][0]\n            prompt = f'\"\"\"\\n{description}\\n{test_example}\\n\"\"\"\\n'\n            sample[\"prompt\"] = prompt\n            dataset[task_id] = sample\n    elif \"ds1000\" in dataset_type.lower():\n        # install ds1000 from https://github.com/HKUNLP/DS-1000\n        from ds1000 import DS1000Dataset\n        ds1000 = DS1000Dataset(source_dir=data_file, libs=\"all\", mode=\"Completion\")\n        for lib in ds1000.libs:\n            for problem_id in range(len(ds1000[lib])):\n                prefix = \"\"\n                suffix = \"\"\n                insert_flag = False\n                first_line_flag = True\n                # extract prefix and suffix of the prompt\n                for line in ds1000[lib][problem_id][\"prompt\"].split(\"\\n\"):\n                    if \"[insert]\" in line:\n                        insert_flag = True\n                        continue\n                    if first_line_flag:\n                        first_line_flag = False\n                    else:\n                        line = \"\\n\" + line\n                    if not insert_flag:\n                        prefix += line\n                    else:\n                        suffix += line\n            \n    else:\n        raise f\"Dataset: {dataset_type} not supported.\"\n\n    return dataset\n\n\ndef read_translation_dataset(\n    data_file_src: str = None,\n    data_file_tgt: str = None,\n    lang_src: str = None,\n    lang_tgt: str = None,\n    dataset_type: str = \"humanevalx\",\n) -> Dict:\n    if \"humanevalx\" in dataset_type.lower():\n        dataset_src = {task[\"task_id\"]: task for task in stream_jsonl(data_file_src)}\n        dataset_tgt = 
{task[\"task_id\"].split(\"/\")[-1]: task for task in stream_jsonl(data_file_tgt)}\n        for k, sample in dataset_src.items():\n            prompt = \"code translation\\n\"\n            if lang_src == \"cpp\":\n                prompt += \"C++:\\n\"\n            elif lang_src == \"js\":\n                prompt += \"JavaScript:\\n\"\n            else:\n                prompt += f\"{lang_src}:\\n\".capitalize()\n            prompt += dataset_src[k][\"declaration\"] + \"\\n\" + dataset_src[k][\"canonical_solution\"].rstrip() + \"\\n\"\n            if lang_tgt == \"cpp\":\n                prompt += \"C++:\\n\"\n            elif lang_tgt == \"js\":\n                prompt += \"JavaScript:\\n\"\n            else:\n                prompt += f\"{lang_tgt}:\\n\".capitalize()\n            prompt += dataset_tgt[k.split(\"/\")[-1]][\"declaration\"]\n            dataset_src[k][\"prompt\"] = prompt\n    else:\n        raise f\"Dataset: {dataset_type} not supported.\"\n\n    return dataset_src\n\n\ndef process_extra_prompt(\n    prompt: str,\n    language_type: str = \"python\", \n    dataset_type: str = None,\n    generation_mode: str = \"completion\",\n) -> str:\n    \"\"\"\n    Processes the extra prompt.\n    \"\"\"\n    language = language_type.lower()\n    if dataset_type == \"humanevalx\":\n        extra_prompt = \"\"\n        # extra_prompt = LANGUAGE_TAG[language] + \"\\n\"\n        prompt = prompt.strip()\n        if generation_mode == \"instruction\":\n            return \"问：\" + extra_prompt + prompt + \"\\n答：\"\n        return extra_prompt + prompt\n    elif dataset_type == \"mbpp\":\n        extra_prompt = \"\"\n        prompt = prompt.strip()\n        return extra_prompt + prompt\n    else:\n        return prompt\n\n\ndef is_code_generation_finished(\n    code: str,\n    dataset_type: str = None,\n    language_type: str = None,\n):\n    \"\"\"\n    Checks whether the generated code is finished.\n    \"\"\"\n    if dataset_type == \"mbpp\":\n        end_words = 
[\"\\ndef\", \"\\nassert\"]\n        for w in end_words:\n            if w == \"\\ndef\":\n                if code.count(w) > 1:\n                    return True\n            else:\n                if w in code:\n                    return True\n    else:\n        if language_type.lower() == \"python\":\n            for line in code.split(\"\\n\"):\n                if len(line.strip()) > 0 and line[0] != ' ' and line[0] != '\\t':\n                    return True\n            end_words = [\"\\ndef\", \"\\nclass\", \"\\nif\", \"\\n#\", \"\\nprint\"]\n            for w in end_words:\n                if w in code:\n                    return True\n        elif language_type.lower() == \"java\":\n            if code.count(\"{\") + 1 == code.count(\"}\"):\n                return True\n        elif language_type.lower() == \"go\":\n            if \"\\nfunc main(\" in code:\n                return True\n            if code.count(\"{\") + 1 == code.count(\"}\"):\n                return True\n        elif language_type.lower() == \"js\":\n            if code.count(\"{\") + 1 == code.count(\"}\"):\n                return True\n        elif language_type.lower() == \"cpp\":\n            if \"\\nint main()\" in code:\n                return True\n            if code.count(\"{\") + 1 == code.count(\"}\"):\n                return True\n        elif language_type.lower() == \"rust\":\n            if \"\\nfn main()\" in code:\n                return True\n            if code.count(\"{\") + 1 == code.count(\"}\"):\n                return True\n\n    return False\n\n\n# Modified from https://github.com/bigcode-project/bigcode-evaluation-harness/blob/main/lm_eval/tasks/mbpp.py\nstop_words=[\"\\nclass\", \"\\nassert\", '\\n\"\"\"', \"\\nprint\", \"\\nif\"]\ndef first_block(string, stop_words):\n    \"\"\"Split off first block of code by scanning for class, def etc. 
on newlines.\"\"\"\n    return re.split(\"|\".join(stop_words), string)[0].rstrip()\n\n\ndef cleanup_code(\n    code: str,\n    dataset_type: str = None,\n    language_type: str = None,\n):\n    \"\"\"\n    Cleans up the generated code.\n    \"\"\"\n    if dataset_type == \"mbpp\":\n        end_words = [\"\\nassert\", \"\\ndef\"]\n        for w in end_words:\n            if w == \"\\ndef\":\n                if code.count(w) > 1:\n                    code = code[:code.rfind(w)]\n            else:\n                code = code[:code.rfind(w)]\n        code = first_block(code, stop_words)\n    elif dataset_type == \"humanevalx\":\n        if language_type.lower() == \"python\":\n            code_splits = code.split(\"\\n\")\n            is_empty_line = False\n            ind_empty_line = None\n            for i, line in enumerate(code_splits):\n                if len(line.strip()) > 0 and line[0] != ' ' and line[0] != '\\t':\n                    is_empty_line = True\n                    ind_empty_line = i\n                    break\n            if is_empty_line:\n                code = \"\\n\".join(code_splits[:ind_empty_line])\n            else:\n                end_words = [\"\\ndef\", \"\\nclass\", \"\\n#\", \"\\nassert\", '\\n\"\"\"', \"\\nprint\", \"\\nif\", \"\\n\\n\\n\"]\n                for w in end_words:\n                    if w in code:\n                        code = code[:code.rfind(w)]\n        elif language_type.lower() == \"java\":\n            main_pos = code.find(\"public static void main\")\n            if main_pos != -1:\n                code = code[:main_pos] + '}'\n            if '}' in code:\n                code = code[:code.rfind('}')] + '}'\n            if code.count('{') + 1 == code.count('}'):\n                code += \"\\n}\"\n        elif language_type.lower() == \"go\":\n            if \"\\nfunc main(\" in code:\n                code = code[:code.rfind(\"func main(\")]\n            if '}' in code:\n                code = 
code[:code.rfind('}')] + '}'\n        elif language_type.lower() == \"cpp\":\n            if \"\\nint main()\" in code:\n                code = code[:code.rfind(\"int main()\")]\n            if '}' in code:\n                code = code[:code.rfind('}')] + '}'\n        elif language_type.lower() == \"js\":\n            if '}' in code:\n                code = code[:code.rfind('}')] + '}'\n        elif language_type.lower() == \"rust\":\n            if '}' in code:\n                code = code[:code.rfind('}')] + '}'\n\n    return code\n\n\ndef estimate_pass_at_k(\n        num_samples: Union[int, List[int], np.ndarray],\n        num_correct: Union[List[int], np.ndarray],\n        k: int\n) -> np.ndarray:\n    \"\"\"\n    Estimates pass@k of each problem and returns them in an array.\n    \"\"\"\n\n    def estimator(n: int, c: int, k: int) -> float:\n        \"\"\"\n        Calculates 1 - comb(n - c, k) / comb(n, k).\n        \"\"\"\n        if n - c < k:\n            return 1.0\n        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))\n\n    if isinstance(num_samples, int):\n        num_samples_it = itertools.repeat(num_samples, len(num_correct))\n    else:\n        assert len(num_samples) == len(num_correct)\n        num_samples_it = iter(num_samples)\n\n    return np.array([estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])\n\n\nclass Logger:\n    def __init__(self, name, log_level=logging.INFO, log_file=None, log_mode=\"both\", disable_formatter=False):\n        self.logger = logging.getLogger(name)\n        self.logger.setLevel(log_level)\n\n        self.formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S')\n        \n        # Log to console\n        if log_mode == \"both\" or log_mode == \"terminal\":\n            console_handler = logging.StreamHandler()\n            if not disable_formatter:\n                console_handler.setFormatter(self.formatter)\n            
self.logger.addHandler(console_handler)\n\n        # Log to file\n        if log_file is not None:\n            if log_mode == \"both\" or log_mode == \"file\":\n                file_handler = logging.FileHandler(log_file, mode='w')\n                if not disable_formatter:\n                    file_handler.setFormatter(self.formatter)\n                self.logger.addHandler(file_handler)\n\n    def add_file_handler(self, file_name):\n        file_handler = logging.FileHandler(file_name, mode='w')\n        file_handler.setFormatter(self.formatter)\n        self.logger.addHandler(file_handler)\n\n    def debug(self, message):\n        self.logger.debug(message)\n\n    def info(self, message):\n        self.logger.info(message)\n\n    def warning(self, message):\n        self.logger.warning(message)\n\n    def error(self, message):\n        self.logger.error(message)\n\n    def critical(self, message):\n        self.logger.critical(message)\n"
  },
  {
    "path": "requirements.txt",
    "content": "protobuf\ntransformers>=4.30.2\naccelerate\ncpm_kernels\ntorch>=2.0\nsentencepiece\ngradio"
  },
  {
    "path": "resources/wechat.md",
    "content": "<div align=\"center\">\n<img src=join_wechat.png width=\"60%\"/>\n\n<p> 扫码关注公众号加入「CodeGeeX交流群」 </p>\n<p> Scan the QR code to join the \"CodeGeeX WeChat Group\" </p>\n</div>\n"
  },
  {
    "path": "scripts/run_humanevalx.sh",
    "content": "#!/bin/bash\n# This script is used to generate solutions of HumanEval-X.\n\n# Examples (MODE=(gen, eval, both)):\n# MODE=gen bash ./scripts/run_humanevalx.sh\n\nif [ -z \"$MODE\" ]\nthen\n  MODE=\"both\"\nfi\n\nSCRIPT_PATH=$(realpath \"$0\")\nSCRIPT_DIR=$(dirname \"$SCRIPT_PATH\")\nMAIN_DIR=$(dirname \"$SCRIPT_DIR\")\n\n# enviroment settings\nHOSTLIST=$SCRIPT_DIR/hostlist\nWORLD_SIZE=1\nDATASET=humanevalx\nGENERATION_MODE=completion\nMODEL_NAME=codegeex2-6b\nMODEL_PATH=/pathto/codegeex2-6b/\nN_CPU_WORKERS=16\nTIMEOUT=5\n\n# generation settings\n## pass@1 greedy\nNUM_SAMPLES=1\nMICRO_BSZ=1\nTEMP=1.0\nTOPK=1\nTOPP=1.0\nMAX_LENGTH=1024\nSEED=42\nGREEDY=1\n\n## pass@1 estimated\n# NUM_SAMPLES=20\n# MICRO_BSZ=1\n# TEMP=0.2\n# TOPK=0\n# TOPP=0.95\n# MAX_LENGTH=1024\n# SEED=42\n# GREEDY=0\n\n## pass@10 & pass@100\n# NUM_SAMPLES=200\n# MICRO_BSZ=4\n# TEMP=0.8\n# TOPK=0\n# TOPP=0.95\n# MAX_LENGTH=1024\n# SEED=42\n# GREEDY=0\n\nfor l in python java js cpp go rust;\ndo\n    LANGUAGE=$l\n    DATA_DIR=$MAIN_DIR/benchmark/$DATASET/\n    DATA_PATH=$DATA_DIR/$DATASET\\_$LANGUAGE.jsonl.gz\n    OUTPUT_PATH=$MAIN_DIR/output/$DATASET/$LANGUAGE\n    TODAY=$(date +%y%m%d)\n    CHANNEL_PORT=$(expr $RANDOM + 5000)\n    MASTER_PORT=$(expr $RANDOM + 8000)\n    JOB_ID=$MODEL_NAME-$LANGUAGE-greedy$GREEDY-ns$NUM_SAMPLES-t$TEMP-topp$TOPP-seed$SEED\n    mkdir -p \"$OUTPUT_PATH/$JOB_ID\"\n\n    # evaluation settings\n    EVAL_INPUT_PATH=$OUTPUT_PATH/$JOB_ID\n    EVAL_OUTPUT_PATH=$OUTPUT_PATH/$JOB_ID\n\n    # nccl options\n    OPTIONS_NCCL=\"export NCCL_DEBUG=warn; export NCCL_IB_DISABLE=0; export NCCL_IB_GID_INDEX=3\"\n    OPTIONS_PATH=\"export PATH=$PATH; export LD_LIBRARY_PATH=$LD_LIBRARY_PATH\"\n    CWD=$(pwd)\n\n    gen_func() {\n        echo \"Generating......\"\n        # set master ip for zmq server\n        if [ -z \"$HOSTLIST\" ]; then\n            ZMQ_ADDR=$(hostname -i)\n            echo \"$ZMQ_ADDR\" > \"./hostfile\"\n            HOSTLIST=\"./hostfile\"\n        else\n 
           ZMQ_ADDR=$(cat $HOSTLIST | head -n 1)\n        fi\n        echo \"master_ip: $ZMQ_ADDR\"\n\n        # run generation\n        RUN_CMD=\"python \\\n            $MAIN_DIR/evaluation/generation.py \\\n            --hostfile $HOSTLIST \\\n            --channel-ip $ZMQ_ADDR \\\n            --channel-port $CHANNEL_PORT \\\n            --master-port $MASTER_PORT \\\n            --model-path $MODEL_PATH \\\n            --temperature $TEMP \\\n            --top-p $TOPP \\\n            --top-k $TOPK \\\n            --greedy $GREEDY \\\n            --max-length $MAX_LENGTH \\\n            --micro-batch-size $MICRO_BSZ \\\n            --samples-per-problem $NUM_SAMPLES \\\n            --model-name $MODEL_NAME \\\n            --dataset-type $DATASET \\\n            --language-type $LANGUAGE \\\n            --generation-mode $GENERATION_MODE \\\n            --data-path $DATA_PATH \\\n            --output-path $OUTPUT_PATH/$JOB_ID \\\n            --log-path $OUTPUT_PATH/$JOB_ID/$TODAY-generation.log \\\n            --gen-node-world-size $WORLD_SIZE \\\n            --seed $SEED\"\n\n        RUN_CMD=\"$OPTIONS_NCCL; $OPTIONS_PATH; $RUN_CMD\"\n        RUN_CMD=\"cd $CWD; $RUN_CMD\"\n\n        if (( WORLD_SIZE != 1 )); then\n            RUN_CMD=\"pdsh -R ssh -w ^$HOSTLIST \\\"$RUN_CMD\\\"\"\n        fi\n\n        eval \"$RUN_CMD\"\n    }\n\n    eval_func() {\n        echo \"Evaluating......\"\n\n        if [ $LANGUAGE = rust ]; then\n            TIMEOUT=300\n            echo \"Setting timeout to $TIMEOUT for Rust\"\n        fi\n        RUN_CMD=\"python \\\n            $MAIN_DIR/evaluation/evaluation.py \\\n            --input_path $EVAL_INPUT_PATH \\\n            --output_path $EVAL_OUTPUT_PATH \\\n            --log-path $OUTPUT_PATH/$JOB_ID/$TODAY-evaluation.log \\\n            --model_name $MODEL_NAME \\\n            --language_type $LANGUAGE \\\n            --dataset_type $DATASET \\\n            --generation_mode $GENERATION_MODE \\\n            --n_workers 
$N_CPU_WORKERS \\\n            --tmp_dir $MAIN_DIR/benchmark/$DATASET/$LANGUAGE \\\n            --problem_file $DATA_PATH \\\n            --timeout $TIMEOUT\"\n\n        # inspecting results\n        INSPECT_CMD=\"python \\\n            $MAIN_DIR/evaluation/inspect_jsonl.py \\\n            --data_path $EVAL_OUTPUT_PATH/result-$JOB_ID.jsonl \\\n            --log-path $OUTPUT_PATH/$JOB_ID/$TODAY-inspect.txt\"\n\n        eval \"$RUN_CMD && $INSPECT_CMD\"\n    }\n\n    case $MODE in\n    \"gen\")\n        gen_func\n        ;;\n    \"eval\")\n        eval_func\n        ;;\n    \"both\")\n        gen_func\n        eval_func\n        ;;\n    *)\n        echo \"Unsupported MODE (gen, eval, both): $MODE\"\n        exit 1\n        ;;\n    esac\ndone\n"
  },
  {
    "path": "scripts/sanity_check.sh",
    "content": "#!/bin/bash\n# This script is used to check the correctness of code generation benchmarks.\n\nSCRIPT_PATH=$(realpath \"$0\")\nSCRIPT_DIR=$(dirname \"$SCRIPT_PATH\")\nMAIN_DIR=$(dirname \"$SCRIPT_DIR\")\n\n# enviroment settings\nDATASET=humanevalx\nGENERATION_MODE=completion\nN_CPU_WORKERS=16\nTIMEOUT=5\n\n# Check HumanEval-X\nfor l in python java js cpp go rust;\ndo\n    LANGUAGE=$l\n    echo \"Evaluating $l\"\n    DATA_DIR=$MAIN_DIR/benchmark/$DATASET/\n    DATA_PATH=$DATA_DIR/$DATASET\\_$LANGUAGE.jsonl.gz\n    OUTPUT_PATH=$MAIN_DIR/output/$DATASET/$LANGUAGE\n        \n    JOB_ID=sanity-check-$LANGUAGE\n    mkdir -p \"$OUTPUT_PATH/$JOB_ID\"\n\n    # evaluation settings\n    EVAL_INPUT_PATH=$DATA_PATH\n    EVAL_OUTPUT_PATH=$OUTPUT_PATH/$JOB_ID\n    \n    if [ $LANGUAGE = rust ]; then\n        TIMEOUT=300\n        echo \"Setting timeout to $TIMEOUT for Rust\"\n    fi\n\n    RUN_CMD=\"python \\\n        $MAIN_DIR/evaluation/evaluation.py \\\n        --test_groundtruth=True \\\n        --input_path $EVAL_INPUT_PATH \\\n        --output_path $EVAL_OUTPUT_PATH \\\n        --log-path $OUTPUT_PATH/$JOB_ID/$TODAY-evaluation.log \\\n        --model_name $MODEL_NAME \\\n        --language_type $LANGUAGE \\\n        --dataset_type $DATASET \\\n        --generation_mode $GENERATION_MODE \\\n        --n_workers $N_CPU_WORKERS \\\n        --tmp_dir $MAIN_DIR/benchmark/$DATASET/$LANGUAGE \\\n        --problem_file $DATA_PATH \\\n        --timeout $TIMEOUT\"\n\n    eval \"$RUN_CMD\"\ndone\n"
  }
]