[
  {
    "path": ".gitignore",
    "content": "**/__pycache__/**\nflan-t5**\ncheckpoints/**\ntmp/**\nunconverted-weights\nunconverted-weights/\nweights\nweights/\n.DS_STORE\n*.safetensors\n.cog/\nllama_weights/\n.env\nexllama/\nllama-recipes/\norig-llama-recipes/\nvllm/\n.pytest_cache\n.dockerignore\n*.egg-info/\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"exllama\"]\n\tpath = exllama\n\turl = https://github.com/technillogue/exllama.git\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing\n\nThanks for taking the time to contribute to this project!\n\n## Releases\n\nThis section documents the process used internally at Replicate to deploy the many variant Llama models.\n\nModel variants live in the [models](models) directory, and deployment is managed by a [Makefile](Makefile).\n\nTo release a new model:\n\n1. Run `make select <model-name>`, where model name corresponds to the name of a folder in the [models](models) directory, like `model-llama-2-7b`. This will copy stuff around and jigger the local state of the repo to say \"use this model\".\n1. Run `make test-local` to test locally (assuming you're on a machine with a GPU).\n1. Run `make stage test-stage <model-name>` to push to staging. If this passes, the model is ready to be promoted to production.\n1. Run `REPLICATE_USER=replicate && make push test-prod <model-name>`. This runs the same tests as staging.\n\nAfter releasing to production:\n\n1. Search for old instances of the previous version's Docker image id in documentation and replace them with the new version."
  },
  {
    "path": "LICENSE.txt",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2022, Replicate, Inc.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "Makefile",
    "content": ".PHONY: init \n.PHONY: select\n.PHONY: test-local\n.PHONY: push\n.PHONY: push-and-test\n.PHONY: clean\n\n# this is required to build sentencepiece for py3.11\n# requires cog > 0.9.0-beta1\n# get it at https://github.com/replicate/cog/releases/download/v0.9.0-beta1/cog_linux_x86_64\nexport COG_EXPERIMENTAL_BUILD_STAGE_DEPS = apt update && apt install -yy cmake google-perftools\nexport FAKE_COG_VERSION = 0.8.1\n\nCURRENT_DIR := $(shell basename $(PWD))\n\nifeq ($(findstring cog,$(CURRENT_DIR)),cog)\nIMAGE_NAME := $(CURRENT_DIR)\nelse\nIMAGE_NAME := cog-$(CURRENT_DIR)\nendif\n\nREPLICATE_USER ?= replicate-internal\n\nmodel ?= $(SELECTED_MODEL)\n\nPROD_MODEL ?= $(model)\n\nifeq ($(findstring chat,$(model)),chat)\n    schema := chat-schema.json\nelse ifeq ($(model),mistral-7b-instruct-v0.1-mlc)\n    schema := mistral-schema.json\nelse\n    schema := base-schema.json\nendif\n\nbase-schema.json:\n\t$(MAKE) select model=llama-2-7b-mlc\n\tcog run --use-cuda-base-image=false python3 -m cog.command.openapi_schema | jq > base-schema.json\nchat-schema.json:\n\t$(MAKE) select model=llama-2-7b-chat-hf-mlc\n\tcog run --use-cuda-base-image=false python3 -m cog.command.openapi_schema | jq > chat-schema.json\nmistral-schema.json:\n\t$(MAKE) select model=mistral-7b-instruct-v0.1-mlc\n\tcog run --use-cuda-base-image=false python3 -m cog.command.openapi_schema | jq > mistral-schema.json\n\n\ninit:\n\t@if [ -z \"$(model)\" ]; then \\\n\t\techo \"Error: 'model' argument must be specified or 'MODEL_ENV' environment variable must be set. 
E.g., make select model=your_model_name or export MODEL_ENV=your_model_name\"; \\\n\t\texit 1; \\\n\tfi\n\t# Initialize directory for model\n\tmkdir -p models/$(model)\n\tcp -r model_templates/*  models/$(model)\n\tif [ -e model_templates/.env ]; then cp model_templates/.env models/$(model) ; fi\n\tif [ -e model_templates/.dockerignore ]; then \\\n\t\tcp model_templates/.dockerignore models/$(model); \\\n\telse \\\n\t\ttouch models/$(model)/.dockerignore; \\\n\tfi\n\tprintf \"\\n# Generated by 'make init'\\n\" >> models/$(model)/.dockerignore\n\tprintf \"/models/*/\\n\" >> models/$(model)/.dockerignore\n\tprintf \"!/models/$(model)/\\n\" >> models/$(model)/.dockerignore\n\tprintf \"/models/$(model)/model_artifacts/**\\n\" >> models/$(model)/.dockerignore\n\tprintf \"!/models/$(model)/model_artifacts/tokenizer/\\n\" >> models/$(model)/.dockerignore\n\n\tmkdir -p models/$(model)/model_artifacts/tokenizer\n\tcp -r llama_weights/tokenizer/* models/$(model)/model_artifacts/tokenizer\n\nupdate:\n\t@if [ -z \"$(model)\" ]; then \\\n\t\techo \"Error: 'model' argument must be specified or 'MODEL_ENV' environment variable must be set. E.g., make select model=your_model_name or export MODEL_ENV=your_model_name\"; \\\n\t\texit 1; \\\n\tfi\n\tcp -r model_templates/*  models/$(model)\n\t\nmodel_dir=models/$(model)\n\nselect:\n\t@if [ -z \"$(model)\" ]; then \\\n\t\techo \"Error: 'model' argument must be specified or 'MODEL_ENV' environment variable must be set. E.g., make select model=your_model_name or export MODEL_ENV=your_model_name\"; \\\n\t\texit 1; \\\n\tfi\n\t# this approach makes copies\n\t# rsync -av --exclude 'model_artifacts/' models/$(model)/ .\n\n\t# this approach behaves the same way but makes symlinks\n\t# # if we also wanted to copy directory structure we could do this, but we only need one dir deep\n\t# rsync -av --exclude 'model_artifacts/' --include '*/' --exclude '*' $(model_dir)/ .\n\t# For symlinking files\n\tfind $(model_dir) -type f ! 
-path \"$(model_dir)/model_artifacts/*\" -exec ln -sf {} . \\;\n\t# For specific files like .env and .dockerignore, we link them if they exist\n\t[ -e $(model_dir)/.env ] && ln -sf $(model_dir)/.env .env || true\n\t# rm .dockerignore || true\n\tcp models/dockerignore .dockerignore\n\techo \"!$(model_dir)\" >> .dockerignore\n\t# [ -e $(model_dir)/dockerignore ] && cat $(model_dir)/dockerignore > .dockerignore\n\t#cog build\n\t@echo \"#########Selected model: $(model)########\"\n\nclean: select\n\tif [ -e models/$(model)/model_artifacts/default_inference_weights ]; then sudo rm -rf models/$(model)/model_artifacts/default_inference_weights; fi\n\tif [ -e models/$(model)/model_artifacts/training_weights ]; then  sudo rm -rf models/$(model)/model_artifacts/training_weights; fi\n\tif [ -e training_output.zip ]; then sudo rm -rf training_output.zip; fi\n\nbuild-local: select\n\tcog build --openapi-schema=$(schema) --use-cuda-base-image=false --progress plain\n\nserve: select\n\tdocker run \\\n\t-ti \\\n\t-p 5000:5000 \\\n\t--gpus=all \\\n\t-e COG_WEIGHTS=http://$(HOST_NAME):8000/training_output.zip \\\n\t-v `pwd`/training_output.zip:/src/local_weights.zip \\\n\t$(IMAGE_NAME)\n\ntest-local-predict: build-local\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest ./tests/test_predict.py -s; \\\n\telse \\\n\t\tpytest ./tests/test_predict.py; \\\n\tfi\n\ntest-local-train: build-local\n\trm -rf training_output.zip\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest ./tests/test_train.py -s; \\\n\telse \\\n\t\tpytest ./tests/test_train.py; \\\n\tfi\n\ntest-local-train-predict: build-local\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest ./tests/test_train_predict.py -s; \\\n\telse \\\n\t\tpytest ./tests/test_train_predict.py; \\\n\tfi\n\ntest-local: select test-local-predict test-local-train test-local-train-predict\n\nstage: select\n\t@echo \"Pushing $(model) to r8.im/$(REPLICATE_USER)/staging-$(model)...\"\n\tcog push --openapi-schema=$(schema) 
--use-cuda-base-image=false --progress plain r8.im/$(REPLICATE_USER)/staging-$(model)\n\ntest-stage-predict:\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest tests/test_remote_predict.py -s --model $(REPLICATE_USER)/staging-$(model); \\\n\telse \\\n\t\tpytest tests/test_remote_predict.py --model $(REPLICATE_USER)/staging-$(model); \\\n\tfi\n\ntest-stage-train-predict:\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest tests/test_remote_train.py -s --model $(REPLICATE_USER)/staging-$(model); \\\n\telse \\\n\t\tpytest tests/test_remote_train.py --model $(REPLICATE_USER)/staging-$(model); \\\n\tfi\n\ntest-stage: test-stage-predict test-stage-train-predict\n\n\nstage-and-test-models:\n\t$(foreach model, $(subst ,, $(models)), \\\n\t\t$(MAKE) select model=$(model); \\\n\t\t$(MAKE) stage model=$(model); \\\n\t\t$(MAKE) test-stage model=$(model); \\\n\t)\n\t\npush: select\n\tcog push --openapi-schema=$(schema) --use-cuda-base-image=false --progress plain r8.im/$(REPLICATE_USER)/$(PROD_MODEL)\n\ntest-prod-predict:\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest tests/test_remote_predict.py -s --model $(REPLICATE_USER)/$(PROD_MODEL); \\\n\telse \\\n\t\tpytest tests/test_remote_predict.py --model $(REPLICATE_USER)/$(PROD_MODEL); \\\n\tfi\n\ntest-prod-train-predict:\n\t@if [ \"$(verbose)\" = \"true\" ]; then \\\n\t\tpytest tests/test_remote_train.py -s --model $(REPLICATE_USER)/$(PROD_MODEL); \\\n\telse \\\n\t\tpytest tests/test_remote_train.py --model $(REPLICATE_USER)/$(PROD_MODEL); \\\n\tfi\n\ntest-prod: test-prod-predict test-prod-train-predict\n\nformat:\n\tpython3 -m ruff format .\n\nlint:\n\tpython3 -m ruff .\n\tpython3 -m ruff format --check .\n\nhelp:\n\t@echo \"Available targets:\\n\\n\"\n\t@echo \"init: Create the model directory.\"\n\t@echo \"   e.g., \\`make init dir=<model_dir>\\`\"\n"
  },
  {
    "path": "README.md",
    "content": "# LLaMA Cog template 🦙\n\nThis is a monorepo for building multiple Llama models using Cog:\n\n- llama-2-13b\n- llama-2-13b-chat\n- llama-2-13b-transformers\n- llama-2-70b\n- llama-2-70b-chat\n- llama-2-7b\n- llama-2-7b-chat\n- llama-2-7b-transformers\n- llama-2-7b-vllm\n\nSee [replicate.com/meta](https://replicate.com/meta).\n\n---\n\n\n**NOTE: This is an experimental branch that depends on exllama**\n\nFor now, you should:\n```sh\ngit clone https://github.com/turboderp/exllama\ncd exllama\ngit checkout e8a544f95b3fd64dfa5549eeeafb85b1ac71a793\n```\n\nWe're working on a proper integration.\n\n**This Cog template works with LLaMA 1 & 2 versions.**\n\nLLaMA is a [new open-source language model from Meta Research](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) that performs as well as closed-source models. \n\nThis is a guide to running LLaMA in the cloud using Replicate. You'll use the [Cog](https://github.com/replicate/cog) command-line tool to package the model and push it to Replicate as a web interface and API.\n\nThis template can be used to run the `7B`, `13B`, and `70B` versions of LLaMA and LLaMA2 and it also works with fine-tuned models.\n\n**Note: Please verify the system prompt for LLaMA or LLAMA2 and update it accordingly.**\n\n**Note: LLaMA is for research purposes only. It is not intended for commercial use. Check the license of LLaMA & LLaMA2 on the official LLaMA website of Meta Platforms, Inc.**\n\n## Prerequisites\n\n- **LLaMA weights**. The weights for LLaMA have not yet been released publicly. To apply for access, fill out the Meta Research form to be able to download the weights.\n- **GPU machine**. You'll need a Linux machine with an NVIDIA GPU attached and the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) installed. 
If you don't already have access to a machine with a GPU, check out our [guide to getting a GPU machine](https://replicate.com/docs/guides/get-a-gpu-machine).\n- **Docker**. You'll be using the [Cog](https://github.com/replicate/cog) command-line tool to build and push a model. Cog uses Docker to create containers for models.\n\n## Step 0: Install Cog\n\nFirst, [install Cog](https://github.com/replicate/cog#install):\n\n```\nsudo curl -o /usr/local/bin/cog -L \"https://github.com/replicate/cog/releases/latest/download/cog_$(uname -s)_$(uname -m)\"\nsudo chmod +x /usr/local/bin/cog\n```\n\n## Step 1: Set up weights\n\nReplicate currently supports the `7B` model size.\n\nPut your downloaded weights in a folder called `unconverted-weights`. The folder hierarchy should look something like this: \n\n```\nunconverted-weights\n├── 7B\n│   ├── checklist.chk\n│   ├── consolidated.00.pth\n│   └── params.json\n├── tokenizer.model\n└── tokenizer_checklist.chk\n```\n\nConvert the weights from a PyTorch checkpoint to a transformers-compatible format using this command:\n\n```\ncog run python -m transformers.models.llama.convert_llama_weights_to_hf --input_dir unconverted-weights --model_size 7B --output_dir weights\n```\n\nYour final directory structure should look like this:\n\n```\nweights\n├── config.json\n├── generation_config.json\n├── pytorch_model-00001-of-00002.bin\n├── pytorch_model-00002-of-00002.bin\n├── pytorch_model.bin.index.json\n├── special_tokens_map.json\n├── tokenizer.model\n└── tokenizer_config.json\n```\n\nOnce you've done this, you should uncomment `unconverted-weights` in your `.dockerignore` file. 
This ensures that `unconverted-weights` aren't built into the resulting cog image.\n\n## Step 2: Tensorize the weights from the transformers-compatible/huggingface format (this will allow cold-starts to happen much faster):\n\nRun convert_to_tensors.py to tensorize the weights from the previous transformers-compatible/huggingface format:\n```\ncog run python convert_to_tensors.py\n```\nThis will tensorize your weights and write the tensorized weights to `./llama_weights/llama-7b/llama_7b_fp16.tensors` if you have a GPU available and `.../llama_7b_fp32.tensors` if you don't.\n(To load the tensorized model instead of the transformers-compatible/huggingface weights, verify that `DEFAULT_MODEL_NAME` in `config.py` is set to the path of your tensorized weights.) \n\n- Make sure `**.tensors` is not in your `.dockerignore`:\n  In your `.dockerignore` file, remove `**.tensors`. This line will ignore all files that end with .tensors, no matter where they are in the directory structure.\n\n## Step 3: Run the model\n\nYou can run the model locally to test it:\n\n```\ncog predict -i prompt=\"Simply put, the theory of relativity states that\"\n```\n\nLLaMA is not fine-tuned to answer questions. You should construct your prompt so that the expected answer is the natural continuation of your prompt. \n\nHere are a few examples from the [LLaMA FAQ](https://github.com/facebookresearch/llama/blob/57b0eb62de0636e75af471e49e2f1862d908d9d8/FAQ.md#2-generations-are-bad):\n\n- Do not prompt with \"What is the meaning of life? 
Be concise and do not repeat yourself.\" but with \"I believe the meaning of life is\"\n- Do not prompt with \"Explain the theory of relativity.\" but with \"Simply put, the theory of relativity states that\"\n- Do not prompt with \"Ten easy steps to build a website...\" but with \"Building a website can be done in 10 simple steps:\\n\"\n\n## Step 4: Create a model on Replicate\n\nGo to [replicate.com/create](https://replicate.com/create) to create a Replicate model.\n\nMake sure to specify \"private\" to keep the model private.\n\n## Step 5: Configure the model to run on A100 GPUs\n\nReplicate supports running models on a variety of GPUs. The default GPU type is a T4, but for best performance you'll want to configure your model to run on an A100.\n\nClick on the \"Settings\" tab on your model page, scroll down to \"GPU hardware\", and select \"A100\". Then click \"Save\".\n\n## Step 6: Push the model to Replicate\n\nLog in to Replicate:\n\n```\nsudo cog login\n```\n\nPush the contents of your current directory to Replicate, using the model name you specified in step 3:\n\n```\nsudo cog push r8.im/username/modelname\n```\n\nNote: if you get an error while pushing your model indicating that your model does not exist on Replicate (even if it was successfully created on the Replicate dashboard), make sure to use the \"sudo\" command in the \"cog login\" in terminal.\n\n[Learn more about pushing models to Replicate.](https://replicate.com/docs/guides/push-a-model)\n\n\n## Step 7: Run the model on Replicate\n\nNow that you've pushed the model to Replicate, you can run it from the website or with an API.\n\nTo use your model in the browser, go to your model page.\n\nTo use your model with an API, click on the \"API\" tab on your model page. 
You'll see commands to run the model with cURL, Python, etc.\n\nTo learn more about how to use Replicate, [check out our documentation](https://replicate.com/docs).\n\n## Contributors ✨\nThis template was generated by Marco Mascorro (@mascobot), with some modifications to the original cog LLaMA template and with the help of the cog and Replicate documentation that wonderful people put together. See all contributors below.\n\nThis project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!\n"
  },
  {
    "path": "__init__.py",
    "content": ""
  },
  {
    "path": "base-schema.json",
    "content": "{\n  \"openapi\": \"3.0.2\",\n  \"info\": {\n    \"title\": \"Cog\",\n    \"version\": \"0.1.0\"\n  },\n  \"paths\": {\n    \"/\": {\n      \"get\": {\n        \"summary\": \"Root\",\n        \"operationId\": \"root__get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Root  Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/health-check\": {\n      \"get\": {\n        \"summary\": \"Healthcheck\",\n        \"operationId\": \"healthcheck_health_check_get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Healthcheck Health Check Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions\": {\n      \"post\": {\n        \"summary\": \"Predict\",\n        \"description\": \"Run a single prediction on the model\",\n        \"operationId\": \"predict_predictions_post\",\n        \"parameters\": [\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/PredictionRequest\"\n              }\n            }\n          }\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n  
              \"schema\": {\n                  \"$ref\": \"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}\": {\n      \"put\": {\n        \"summary\": \"Predict Idempotent\",\n        \"description\": \"Run a single prediction on the model (idempotent creation).\",\n        \"operationId\": \"predict_idempotent_predictions__prediction_id__put\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          },\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"title\": \"Prediction Request\",\n                \"allOf\": [\n                  {\n                    \"$ref\": \"#/components/schemas/PredictionRequest\"\n                  }\n                ]\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": 
\"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}/cancel\": {\n      \"post\": {\n        \"summary\": \"Cancel\",\n        \"description\": \"Cancel a running prediction\",\n        \"operationId\": \"cancel_predictions__prediction_id__cancel_post\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          }\n        ],\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Cancel Predictions  Prediction Id  Cancel Post\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/shutdown\": {\n      \"post\": {\n        \"summary\": \"Start Shutdown\",\n        \"operationId\": \"start_shutdown_shutdown_post\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n             
   \"schema\": {\n                  \"title\": \"Response Start Shutdown Shutdown Post\"\n                }\n              }\n            }\n          }\n        }\n      }\n    }\n  },\n  \"components\": {\n    \"schemas\": {\n      \"HTTPValidationError\": {\n        \"title\": \"HTTPValidationError\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"detail\": {\n            \"title\": \"Detail\",\n            \"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/ValidationError\"\n            }\n          }\n        }\n      },\n      \"Input\": {\n        \"title\": \"Input\",\n        \"required\": [\n          \"prompt\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"prompt\": {\n            \"title\": \"Prompt\",\n            \"type\": \"string\",\n            \"description\": \"Prompt to send to the model.\",\n            \"x-order\": 0\n          },\n          \"max_new_tokens\": {\n            \"title\": \"Max New Tokens\",\n            \"minimum\": 1,\n            \"type\": \"integer\",\n            \"description\": \"Maximum number of tokens to generate. A word is generally 2-3 tokens\",\n            \"default\": 128,\n            \"x-order\": 1\n          },\n          \"min_new_tokens\": {\n            \"title\": \"Min New Tokens\",\n            \"minimum\": -1,\n            \"type\": \"integer\",\n            \"description\": \"Minimum number of tokens to generate. To disable, set to -1. 
A word is generally 2-3 tokens.\",\n            \"default\": -1,\n            \"x-order\": 2\n          },\n          \"temperature\": {\n            \"title\": \"Temperature\",\n            \"maximum\": 5,\n            \"minimum\": 0.01,\n            \"type\": \"number\",\n            \"description\": \"Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\",\n            \"default\": 0.7,\n            \"x-order\": 3\n          },\n          \"top_p\": {\n            \"title\": \"Top P\",\n            \"maximum\": 1,\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens\",\n            \"default\": 0.95,\n            \"x-order\": 4\n          },\n          \"repetition_penalty\": {\n            \"title\": \"Repetition Penalty\",\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"A parameter that controls how repetitive text can be. Lower means more repetitive, while higher means less repetitive. Set to 1.0 to disable.\",\n            \"default\": 1.15,\n            \"x-order\": 5\n          },\n          \"stop_sequences\": {\n            \"title\": \"Stop Sequences\",\n            \"type\": \"string\",\n            \"description\": \"A comma-separated list of sequences to stop generation at. For example, '<end>,<stop>' will stop generation at the first instance of '<end>' or '<stop>'.\",\n            \"x-order\": 6\n          },\n          \"seed\": {\n            \"title\": \"Seed\",\n            \"type\": \"integer\",\n            \"description\": \"Random seed. 
Leave blank to randomize the seed\",\n            \"x-order\": 7\n          },\n          \"debug\": {\n            \"title\": \"Debug\",\n            \"type\": \"boolean\",\n            \"description\": \"provide debugging output in logs\",\n            \"default\": false,\n            \"x-order\": 8\n          },\n          \"prompt_template\": {\n            \"title\": \"Prompt Template\",\n            \"type\": \"string\",\n            \"description\": \"Template for formatting the prompt\",\n            \"default\": \"{prompt}\",\n            \"x-order\": 9\n          },\n          \"replicate_weights\": {\n            \"title\": \"Replicate Weights\",\n            \"type\": \"string\",\n            \"description\": \"Path to fine-tuned weights produced by a Replicate fine-tune job.\",\n            \"x-order\": 10\n          }\n        }\n      },\n      \"Output\": {\n        \"title\": \"Output\",\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"string\"\n        },\n        \"x-cog-array-display\": \"concatenate\",\n        \"x-cog-array-type\": \"iterator\"\n      },\n      \"PredictionRequest\": {\n        \"title\": \"PredictionRequest\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"output_file_prefix\": {\n            \"title\": \"Output File Prefix\",\n            \"type\": \"string\"\n          },\n          \"webhook\": {\n            \"title\": \"Webhook\",\n            \"maxLength\": 65536,\n            \"minLength\": 1,\n            \"type\": \"string\",\n            \"format\": \"uri\"\n          },\n          \"webhook_events_filter\": {\n            
\"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/WebhookEvent\"\n            },\n            \"default\": [\n              \"start\",\n              \"output\",\n              \"logs\",\n              \"completed\"\n            ]\n          }\n        }\n      },\n      \"PredictionResponse\": {\n        \"title\": \"PredictionResponse\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"output\": {\n            \"$ref\": \"#/components/schemas/Output\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"version\": {\n            \"title\": \"Version\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"started_at\": {\n            \"title\": \"Started At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"completed_at\": {\n            \"title\": \"Completed At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"logs\": {\n            \"title\": \"Logs\",\n            \"type\": \"string\",\n            \"default\": \"\"\n          },\n          \"error\": {\n            \"title\": \"Error\",\n            \"type\": \"string\"\n          },\n          \"status\": {\n            \"$ref\": \"#/components/schemas/Status\"\n          },\n          \"metrics\": {\n            \"title\": \"Metrics\",\n            \"type\": \"object\"\n          }\n        }\n      },\n      \"Status\": {\n        \"title\": \"Status\",\n        \"enum\": [\n          \"starting\",\n          \"processing\",\n          \"succeeded\",\n          \"canceled\",\n          \"failed\"\n        ],\n 
       \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      },\n      \"ValidationError\": {\n        \"title\": \"ValidationError\",\n        \"required\": [\n          \"loc\",\n          \"msg\",\n          \"type\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"loc\": {\n            \"title\": \"Location\",\n            \"type\": \"array\",\n            \"items\": {\n              \"anyOf\": [\n                {\n                  \"type\": \"string\"\n                },\n                {\n                  \"type\": \"integer\"\n                }\n              ]\n            }\n          },\n          \"msg\": {\n            \"title\": \"Message\",\n            \"type\": \"string\"\n          },\n          \"type\": {\n            \"title\": \"Error Type\",\n            \"type\": \"string\"\n          }\n        }\n      },\n      \"WebhookEvent\": {\n        \"title\": \"WebhookEvent\",\n        \"enum\": [\n          \"start\",\n          \"output\",\n          \"logs\",\n          \"completed\"\n        ],\n        \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "chat-schema.json",
    "content": "{\n  \"openapi\": \"3.0.2\",\n  \"info\": {\n    \"title\": \"Cog\",\n    \"version\": \"0.1.0\"\n  },\n  \"paths\": {\n    \"/\": {\n      \"get\": {\n        \"summary\": \"Root\",\n        \"operationId\": \"root__get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Root  Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/health-check\": {\n      \"get\": {\n        \"summary\": \"Healthcheck\",\n        \"operationId\": \"healthcheck_health_check_get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Healthcheck Health Check Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions\": {\n      \"post\": {\n        \"summary\": \"Predict\",\n        \"description\": \"Run a single prediction on the model\",\n        \"operationId\": \"predict_predictions_post\",\n        \"parameters\": [\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/PredictionRequest\"\n              }\n            }\n          }\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n  
              \"schema\": {\n                  \"$ref\": \"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}\": {\n      \"put\": {\n        \"summary\": \"Predict Idempotent\",\n        \"description\": \"Run a single prediction on the model (idempotent creation).\",\n        \"operationId\": \"predict_idempotent_predictions__prediction_id__put\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          },\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"title\": \"Prediction Request\",\n                \"allOf\": [\n                  {\n                    \"$ref\": \"#/components/schemas/PredictionRequest\"\n                  }\n                ]\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": 
\"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}/cancel\": {\n      \"post\": {\n        \"summary\": \"Cancel\",\n        \"description\": \"Cancel a running prediction\",\n        \"operationId\": \"cancel_predictions__prediction_id__cancel_post\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          }\n        ],\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Cancel Predictions  Prediction Id  Cancel Post\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/shutdown\": {\n      \"post\": {\n        \"summary\": \"Start Shutdown\",\n        \"operationId\": \"start_shutdown_shutdown_post\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n             
   \"schema\": {\n                  \"title\": \"Response Start Shutdown Shutdown Post\"\n                }\n              }\n            }\n          }\n        }\n      }\n    }\n  },\n  \"components\": {\n    \"schemas\": {\n      \"HTTPValidationError\": {\n        \"title\": \"HTTPValidationError\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"detail\": {\n            \"title\": \"Detail\",\n            \"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/ValidationError\"\n            }\n          }\n        }\n      },\n      \"Input\": {\n        \"title\": \"Input\",\n        \"required\": [\n          \"prompt\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"prompt\": {\n            \"title\": \"Prompt\",\n            \"type\": \"string\",\n            \"description\": \"Prompt to send to the model.\",\n            \"x-order\": 0\n          },\n          \"system_prompt\": {\n            \"title\": \"System Prompt\",\n            \"type\": \"string\",\n            \"description\": \"System prompt to send to the model. This is prepended to the prompt and helps guide system behavior. Should not be blank.\",\n            \"default\": \"You are a helpful, respectful and honest assistant.\",\n            \"x-order\": 1\n          },\n          \"max_new_tokens\": {\n            \"title\": \"Max New Tokens\",\n            \"minimum\": 1,\n            \"type\": \"integer\",\n            \"description\": \"Maximum number of tokens to generate. A word is generally 2-3 tokens\",\n            \"default\": 128,\n            \"x-order\": 2\n          },\n          \"min_new_tokens\": {\n            \"title\": \"Min New Tokens\",\n            \"minimum\": -1,\n            \"type\": \"integer\",\n            \"description\": \"Minimum number of tokens to generate. To disable, set to -1. 
A word is generally 2-3 tokens.\",\n            \"default\": -1,\n            \"x-order\": 3\n          },\n          \"temperature\": {\n            \"title\": \"Temperature\",\n            \"maximum\": 5,\n            \"minimum\": 0.01,\n            \"type\": \"number\",\n            \"description\": \"Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\",\n            \"default\": 0.7,\n            \"x-order\": 4\n          },\n          \"top_p\": {\n            \"title\": \"Top P\",\n            \"maximum\": 1,\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens\",\n            \"default\": 0.95,\n            \"x-order\": 5\n          },\n          \"repetition_penalty\": {\n            \"title\": \"Repetition Penalty\",\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"A parameter that controls how repetitive text can be. Lower means more repetitive, while higher means less repetitive. Set to 1.0 to disable.\",\n            \"default\": 1.15,\n            \"x-order\": 6\n          },\n          \"stop_sequences\": {\n            \"title\": \"Stop Sequences\",\n            \"type\": \"string\",\n            \"description\": \"A comma-separated list of sequences to stop generation at. For example, '<end>,<stop>' will stop generation at the first instance of '<end>' or '<stop>'.\",\n            \"x-order\": 7\n          },\n          \"seed\": {\n            \"title\": \"Seed\",\n            \"type\": \"integer\",\n            \"description\": \"Random seed. 
Leave blank to randomize the seed\",\n            \"x-order\": 8\n          },\n          \"debug\": {\n            \"title\": \"Debug\",\n            \"type\": \"boolean\",\n            \"description\": \"provide debugging output in logs\",\n            \"default\": false,\n            \"x-order\": 9\n          },\n          \"prompt_template\": {\n            \"title\": \"Prompt Template\",\n            \"type\": \"string\",\n            \"description\": \"Template for formatting the prompt\",\n            \"default\": \"[INST] <<SYS>>\\n{system_prompt}\\n<</SYS>>\\n\\n{prompt} [/INST]\",\n            \"x-order\": 10\n          },\n          \"replicate_weights\": {\n            \"title\": \"Replicate Weights\",\n            \"type\": \"string\",\n            \"description\": \"Path to fine-tuned weights produced by a Replicate fine-tune job.\",\n            \"x-order\": 11\n          }\n        }\n      },\n      \"Output\": {\n        \"title\": \"Output\",\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"string\"\n        },\n        \"x-cog-array-type\": \"iterator\",\n        \"x-cog-array-display\": \"concatenate\"\n      },\n      \"PredictionRequest\": {\n        \"title\": \"PredictionRequest\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"output_file_prefix\": {\n            \"title\": \"Output File Prefix\",\n            \"type\": \"string\"\n          },\n          \"webhook\": {\n            \"title\": \"Webhook\",\n            \"maxLength\": 65536,\n            \"minLength\": 1,\n            \"type\": \"string\",\n            \"format\": \"uri\"\n         
 },\n          \"webhook_events_filter\": {\n            \"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/WebhookEvent\"\n            },\n            \"default\": [\n              \"start\",\n              \"output\",\n              \"logs\",\n              \"completed\"\n            ]\n          }\n        }\n      },\n      \"PredictionResponse\": {\n        \"title\": \"PredictionResponse\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"output\": {\n            \"$ref\": \"#/components/schemas/Output\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"version\": {\n            \"title\": \"Version\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"started_at\": {\n            \"title\": \"Started At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"completed_at\": {\n            \"title\": \"Completed At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"logs\": {\n            \"title\": \"Logs\",\n            \"type\": \"string\",\n            \"default\": \"\"\n          },\n          \"error\": {\n            \"title\": \"Error\",\n            \"type\": \"string\"\n          },\n          \"status\": {\n            \"$ref\": \"#/components/schemas/Status\"\n          },\n          \"metrics\": {\n            \"title\": \"Metrics\",\n            \"type\": \"object\"\n          }\n        }\n      },\n      \"Status\": {\n        \"title\": \"Status\",\n        \"enum\": [\n          \"starting\",\n          \"processing\",\n          \"succeeded\",\n   
       \"canceled\",\n          \"failed\"\n        ],\n        \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      },\n      \"ValidationError\": {\n        \"title\": \"ValidationError\",\n        \"required\": [\n          \"loc\",\n          \"msg\",\n          \"type\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"loc\": {\n            \"title\": \"Location\",\n            \"type\": \"array\",\n            \"items\": {\n              \"anyOf\": [\n                {\n                  \"type\": \"string\"\n                },\n                {\n                  \"type\": \"integer\"\n                }\n              ]\n            }\n          },\n          \"msg\": {\n            \"title\": \"Message\",\n            \"type\": \"string\"\n          },\n          \"type\": {\n            \"title\": \"Error Type\",\n            \"type\": \"string\"\n          }\n        }\n      },\n      \"WebhookEvent\": {\n        \"title\": \"WebhookEvent\",\n        \"enum\": [\n          \"start\",\n          \"output\",\n          \"logs\",\n          \"completed\"\n        ],\n        \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "cog.yaml",
    "content": "# Configuration for Cog ⚙️\n# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md\n\nbuild:\n  # set to true if your model requires a GPU\n  gpu: true\n  cuda: \"11.8\"\n\n  # python version in the form '3.8' or '3.8.12'\n  python_version: \"3.11\"\n\n  # a list of packages in the format <package-name>==<version>\n  python_packages:\n    - \"numpy==1.24.2\"\n    - \"sentencepiece==0.1.99\"\n    - \"jinja2==3.1.2\"\n    - \"scipy==1.11.1\"\n    - \"safetensors>=0.3.1\"\n    - \"python-dotenv\"\n    - \"fire\"\n    - \"datasets\"\n    - \"transformers==4.33.2\"\n    - \"peft==0.4.0\"\n    - \"accelerate\"\n    - \"bitsandbytes\"\n    - \"trl==0.5.0\"\n    - \"aiohttp[speedups]\"\n    - \"triton\" # hm\n    - \"fastapi<0.99.0\"\n    # uncomment these when we go back to 12.1\n    # - \"https://r2.drysys.workers.dev/torch/torch-2.1.0-cp311-cp311-linux_x86_64.whl\" \n    # - \"https://weights.replicate.delivery/default/wheels/vllm-0.2a0-cp311-cp311-linux_x86_64.whl\"\n\n    - \"https://r2.drysys.workers.dev/torch/11.8/torch-2.1.0-cp311-cp311-linux_x86_64.whl\"\n    # This wheel can be built by running `TORCH_CUDA_ARCH_LIST=\"8.0;8.6\" pip wheel .` in https://github.com/replicate/vllm-with-loras\n    - \"https://r2.drysys.workers.dev/vllm/11.8/vllm-0.2a0-cp311-cp311-linux_x86_64.whl\"\n    - \"https://r2.drysys.workers.dev/xformers/11.8/xformers-0.0.23+b4c853d.d20231107-cp311-cp311-linux_x86_64.whl\"\n\n    - \"--pre -f https://mlc.ai/wheels\"\n    - \"mlc-chat-nightly-cu118\"\n    - \"mlc-ai-nightly-cu118\"\n    # - \"mlc-chat-nightly-cu121\"\n    # - \"mlc-ai-nightly-cu121\"\n  run:\n    - curl -o /usr/local/bin/pget -L \"https://github.com/replicate/pget/releases/download/v0.1.1/pget\" && chmod +x /usr/local/bin/pget\n    # since we can't do LD_LIBRARY_PATH=torch/lib, use this to make sure mlc can access the cuda libs bundled with torch\n    - bash -c 'ln -s /usr/local/lib/python3.11/site-packages/torch/lib/lib{nv,cu}* /usr/lib'\n# 
predict.py defines how predictions are run on your model\npredict: \"predict.py:Predictor\"\ntrain: \"train.py:train\"\n"
  },
  {
    "path": "examples/alpaca/README.md",
    "content": "Example code for parsing the dataset needed to train [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca).\n\nThis contains both a function, `process_data.py`, which shows how to transform the [given alpaca data](https://github.com/gururise/AlpacaDataCleaned) into the format expected by `cog train`. It also contains an example parsed dataset as a reference for that `{'prompt': ..., 'completion':...}` format."
  },
  {
    "path": "examples/alpaca/process_data.py",
    "content": "from transformers import T5Tokenizer\nimport json\n\nPROMPT_DICT = {\n    \"prompt_input\": (\n        \"Below is an instruction that describes a task, paired with an input that provides further context. \"\n        \"Write a response that appropriately completes the request.\\n\\n\"\n        \"### Instruction:\\n{instruction}\\n\\n### Input:\\n{input}\\n\\n### Response:\"\n    ),\n    \"prompt_no_input\": (\n        \"Below is an instruction that describes a task. \"\n        \"Write a response that appropriately completes the request.\\n\\n\"\n        \"### Instruction:\\n{instruction}\\n\\n### Response:\"\n    ),\n}\n\n\nclass Preprocessor:\n    \"\"\"Simple class to parse alpaca data into format expected by trainer. Run this offline to build your dataset.\"\"\"\n\n    def __init__(self, tokenizer):\n        self.prompt_dict = PROMPT_DICT\n        self.tokenizer = tokenizer\n\n    def batch_tokenize(self, texts):\n        \"\"\"Tokenizes text. Presently doesn't pad inputs, just returns input ids.\"\"\"\n        tokenized = [\n            self.tokenizer(\n                prompt,\n                return_tensors=\"pt\",\n                padding=\"longest\",\n            ).input_ids\n            for prompt in texts\n        ]\n        return tokenized\n\n    def make_prompt(self, input_row):\n        if len(input_row[\"input\"]) > 1:\n            return self.prompt_dict[\"prompt_input\"].format_map(input_row)\n        return self.prompt_dict[\"prompt_no_input\"].format_map(input_row)\n\n    def make_short_prompt(self, input_row):\n        if len(input_row[\"input\"]) > 1:\n            return f\"\"\"{input_row['instruction']}\\n{input_row['input']}\"\"\"\n        return input_row[\"instruction\"]\n\n    def construct_dataset(self, input_data):\n        prompts = [self.make_short_prompt(val) for val in input_data]\n        return [\n            {\"prompt\": val[0], \"completion\": val[1]}\n            for val in zip(prompts, [val[\"output\"] for val in 
input_data])\n        ]\n\n\nif __name__ == \"__main__\":\n    proc = Preprocessor(T5Tokenizer.from_pretrained(\"google/flan-t5-xl\"))\n    with open(\"alpaca_data.json\", \"r\") as f:\n        data = json.load(f)\n\n    data_out = proc.construct_dataset(data)\n\n    with open(\"short_alpaca_data.json\", \"w\") as f:\n        json.dump(data_out, f, indent=2)\n"
  },
  {
    "path": "llama_recipes/LICENSE",
    "content": "LLAMA 2 COMMUNITY LICENSE AGREEMENT\nLlama 2 Version Release Date: July 18, 2023\n\n\"Agreement\" means the terms and conditions for use, reproduction, distribution and\nmodification of the Llama Materials set forth herein.\n\n\"Documentation\" means the specifications, manuals and documentation\naccompanying Llama 2 distributed by Meta at ai.meta.com/resources/models-and-\nlibraries/llama-downloads/.\n\n\"Licensee\" or \"you\" means you, or your employer or any other person or entity (if\nyou are entering into this Agreement on such person or entity's behalf), of the age\nrequired under applicable laws, rules or regulations to provide legal consent and that\nhas legal authority to bind your employer or such other person or entity if you are\nentering in this Agreement on their behalf.\n\n\"Llama 2\" means the foundational large language models and software and\nalgorithms, including machine-learning model code, trained model weights,\ninference-enabling code, training-enabling code, fine-tuning enabling code and other\nelements of the foregoing distributed by Meta at ai.meta.com/resources/models-and-\nlibraries/llama-downloads/.\n\n\"Llama Materials\" means, collectively, Meta's proprietary Llama 2 and\nDocumentation (and any portion thereof) made available under this Agreement.\n\n\"Meta\" or \"we\" means Meta Platforms Ireland Limited (if you are located in or, if you\nare an entity, your principal place of business is in the EEA or Switzerland) and Meta\nPlatforms, Inc. (if you are located outside of the EEA or Switzerland).\n\nBy clicking \"I Accept\" below or by using or distributing any portion or element of the\nLlama Materials, you agree to be bound by this Agreement.\n\n1. License Rights and Redistribution.\n\n      a. Grant of Rights. 
You are granted a non-exclusive, worldwide, non-\ntransferable and royalty-free limited license under Meta's intellectual property or\nother rights owned by Meta embodied in the Llama Materials to use, reproduce,\ndistribute, copy, create derivative works of, and make modifications to the Llama\nMaterials.\n\n      b. Redistribution and Use.\n\n            i. If you distribute or make the Llama Materials, or any derivative works\nthereof, available to a third party, you shall provide a copy of this Agreement to such\nthird party.\n            ii.  If you receive Llama Materials, or any derivative works thereof, from\na Licensee as part of an integrated end user product, then Section 2 of this\nAgreement will not apply to you.\n\n            iii. You must retain in all copies of the Llama Materials that you\ndistribute the following attribution notice within a \"Notice\" text file distributed as a\npart of such copies: \"Llama 2 is licensed under the LLAMA 2 Community License,\nCopyright (c) Meta Platforms, Inc. All Rights Reserved.\"\n\n            iv. Your use of the Llama Materials must comply with applicable laws\nand regulations (including trade compliance laws and regulations) and adhere to the\nAcceptable Use Policy for the Llama Materials (available at\nhttps://ai.meta.com/llama/use-policy), which is hereby incorporated by reference into\nthis Agreement.\n\n            v. You will not use the Llama Materials or any output or results of the\nLlama Materials to improve any other large language model (excluding Llama 2 or\nderivative works thereof).\n\n2. Additional Commercial Terms. 
If, on the Llama 2 version release date, the\nmonthly active users of the products or services made available by or for Licensee,\nor Licensee's affiliates, is greater than 700 million monthly active users in the\npreceding calendar month, you must request a license from Meta, which Meta may\ngrant to you in its sole discretion, and you are not authorized to exercise any of the\nrights under this Agreement unless or until Meta otherwise expressly grants you\nsuch rights.\n\n3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE\nLLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE\nPROVIDED ON AN \"AS IS\" BASIS, WITHOUT WARRANTIES OF ANY KIND,\nEITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY\nWARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR\nFITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE\nFOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING\nTHE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR\nUSE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS.\n\n4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE\nLIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT,\nNEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS\nAGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL,\nCONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN\nIF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF\nANY OF THE FOREGOING.\n\n5. Intellectual Property.\n\n      a. No trademark licenses are granted under this Agreement, and in\nconnection with the Llama Materials, neither Meta nor Licensee may use any name\nor mark owned by or associated with the other or any of its affiliates, except as\nrequired for reasonable and customary use in describing and redistributing the\nLlama Materials.\n\n      b. 
Subject to Meta's ownership of Llama Materials and derivatives made by or\nfor Meta, with respect to any derivative works and modifications of the Llama\nMaterials that are made by you, as between you and Meta, you are and will be the\nowner of such derivative works and modifications.\n\n      c. If you institute litigation or other proceedings against Meta or any entity\n(including a cross-claim or counterclaim in a lawsuit) alleging that the Llama\nMaterials or Llama 2 outputs or results, or any portion of any of the foregoing,\nconstitutes infringement of intellectual property or other rights owned or licensable\nby you, then any licenses granted to you under this Agreement shall terminate as of\nthe date such litigation or claim is filed or instituted. You will indemnify and hold\nharmless Meta from and against any claim by any third party arising out of or related\nto your use or distribution of the Llama Materials.\n\n6. Term and Termination. The term of this Agreement will commence upon your\nacceptance of this Agreement or access to the Llama Materials and will continue in\nfull force and effect until terminated in accordance with the terms and conditions\nherein. Meta may terminate this Agreement if you are in breach of any term or\ncondition of this Agreement. Upon termination of this Agreement, you shall delete\nand cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the\ntermination of this Agreement.\n\n7. Governing Law and Jurisdiction. This Agreement will be governed and\nconstrued under the laws of the State of California without regard to choice of law\nprinciples, and the UN Convention on Contracts for the International Sale of Goods\ndoes not apply to this Agreement. The courts of California shall have exclusive\njurisdiction of any dispute arising out of this Agreement.\n"
  },
  {
    "path": "llama_recipes/__init__.py",
    "content": ""
  },
  {
    "path": "llama_recipes/configs/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .peft import (\n    lora_config,\n    llama_adapter_config,\n    prefix_config,\n    qlora_config,\n    bitsandbytes_config,\n)\nfrom .fsdp import fsdp_config\nfrom .training import train_config\n"
  },
  {
    "path": "llama_recipes/configs/datasets.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom dataclasses import dataclass\n\n\n@dataclass\nclass samsum_dataset:\n    dataset: str = \"samsum_dataset\"\n    train_split: str = \"train\"\n    test_split: str = \"validation\"\n    input_length: int = 2048\n\n\n@dataclass\nclass grammar_dataset:\n    dataset: str = \"grammar_dataset\"\n    train_split: str = \"ft_datasets/grammar_dataset/gtrain_10k.csv\"\n    test_split: str = \"ft_datasets/grammar_dataset/grammar_validation.csv\"\n    input_length: int = 2048\n\n\n@dataclass\nclass alpaca_dataset:\n    dataset: str = \"alpaca_dataset\"\n    train_split: str = \"train\"\n    test_split: str = \"val\"\n    data_path: str = \"ft_datasets/alpaca_data.json\"\n\n\n@dataclass\nclass completion:\n    \"\"\"\n    A generic class for completion format datasets. Format is expected\n    to be JSONL like:\n        ```\n        {\"text\": \"...\"}\n        ```\n    or\n        ```\n        {\"text\": \"prompt ...\", \"completion\": \"...\"}\n        ```\n    \"\"\"\n\n    dataset: str = \"completion\"\n    train_split: str = \"train\"\n    test_split: str = \"val\"\n    data_path: str = None\n    num_validation_samples: int = 100\n    run_validation: bool = True\n    validation_data_path: str = None\n    pack_sequences: bool = True\n    wrap_packed_sequences: bool = False\n    chunk_size: int = 2048\n    max_seq_length: int = 4096\n"
  },
  {
    "path": "llama_recipes/configs/fsdp.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom dataclasses import dataclass\nfrom torch.distributed.fsdp import ShardingStrategy\nfrom torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType\n\n\n@dataclass\nclass fsdp_config:\n    mixed_precision: bool = True\n    use_fp16: bool = False\n    sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD\n    checkpoint_type: StateDictType = StateDictType.SHARDED_STATE_DICT  # SHARDED_STATE_DICT saves one file per rank and allows resizing the world-size; alternatively use FULL_STATE_DICT for a single consolidated checkpoint.\n    fsdp_activation_checkpointing: bool = True\n    pure_bf16: bool = False\n    optimizer: str = \"AdamW\"\n"
  },
  {
    "path": "llama_recipes/configs/peft.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom dataclasses import dataclass\nfrom typing import ClassVar, List\nimport torch\n\n\n@dataclass\nclass lora_config:\n    r: int = 8\n    lora_alpha: int = 16\n    target_modules: ClassVar[List[str]] = [\"q_proj\", \"v_proj\"]\n    bias = \"none\"\n    task_type: str = \"CAUSAL_LM\"\n    lora_dropout: float = 0.05\n    inference_mode: bool = False\n\n\n@dataclass\nclass llama_adapter_config:\n    adapter_len: int = 10\n    adapter_layers: int = 30\n    task_type: str = \"CAUSAL_LM\"\n\n\n@dataclass\nclass prefix_config:\n    num_virtual_tokens: int = 30\n    task_type: str = \"CAUSAL_LM\"\n\n\n@dataclass\nclass bitsandbytes_config:\n    load_in_4bit: bool = True\n    bnb_4bit_quant_type: str = \"nf4\"\n    bnb_4bit_use_double_quant: bool = True\n    bnb_4bit_compute_dtype: torch.dtype = torch.bfloat16\n\n\n@dataclass\nclass qlora_config:\n    r: int = 8\n    lora_alpha: int = 32\n    target_modules: ClassVar[List[str]] = [\"q_proj\", \"v_proj\"]\n    bias = \"none\"\n    task_type: str = \"CAUSAL_LM\"\n    lora_dropout: float = 0.05\n    inference_mode: bool = False\n"
  },
  {
    "path": "llama_recipes/configs/training.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\nfrom dataclasses import dataclass\n\n\n@dataclass\nclass train_config:\n    model_name: str = \"llama_weights/llama-2-7b\"\n    enable_fsdp: bool = False\n    run_validation: bool = True\n    batch_size_training: int = 4\n    num_epochs: int = 3\n    num_workers_dataloader: int = 1\n    gradient_accumulation_steps: int = 1\n    lr: float = 1e-4\n    weight_decay: float = 0.0\n    gamma: float = 0.85\n    seed: int = 42\n    use_fp16: bool = False\n    mixed_precision: bool = True\n    val_batch_size: int = 1\n    dataset = \"completion\"\n    peft_method: str = \"lora\"  # None , llama_adapter, prefix\n    use_peft: bool = False\n    output_dir: str = \"PATH/to/save/PEFT/model\"\n    freeze_layers: bool = False\n    num_freeze_layers: int = 1\n    quantization: bool = False\n    one_gpu: bool = False\n    save_model: bool = True\n    dist_checkpoint_root_folder: str = (\n        \"PATH/to/save/FSDP/model\"  # will be used if using FSDP\n    )\n    dist_checkpoint_folder: str = \"fine-tuned\"  # will be used if using FSDP\n    save_optimizer: bool = False  # will be used if using FSDP\n    data_path: str = None\n    num_validation_samples: int = 100\n    validation_data_path: str = None\n    validation_prompt: str = None\n    wrap_packed_sequences: bool = False\n    pack_sequences: bool = True\n    chunk_size: int = 2048\n\n    # optim: Optional[str] = field(\n    #     default=\"paged_adamw_32bit\",\n    #     metadata={\"help\": \"The optimizer to use.\"},\n    # )\n    # lr_scheduler_type: str = field(\n    #     default=\"constant\",\n    #     metadata={\"help\": \"Learning rate schedule. 
Constant a bit better than cosine, and has advantage for analysis\"},\n    # )\n    # max_steps: int = field(default=10000, metadata={\"help\": \"How many optimizer update steps to take\"})\n    # warmup_ratio\n\n    # save_steps: int = field(default=100, metadata={\"help\": \"Save checkpoint every X updates steps.\"})\n    # logging_steps: int = field(default=10, metadata={\"help\": \"Log every X updates steps.\"})\n    # eval_steps: int = field(default=None, metadata={\"help\": \"Run evaluation every X steps\"})\n    # evaluation_strateg\n"
  },
  {
    "path": "llama_recipes/ft_datasets/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .grammar_dataset import get_dataset as get_grammar_dataset\nfrom .alpaca_dataset import InstructionDataset as get_alpaca_dataset\nfrom .samsum_dataset import get_preprocessed_samsum as get_samsum_dataset\nfrom .completion_dataset import get_completion_dataset as get_completion_dataset\n"
  },
  {
    "path": "llama_recipes/ft_datasets/alpaca_dataset.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n# For dataset details visit: https://crfm.stanford.edu/2023/03/13/alpaca.html\n\nimport copy\nimport json\nimport torch\n\nfrom torch.utils.data import Dataset\n\nPROMPT_DICT = {\n    \"prompt_input\": (\n        \"Below is an instruction that describes a task, paired with an input that provides further context. \"\n        \"Write a response that appropriately completes the request.\\n\\n\"\n        \"### Instruction:\\n{instruction}\\n\\n### Input:\\n{input}\\n\\n### Response:\"\n    ),\n    \"prompt_no_input\": (\n        \"Below is an instruction that describes a task. \"\n        \"Write a response that appropriately completes the request.\\n\\n\"\n        \"### Instruction:\\n{instruction}\\n\\n### Response:\"\n    ),\n}\n\n\nclass InstructionDataset(Dataset):\n    def __init__(self, dataset_config, tokenizer, partition=\"train\", max_words=30):\n        self.ann = json.load(open(dataset_config.data_path))\n        if partition == \"train\":\n            self.ann = self.ann\n        else:\n            self.ann = self.ann[:200]\n\n        self.max_words = max_words\n        # tokenizer = Tokenizer(model_path=model_path + \"./tokenizer.model\")\n        self.tokenizer = tokenizer\n        # self.tokenizer1 = tokenizer\n\n    def __len__(self):\n        return len(self.ann)\n\n    def __getitem__(self, index):\n        ann = self.ann[index]\n        if ann.get(\"input\", \"\") == \"\":\n            prompt = PROMPT_DICT[\"prompt_no_input\"].format_map(ann)\n        else:\n            prompt = PROMPT_DICT[\"prompt_input\"].format_map(ann)\n        example = prompt + ann[\"output\"]\n        prompt = torch.tensor(self.tokenizer.encode(prompt), dtype=torch.int64)\n        example = self.tokenizer.encode(example)\n        example.append(self.tokenizer.eos_token_id)\n        example = 
torch.tensor(example, dtype=torch.int64)\n        padding = self.max_words - example.shape[0]\n        if padding > 0:\n            example = torch.cat((example, torch.zeros(padding, dtype=torch.int64) - 1))\n        elif padding < 0:\n            example = example[: self.max_words]\n        labels = copy.deepcopy(example)\n        labels[: len(prompt)] = -1\n        example_mask = example.ge(0)\n        label_mask = labels.ge(0)\n        example[~example_mask] = 0\n        labels[~label_mask] = 0\n        example_mask = example_mask.float()\n        label_mask = label_mask.float()\n\n        return {\n            \"input_ids\": example,\n            \"labels\": labels,\n            \"attention_mask\": example_mask,\n        }\n"
  },
  {
    "path": "llama_recipes/ft_datasets/completion_dataset.py",
    "content": "from .utils import Concatenator\nimport json\nfrom datasets import Dataset\n\n\ndef load_data(\n    dataset_config,\n    split,\n):\n    data_path = dataset_config.data_path\n    num_validation_samples = int(dataset_config.num_validation_samples)\n    run_validation = dataset_config.run_validation\n    validation_data_path = dataset_config.validation_data_path\n\n    def _load_data(path):\n        data = []\n        with open(path, \"r\") as file:\n            for line in file:\n                data.append(json.loads(line))\n\n        dataset = Dataset.from_dict(\n            {key: [item[key] for item in data] for key in data[0]},\n        )\n\n        return dataset\n\n    if not validation_data_path:\n        dataset = _load_data(data_path)\n\n        if run_validation and split == \"train\":\n            print(\n                f\"Selecting observations 0 through {len(dataset)-num_validation_samples} from data for training...\"\n            )\n            end_index = len(dataset) - num_validation_samples\n            indices = list(range(end_index))\n            dataset = dataset.select(indices)\n\n        elif run_validation and split == \"val\":\n            print(\n                f\"Selecting observations {len(dataset)-num_validation_samples} through {len(dataset)} from data for validation...\"\n            )\n            start_index = len(dataset) - num_validation_samples\n            indices = list(range(start_index, len(dataset)))\n            dataset = dataset.select(indices)\n    else:\n        if split == \"train\":\n            dataset = _load_data(data_path)\n        elif split == \"val\":\n            dataset = _load_data(validation_data_path)\n\n    return dataset\n\n\ndef format_data(dataset, tokenizer, config=None):\n    def apply_text_template(sample):\n        return {\"text\": sample[\"text\"] + tokenizer.eos_token}\n\n    def apply_prompt_template(sample):\n        return {\n            \"text\": sample[\"prompt\"] + \"\\n\" + 
sample[\"completion\"] + tokenizer.eos_token\n        }\n\n    # Assume - all \"text\" or all \"prompt/completion\"\n    if \"text\" in dataset[0]:\n        dataset = dataset.map(\n            apply_text_template, remove_columns=list(dataset.features)\n        )\n    elif \"prompt\" in dataset[0] and \"completion\" in dataset[0]:\n        dataset = dataset.map(\n            apply_prompt_template, remove_columns=list(dataset.features)\n        )\n    else:\n        raise Exception(\n            \"Dataset did not contain `text` or `prompt` and `completion` inputs. Example row:\",\n            dataset[0],\n        )\n\n    return dataset\n\n\ndef tokenize_data(dataset, tokenizer, config=None):\n    try:\n        max_length = config.max_seq_length\n    except:\n        max_length = tokenizer.model_max_length\n\n    dataset = dataset.map(\n        lambda sample: tokenizer(\n            sample[\"text\"], max_length=max_length, truncation=True\n        ),\n        batched=True,\n        remove_columns=list(dataset.features),\n    ).map(lambda sample: {\"labels\": sample[\"input_ids\"]}, batched=True)\n\n    if config.pack_sequences:\n        dataset = dataset.map(\n            Concatenator(\n                chunk_size=config.chunk_size,\n                wrap_packed_sequences=config.wrap_packed_sequences,\n            ),\n            batched=True,\n        )\n\n    return dataset\n\n\ndef get_completion_dataset(config: str, tokenizer, split: str = \"train\"):\n    dataset = load_data(config, split)\n    dataset = format_data(dataset, tokenizer, config)\n    dataset = tokenize_data(dataset, tokenizer, config)\n\n    return dataset\n"
  },
  {
    "path": "llama_recipes/ft_datasets/grammar_dataset/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .grammar_dataset import get_dataset\n"
  },
  {
    "path": "llama_recipes/ft_datasets/grammar_dataset/grammar_dataset.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n# For dataset details visit: https://huggingface.co/datasets/jfleg\n# For download and preparation see: recipes/ft_datasets/grammar_dataset/grammar_dataset_process.ipynb\n\n\n\nfrom torch.utils.data import Dataset\n\nfrom datasets import load_dataset\nfrom pathlib import Path\n\nfrom ..utils import ConcatDataset\n\n\nclass grammar(Dataset):\n    def __init__(\n        self,\n        tokenizer,\n        csv_name=None,\n    ):\n        try:\n            self.dataset = load_dataset(\n                \"csv\",\n                data_files={\"train\": [csv_name]},  # \"eval\": \"grammar_validation.csv\"},\n                delimiter=\",\",\n            )\n        except Exception as e:\n            print(\n                \"Loading of grammar dataset failed! Please see recipes/ft_datasets/grammar_dataset/grammar_dataset_process.ipynb for details on how to download the dataset.\"\n            )\n            raise e\n\n        # self.dataset = load_dataset(\"wikihow\", \"all\", data_dir=\"data/\", split=type_path)\n        # if num_samples:\n        #    self.dataset = self.dataset.select(list(range(0, num_samples)))\n        self.tokenizer = tokenizer\n        self.print_text = False  # print_text\n\n    def __len__(self):\n        return self.dataset[\"train\"].shape[0]\n\n    def convert_to_features(self, example_batch):\n        # Create prompt and tokenize contexts and questions\n\n        if self.print_text:\n            print(\"Input Text: \", self.clean_text(example_batch[\"text\"]))\n\n        input_ = example_batch[\"input\"]\n        target_ = example_batch[\"target\"]\n\n        prompt = (\n            f\"Correct this to standard English: {input_}\\n---\\nCorrected: {target_}\"\n        )\n        sample = self.tokenizer(prompt)\n\n        return sample\n\n    def __getitem__(self, 
index):\n        sample = self.convert_to_features(self.dataset[\"train\"][index])\n        source_ids = sample[\"input_ids\"]\n\n        src_mask = sample[\"attention_mask\"]\n\n        return {\n            \"input_ids\": source_ids,\n            \"attention_mask\": src_mask,\n            \"labels\": source_ids.copy(),\n        }\n\n\ndef get_dataset(dataset_config, tokenizer, csv_name=None):\n    \"\"\"cover function for handling loading the working dataset\"\"\"\n    \"\"\"dataset loading\"\"\"\n    if csv_name is None:\n        currPath = Path.cwd() / \"datasets_grammar\" / \"grammar_train.csv\"\n        print(f\"Loading dataset {currPath}\")\n        csv_name = str(currPath)\n    dataset = grammar(\n        tokenizer=tokenizer,\n        csv_name=csv_name,\n    )\n\n    return ConcatDataset(dataset, chunk_size=dataset_config.input_length)\n"
  },
  {
    "path": "llama_recipes/ft_datasets/grammar_dataset/grammar_dataset_process.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Copyright (c) Meta Platforms, Inc. and affiliates.\\n\",\n    \"This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\\n\",\n    \"\\n\",\n    \"Use this notebook to pull in datasets and apply pre-processing.  Most grammar datasets unfortunately require preprocessing before being usable in training. (example - jfleg has 4 targets per input, so we have to rematch as 1:1 pairings) \"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n\n   \"source\": [\n    \"import csv\\n\",\n    \"from datasets import load_metric, load_dataset\\n\",\n    \"from pathlib import Path\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"list_replacements = [\\n\",\n    \"  (\\\" .\\\", \\\".\\\"), \\n\",\n    \"  (\\\" ,\\\", \\\",\\\"),\\n\",\n    \"  (\\\" '\\\", \\\"'\\\"),\\n\",\n    \"  (\\\" ?\\\", \\\"?\\\"),\\n\",\n    \"  (\\\" !\\\", \\\"!\\\"),\\n\",\n    \"  (\\\" :\\\", \\\"!\\\"),\\n\",\n    \"  (\\\" ;\\\", \\\"!\\\"),\\n\",\n    \"  (\\\" n't\\\", \\\"n't\\\"),\\n\",\n    \"  (\\\" v\\\", \\\"n't\\\"),\\n\",\n    \"  (\\\"2 0 0 6\\\", \\\"2006\\\"),\\n\",\n    \"  (\\\"5 5\\\", \\\"55\\\"),\\n\",\n    \"  (\\\"4 0 0\\\", \\\"400\\\"),\\n\",\n    \"  (\\\"1 7-5 0\\\", \\\"1750\\\"),\\n\",\n    \"  (\\\"2 0 %\\\", \\\"20%\\\"),\\n\",\n    \"  (\\\"5 0\\\", \\\"50\\\"),\\n\",\n    \"  (\\\"1 2\\\", \\\"12\\\"),\\n\",\n    \"  (\\\"1 0\\\", \\\"10\\\"),\\n\",\n    \"  ('\\\" ballast water', '\\\"ballast water')\\n\",\n    \"  ]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def correct_spacing(item):\\n\",\n    \"    \\\"\\\"\\\" 
we iterate through the list of all replacements per each item in dataset\\\"\\\"\\\"\\n\",\n    \"    for fix in list_replacements:\\n\",\n    \"        item = item.replace(fix[0], fix[1])\\n\",\n    \"    return item\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def generate_csv(csv_path, dataset):\\n\",\n    \"    \\\"\\\"\\\" apply spacing corrections and save out matched pairs to csv file as dataset\\\"\\\"\\\"\\n\",\n    \"    with open(csv_path, 'w', newline='') as csvfile:\\n\",\n    \"        writer = csv.writer(csvfile)\\n\",\n    \"        writer.writerow([\\\"input\\\", \\\"target\\\"])\\n\",\n    \"        for case in dataset:\\n\",\n    \"     \\t    # Adding the t5 task indication prefix to input \\n\",\n  \n    \"            input_text = case[\\\"sentence\\\"]\\n\",\n\n    \"            input_text = correct_spacing(input_text)\\n\",\n    \"\\n\",\n    \"            for correction in case[\\\"corrections\\\"]:\\n\",\n    \"              correction = correct_spacing(correction)\\n\",\n    \"              # a few of the cases contain blank strings. 
\\n\",\n    \"              if input_text and correction:\\n\",\n    \"                writer.writerow([input_text, correction])\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"In Jfleg  - validation will be used as 'train', test will be 'validation'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 5,\n\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n\n      \"Found cached dataset jfleg (/data/home/mreso/.cache/huggingface/datasets/jfleg/default/1.0.0/ed4ab2367351fe31949f48849ae6732b164f0d5ea6bb5d4357ff4293ac89511b)\\n\",\n      \"Found cached dataset jfleg (/data/home/mreso/.cache/huggingface/datasets/jfleg/default/1.0.0/ed4ab2367351fe31949f48849ae6732b164f0d5ea6bb5d4357ff4293ac89511b)\\n\"\n\n     ]\n    }\n   ],\n   \"source\": [\n    \"train_dataset = load_dataset(\\\"jfleg\\\", split='validation[:]') \\n\",\n    \"eval_dataset = load_dataset(\\\"jfleg\\\", split='test[:]')\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 6,\n\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Dataset({\\n\",\n      \"    features: ['sentence', 'corrections'],\\n\",\n      \"    num_rows: 755\\n\",\n      \"})\\n\",\n      \"Dataset({\\n\",\n      \"    features: ['sentence', 'corrections'],\\n\",\n      \"    num_rows: 748\\n\",\n      \"})\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print(train_dataset)\\n\",\n    \"print(eval_dataset)\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 7,\n\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Students can focus on only a few subjects they are intwerested in and they will become an experts in those areas . 
\\n\",\n      \"['Students can focus on only a few subjects they are interested in and they will become experts in those areas . ', 'Students can focus on only a few subjects they are interested in and they will become experts in those areas . ', 'Students can focus on only a few subjects they are interested in and they will become an expert in those areas . ', 'Students can focus on only a few subjects they are interested in and they will become an expert in those areas . ']\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"print(train_dataset['sentence'][22])\\n\",\n    \"print(train_dataset['corrections'][22])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 8,\n\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"data\": {\n      \"text/plain\": [\n       \"'Students can focus on only a few subjects they are intwerested in and they will become an experts in those areas. '\"\n      ]\n     },\n\n     \"execution_count\": 8,\n\n     \"metadata\": {},\n     \"output_type\": \"execute_result\"\n    }\n   ],\n   \"source\": [\n    \"clean22 = correct_spacing(train_dataset['sentence'][22])\\n\",\n    \"clean22\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 9,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"jfleg_dir = Path.cwd()/'jfleg_dataset'  # if you only use 'jfleg', hf will try and use that and complain\\n\",\n    \"jfleg_dir.mkdir(parents=True,exist_ok=True)\\n\",\n    \"c4_dir = Path.cwd()/'c4_dataset'\\n\",\n    \"c4_dir.mkdir(parents=True,exist_ok=True)\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Process Jfleg data  \"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 10,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"j_train_file = jfleg_dir/'jtrain.csv'\\n\",\n    \"j_eval_file = jfleg_dir/'jeval.csv'\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n\n   \"execution_count\": 11,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"generate_csv(j_train_file, train_dataset)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 12,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"generate_csv(j_eval_file, eval_dataset)\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Process C4_200M (!) - we'll pull 10K to start\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 13,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"c4_dataset = load_dataset(\\\"liweili/c4_200m\\\", streaming = True)\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 14,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"iterator = iter(c4_dataset['train'])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 15,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"def c4_generate_csv(csv_path, iterator, num_examples):\\n\",\n    \"    with open(csv_path, 'w', newline='') as csvfile:\\n\",\n    \"        writer = csv.writer(csvfile)\\n\",\n    \"        writer.writerow([\\\"input\\\", \\\"target\\\"])\\n\",\n    \"        for i in range(0,num_examples):\\n\",\n    \"          data = next(iterator)\\n\",\n\n    \"          input_text = data[\\\"input\\\"]\\n\",\n\n    \"          input_text = correct_spacing(input_text)\\n\",\n    \"          correction = correct_spacing(data[\\\"output\\\"])\\n\",\n    \"          if input_text and correction:\\n\",\n    \"            writer.writerow([input_text, correction])\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 16,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"c4_dir = Path.cwd()/'c4_dataset'\\n\",\n    \"c4_dir.mkdir(parents=True,exist_ok=True)\"\n   ]\n  },\n  {\n   \"attachments\": 
{},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"You can modify the following to make the csv file with desired number of instances, here we go for 10k to make a quick test\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 17,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"c4_filename = c4_dir/'c4train_10k.csv'\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 18,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"c4_generate_csv(c4_filename, iterator, num_examples=10000)\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Create a single training file by combining jtrain and c4train\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 19,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"merge_list = [j_train_file, c4_filename, ]\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 20,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"import pandas as pd\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 21,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"combined_csv = pd.concat([pd.read_csv(fn) for fn in merge_list])\\n\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 22,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"merged_name = \\\"gtrain_10k.csv\\\"\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 23,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"combined_csv.to_csv(merged_name, index=False, encoding = 'utf-8-sig', )\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n\n   \"execution_count\": 24,\n\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"eval_name = \\\"grammar_validation.csv\\\"\"\n   ]\n\n  },\n  {\n   
\"cell_type\": \"code\",\n   \"execution_count\": 25,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"eval_csv = pd.read_csv(j_eval_file)\\n\",\n    \"eval_csv.to_csv(eval_name, index=False, encoding = 'utf-8-sig', )\"\n   ]\n\n  }\n ],\n \"metadata\": {\n  \"interpreter\": {\n   \"hash\": \"5b2c14c5f2a3b21e6c2412c8196f5145870350e81c0b737cae3e5c60eb1e1eac\"\n  },\n  \"kernelspec\": {\n\n   \"display_name\": \"Python 3 (ipykernel)\",\n\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.10.11\"\n\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n\n}\n"
  },
  {
    "path": "llama_recipes/ft_datasets/samsum_dataset.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n# For dataset details visit: https://huggingface.co/datasets/samsum\n\nimport datasets\nfrom .utils import Concatenator\n\n\ndef get_preprocessed_samsum(dataset_config, tokenizer, split):\n    dataset = datasets.load_dataset(\"samsum\", split=split)\n\n    prompt = (\n        \"Summarize this dialog:\\n{dialog}\\n---\\nSummary:\\n{summary}{eos_token}\"\n    )\n\n    def apply_prompt_template(sample):\n        return {\n            \"text\": prompt.format(\n                dialog=sample[\"dialogue\"],\n                summary=sample[\"summary\"],\n                eos_token=tokenizer.eos_token,\n            )\n        }\n\n    dataset = dataset.map(apply_prompt_template, remove_columns=list(dataset.features))\n\n    dataset = dataset.map(\n        lambda sample: tokenizer(sample[\"text\"]),\n        batched=True,\n        remove_columns=list(dataset.features),\n    ).map(Concatenator(), batched=True)\n    return dataset\n"
  },
  {
    "path": "llama_recipes/ft_datasets/utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom tqdm import tqdm\nfrom itertools import chain\nfrom torch.utils.data import Dataset\n\n\nclass Concatenator(object):\n    def __init__(self, chunk_size=2048, wrap_packed_sequences=False):\n        self.chunk_size = chunk_size\n        self.residual = {\"input_ids\": [], \"attention_mask\": []}\n        self.wrap_packed_sequences = wrap_packed_sequences\n\n    def _wrap_concat(self, batch):\n        \"\"\"\n        When we pack samples into a single sequence, it's possible that the final\n        sample's sequence will exceed `chunk_size`. In this case, the `_wrap_concat`\n        method will wrap the final sample around to the beginning of the next sequence.\n        This breaks the sample into two parts and may introduce samples that violate prompt formats.\n        However, it allows us to strictly enforce chunk size.\n        \"\"\"\n        concatenated_samples = {\n            k: v + list(chain(*batch[k])) for k, v in self.residual.items()\n        }\n\n        total_length = len(concatenated_samples[list(concatenated_samples.keys())[0]])\n\n        if total_length >= self.chunk_size:\n            chunk_num = total_length // self.chunk_size\n            result = {\n                k: [\n                    v[i : i + self.chunk_size]\n                    for i in range(0, chunk_num * self.chunk_size, self.chunk_size)\n                ]\n                for k, v in concatenated_samples.items()\n            }\n            self.residual = {\n                k: v[(chunk_num * self.chunk_size) :]\n                for k, v in concatenated_samples.items()\n            }\n        else:\n            result = concatenated_samples\n            self.residual = {k: [] for k in concatenated_samples.keys()}\n\n        # result[\"labels\"] = result[\"input_ids\"].copy()\n\n        return 
result\n\n    def _concat(self, batch):\n        \"\"\"\n        When we pack samples into a single sequence, it's possible that the final\n        sample's sequence will exceed `chunk_size`. In this case, the `_concat` method\n        will simply promote the final sample to the next sequence. This may introduce\n        sequences with variable lengths, e.g. some that are below `chunk_size`,\n        but it allows us to pack sequences while strictly respecting formatting.\n        \"\"\"\n\n        # Initialize current sequences from residual or empty if none exists\n        keys = batch.keys()\n        current_sequences = {key: self.residual.get(key, []) for key in keys}\n\n        # # We'll store packed sequences in results\n        results = {key: [] for key in keys}\n\n        # len_of_new_seq = len(batch[list(batch.keys())[0]])\n        # len_of_current_seq = len(current_sequences[list(current_sequences.keys())[0]])\n\n        num_samples = len(batch[next(iter(keys))])\n\n        for idx in range(num_samples):\n            # Check if adding next sample will exceed the chunk size for any key\n            len_current_sequences = len(current_sequences[list(keys)[0]])\n            len_batch_sequence = len(batch[list(keys)[0]][idx])\n\n            will_exceed = len_current_sequences + len_batch_sequence > self.chunk_size\n\n            if will_exceed:\n                if len_current_sequences > 0:\n                    for key in keys:\n                        results[key].append(current_sequences[key])\n                        current_sequences[key] = []\n\n                # After appending to results, extend current_sequences with the sample for all keys\n                for key in keys:\n                    current_sequences[key].extend(batch[key][idx])\n            else:\n                for key in keys:\n                    current_sequences[key].extend(batch[key][idx])\n\n        # Store unappended sequences as residual\n        self.residual = 
current_sequences\n\n        # results[\"labels\"] = results[\"input_ids\"].copy()\n\n        return results\n\n    def __call__(self, batch):\n        if self.wrap_packed_sequences:\n            return self._wrap_concat(batch)\n        else:\n            return self._concat(batch)\n\n\nclass ConcatDataset(Dataset):\n    def __init__(self, dataset, chunk_size=4096):\n        self.dataset = dataset\n        self.chunk_size = chunk_size\n\n        self.samples = []\n\n        buffer = {\n            \"input_ids\": [],\n            \"attention_mask\": [],\n            \"labels\": [],\n        }\n\n        for sample in tqdm(self.dataset, desc=\"Preprocessing dataset\"):\n            buffer = {k: v + sample[k] for k, v in buffer.items()}\n\n            while len(next(iter(buffer.values()))) > self.chunk_size:\n                self.samples.append(\n                    {k: v[: self.chunk_size] for k, v in buffer.items()}\n                )\n                buffer = {k: v[self.chunk_size :] for k, v in buffer.items()}\n\n    def __getitem__(self, idx):\n        return self.samples[idx]\n\n    def __len__(self):\n        return len(self.samples)\n"
  },
  {
    "path": "llama_recipes/llama_finetuning.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n\nimport fire\nimport torch\n\n# Unused imports removed\nfrom utils import fsdp_auto_wrap_policy\nfrom transformers import (\n    LlamaForCausalLM,\n    LlamaTokenizer,\n    AutoModelForCausalLM,\n    DataCollatorForTokenClassification,\n)\nimport torch.distributed as dist\n\n# Unused imports removed\nfrom utils.train_utils import (\n    train,\n    freeze_transformer_layers,\n    setup,\n    setup_environ_flags,\n    print_model_size,\n    get_policies,\n)\n\nfrom utils.dataset_utils import get_preprocessed_dataset\n\nfrom utils.config_utils import (\n    update_config,\n    generate_peft_config,\n    generate_dataset_config,\n)\nfrom peft import (\n    get_peft_model,\n    prepare_model_for_int8_training,\n    prepare_model_for_kbit_training,\n)\nfrom torch.distributed.fsdp import (\n    FullyShardedDataParallel as FSDP,\n)\nfrom torch.utils.data import DistributedSampler\nimport policies\nfrom policies import AnyPrecisionAdamW\nfrom configs import fsdp_config, train_config\nimport torch.optim as optim\nfrom torch.optim.lr_scheduler import StepLR\nimport torch\nimport torch.distributed as dist\nfrom transformers.models.llama.modeling_llama import LlamaDecoderLayer\n\n\ndef main(**kwargs):\n    # Update the configuration for the training and sharding process\n    update_config((train_config, fsdp_config), **kwargs)\n\n    # Set the seeds for reproducibility\n    torch.cuda.manual_seed(train_config.seed)\n    torch.manual_seed(train_config.seed)\n\n    #########################################################\n    # CONFIGURE DISTRIBUTED TRAINING -----------------------\n    #########################################################\n    if train_config.enable_fsdp:\n        setup()\n        # torchrun specific\n        import os\n\n        local_rank = 
int(os.environ[\"LOCAL_RANK\"])\n        rank = int(os.environ[\"RANK\"])\n        world_size = int(os.environ[\"WORLD_SIZE\"])\n\n    if torch.distributed.is_initialized():\n        torch.cuda.set_device(rank)\n        setup_environ_flags(rank)\n\n    #########################################################\n    # INITIALIZE TOKENIZEER --------------------------------\n    #########################################################\n    tokenizer = LlamaTokenizer.from_pretrained(train_config.model_name, legacy=False)\n\n    tokenizer.add_special_tokens(\n        {\n            \"pad_token\": \"<PAD>\",\n        }\n    )\n    #########################################################\n    # PREPARE TRAIN AND VALIDATION DATA --------------------\n    #########################################################\n    dataset_config = generate_dataset_config(train_config, kwargs)\n    update_config(\n        dataset_config,\n        **{\n            \"data_path\": train_config.data_path,\n            \"num_validation_samples\": train_config.num_validation_samples,\n            \"validation_data_path\": train_config.validation_data_path,\n            \"run_validation\": train_config.run_validation,\n            \"pack_sequences\": train_config.pack_sequences,\n            \"wrap_packed_sequences\": train_config.wrap_packed_sequences,\n            \"chunk_size\": train_config.chunk_size,\n        },\n    )\n\n    # Load and preprocess the dataset for training and validation\n    dataset_train = get_preprocessed_dataset(\n        tokenizer,\n        dataset_config,\n        split=\"train\",\n    )\n\n    if not train_config.enable_fsdp or rank == 0:\n        print(f\"--> Training Set Length = {len(dataset_train)}\")\n\n    if train_config.run_validation:\n        dataset_val = get_preprocessed_dataset(\n            tokenizer,\n            dataset_config,\n            split=\"val\",\n        )\n        if not train_config.enable_fsdp or rank == 0:\n            print(f\"--> 
Validation Set Length = {len(dataset_val)}\")\n    else:\n        dataset_val = None\n\n    train_sampler = None\n    val_sampler = None\n    if train_config.enable_fsdp:\n        train_sampler = DistributedSampler(\n            dataset_train,\n            rank=dist.get_rank(),\n            num_replicas=dist.get_world_size(),\n            shuffle=True,\n        )\n        if train_config.run_validation:\n            val_sampler = DistributedSampler(\n                dataset_val,\n                rank=dist.get_rank(),\n                num_replicas=dist.get_world_size(),\n            )\n\n    # Create DataLoaders for the training and validation dataset\n    data_collator = DataCollatorForTokenClassification(\n        tokenizer=tokenizer, padding=\"longest\"\n    )\n\n    train_dataloader = torch.utils.data.DataLoader(\n        dataset_train,\n        batch_size=train_config.batch_size_training,\n        num_workers=train_config.num_workers_dataloader,\n        pin_memory=True,\n        sampler=train_sampler if train_sampler else None,\n        drop_last=True,\n        collate_fn=data_collator,\n    )\n\n    if train_config.run_validation:\n        eval_dataloader = torch.utils.data.DataLoader(\n            dataset_val,\n            batch_size=train_config.val_batch_size,\n            num_workers=train_config.num_workers_dataloader,\n            pin_memory=True,\n            sampler=val_sampler if val_sampler else None,\n            drop_last=True,\n            collate_fn=data_collator,\n        )\n    else:\n        eval_dataloader = None\n\n    if len(train_dataloader) == 0:\n        raise ValueError(\n            \"Training dataloader is empty! This happens when your dataset is too small, relative to your batch size. \"\n            \"If `pack_sequences` is `True`, you're more likely to run into this issue, particularly with small datasets that \"\n            \"consist of short examples. 
Try setting `pack_sequences` to `False` and/or reducing your batch size.\"\n        )\n\n    #########################################################\n    # CONFIGURE AND INITIALIZE MODEL ------------------------\n    #########################################################\n\n    # Model preparation for full fine-tuning -------\n    # ----------------------------------------------\n    if not train_config.use_peft:\n        print(\"Loading model for peft\")\n        model = LlamaForCausalLM.from_pretrained(\n            train_config.model_name,\n            load_in_8bit=True if train_config.quantization else None,\n            device_map=\"auto\" if train_config.quantization else None,\n        )\n        print(\"Loaded model\")\n\n    else:\n        kwargs[\"r\"] = kwargs[\n            \"lora_rank\"\n        ]  # can't pass --r to the script, torchrun won't have it\n        peft_config = generate_peft_config(train_config.peft_method, kwargs)\n\n        # Model preparation for QLoRA fine-tuning ------\n        # ----------------------------------------------\n        if train_config.peft_method == \"qlora\":\n            print(\"LOADING MODEL FOR QLORA\")\n            bnb_config = generate_peft_config(\"bitsandbytes_config\", kwargs)\n            import os\n\n            print(\n                f\"Loading model from {train_config.model_name}, which contains the following files:\"\n            )\n            print(os.listdir(train_config.model_name))\n\n            model = AutoModelForCausalLM.from_pretrained(\n                train_config.model_name,\n                quantization_config=bnb_config,\n                device_map=\"auto\",  # dispatch efficiently the model on the available ressources\n                # max_memory = {i: max_memory for i in range(num_gpus)},\n            )\n            print(\"Loaded model\")\n\n            model.gradient_checkpointing_enable()\n            model = prepare_model_for_kbit_training(model)\n\n        # Model preparation 
for LoRA fine-tuning ------\n        # ----------------------------------------------\n\n        else:\n            model = LlamaForCausalLM.from_pretrained(\n                train_config.model_name,\n                load_in_8bit=True if train_config.quantization else None,\n                device_map=\"auto\" if train_config.quantization else None,\n            )\n\n        model = get_peft_model(model, peft_config)\n\n    model.print_trainable_parameters()\n\n    # We added a special token for padding, so we need to resize the token embeddings\n    model.resize_token_embeddings(model.config.vocab_size + 1)\n\n    print_model_size(model, train_config, rank if train_config.enable_fsdp else 0)\n\n    # Prepare the model for int8 training if quantization is enabled\n    if train_config.quantization:\n        model = prepare_model_for_int8_training(model)\n\n    # Convert the model to bfloat16 if fsdp and pure_bf16 is enabled\n    if train_config.enable_fsdp and fsdp_config.pure_bf16:\n        model.to(torch.bfloat16)\n\n    # setting up FSDP if enable_fsdp is enabled\n    if train_config.enable_fsdp:\n        if not train_config.use_peft and train_config.freeze_layers:\n            freeze_transformer_layers(train_config.num_freeze_layers)\n\n        mixed_precision_policy, wrapping_policy = get_policies(fsdp_config, rank)\n        my_auto_wrapping_policy = fsdp_auto_wrap_policy(model, LlamaDecoderLayer)\n\n        model = FSDP(\n            model,\n            auto_wrap_policy=my_auto_wrapping_policy\n            if train_config.use_peft\n            else wrapping_policy,\n            mixed_precision=mixed_precision_policy\n            if not fsdp_config.pure_bf16\n            else None,\n            sharding_strategy=fsdp_config.sharding_strategy,\n            device_id=torch.cuda.current_device(),\n            limit_all_gathers=True,\n        )\n        if fsdp_config.fsdp_activation_checkpointing:\n            policies.apply_fsdp_checkpointing(model)\n\n    # 
Note: When we use QLoRA, we load directly to devices with `automap`, so we don't need to move to cuda here.\n    elif (\n        not train_config.quantization\n        and not train_config.enable_fsdp\n        and not train_config.peft_method == \"qlora\"\n    ):\n        model.to(\"cuda\")\n\n    # Initialize the optimizer and learning rate scheduler\n    if not train_config.peft_method == \"qlora\":\n        if fsdp_config.pure_bf16 and fsdp_config.optimizer == \"anyprecision\":\n            optimizer = AnyPrecisionAdamW(\n                model.parameters(),\n                lr=train_config.lr,\n                momentum_dtype=torch.bfloat16,\n                variance_dtype=torch.bfloat16,\n                use_kahan_summation=False,\n            )\n        else:\n            optimizer = optim.AdamW(\n                model.parameters(),\n                lr=train_config.lr,\n                weight_decay=0.0,\n            )\n\n    gradient_accumulation_steps = train_config.gradient_accumulation_steps\n\n    if not train_config.peft_method == \"qlora\":\n        scheduler = StepLR(optimizer, step_size=1, gamma=train_config.gamma)\n\n        # Start the training process\n        results = train(\n            model,\n            train_dataloader,\n            eval_dataloader,\n            tokenizer,\n            optimizer,\n            scheduler,\n            gradient_accumulation_steps,\n            train_config,\n            fsdp_config if train_config.enable_fsdp else None,\n            local_rank if train_config.enable_fsdp else None,\n            rank if train_config.enable_fsdp else None,\n        )\n        if not train_config.enable_fsdp or rank == 0:\n            [print(f\"Key: {k}, Value: {v}\") for k, v in results.items()]\n\n    else:\n        from transformers import TrainingArguments, Trainer\n        from trl.trainer.utils import PeftSavingCallback\n\n        training_args = TrainingArguments(\n            output_dir=train_config.output_dir,\n            
per_device_train_batch_size=train_config.batch_size_training,\n            per_device_eval_batch_size=train_config.val_batch_size,\n            gradient_accumulation_steps=gradient_accumulation_steps,\n            learning_rate=train_config.lr,\n            bf16=True,\n            log_level=\"info\",\n            logging_steps=10,\n            optim=\"paged_adamw_32bit\",\n            warmup_ratio=0.03,\n            save_strategy=\"no\",\n            num_train_epochs=train_config.num_epochs,\n            gradient_checkpointing=True,\n            do_eval=True,\n        )\n\n        trainer = Trainer(\n            model=model,\n            tokenizer=tokenizer,\n            train_dataset=dataset_train,\n            eval_dataset=dataset_val,\n            data_collator=data_collator,\n            # peft_config=peft_config,\n            args=training_args,\n            compute_metrics=None,\n            callbacks=[PeftSavingCallback],\n        )\n\n        trainer.train()\n\n        trainer.model.save_pretrained(train_config.output_dir)\n\n\nif __name__ == \"__main__\":\n    fire.Fire(main)\n"
  },
  {
    "path": "llama_recipes/model_checkpointing/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .checkpoint_handler import (\n    load_model_checkpoint,\n    save_model_checkpoint,\n    load_optimizer_checkpoint,\n    save_optimizer_checkpoint,\n    save_model_and_optimizer_sharded,\n    load_model_sharded,\n    load_sharded_model_single_gpu,\n)\n"
  },
  {
    "path": "llama_recipes/model_checkpointing/checkpoint_handler.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom pathlib import Path\nfrom datetime import datetime\nimport torch\nimport time\n\nfrom torch.distributed.fsdp import (\n    FullyShardedDataParallel as FSDP,\n    StateDictType,\n    FullStateDictConfig,  # general model non-sharded, non-flattened params\n    )\n\nfrom torch.distributed._shard.checkpoint import (\n    FileSystemReader,\n)\nfrom torch.distributed.checkpoint.default_planner import (\n    DefaultSavePlanner,\n)\n\n\nfrom torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType\nimport torch.distributed._shard.checkpoint as dist_cp\nimport torch.distributed as dist\n\n\ndef get_date_of_run():\n    \"\"\"create date and time for file save uniqueness\n    example: 2022-05-07-08:31:12_PM'\n    \"\"\"\n    date_of_run = datetime.now().strftime(\"%Y-%m-%d-%I:%M:%S_%p\")\n    print(f\"--> current date and time of run = {date_of_run}\")\n    return date_of_run\n\n\n# create singleton saving policies to avoid making over and over\nfullstate_save_policy = FullStateDictConfig(offload_to_cpu=True, rank0_only=True)\n\n\ndef load_model_sharded(model, rank, cfg):\n    # torch.manual_seed(103)\n    folder_name = (\n        cfg.dist_checkpoint_root_folder\n        + \"/\"\n        + cfg.dist_checkpoint_folder\n        + \"-\"\n        + cfg.model_name\n    )\n\n    load_dir = Path.cwd() / folder_name\n\n    if not load_dir.exists():\n        if rank == 0:\n            print(\"No sharded_state_dict checkpoint directory found...skipping\")\n        return\n    if rank == 0:\n        print(f\"loading model from model path: {load_dir} \")\n    reader = FileSystemReader(load_dir)\n\n    with FSDP.state_dict_type(model, StateDictType.SHARDED_STATE_DICT):\n        checkpoint = model.state_dict()\n        if rank == 0:\n            ck = checkpoint.keys()\n            print(f\" 
checkpoint key len = {len(ck)} and \\n keys =  {ck}\")\n\n        dist_cp.load_state_dict(\n            state_dict=checkpoint,\n            storage_reader=reader,\n        )\n        if rank == 0:\n            print(\"checkpoint after load_state_dict()\")\n            ck = checkpoint.keys()\n            print(f\" checkpoint key len = {len(ck)} and \\n keys =  {ck}\")\n        model.load_state_dict(checkpoint)\n    if rank == 0:\n        print(f\"Sharded state checkpoint loaded from {load_dir}\")\n\n\ndef save_model_and_optimizer_sharded(model, rank, cfg, optim=None):\n    \"\"\"save model and optimizer via sharded_state_dict to save_dir\"\"\"\n\n    folder_name = (\n        cfg.dist_checkpoint_root_folder\n        + \"/\"\n        + cfg.dist_checkpoint_folder\n        + \"-\"\n        + cfg.model_name\n    )\n\n    save_dir = Path.cwd() / folder_name\n    if rank == 0:\n        print(f\"Saving model to {save_dir}\")\n\n    distributed_writer = dist_cp.FileSystemWriter(\n        save_dir,\n    )\n    t0 = time.perf_counter()\n\n    with FSDP.state_dict_type(model, StateDictType.SHARDED_STATE_DICT):\n        state_dict = {\"model\": model.state_dict()}\n        if optim is not None:\n            state_dict[\"optim\"] = FSDP.optim_state_dict(model, optim)\n\n        dist_cp.save_state_dict(\n            state_dict=state_dict,\n            storage_writer=distributed_writer,\n            planner=DefaultSavePlanner(),\n        )\n    dist.barrier()\n    t1 = time.perf_counter()\n    if rank == 0:\n        print(f\"Sharded state checkpoint saved to {save_dir}\")\n        print(f\"Checkpoint Time = {t1-t0:.4f}\\n\")\n\n\ndef save_model_checkpoint(\n    model,\n    optimizer,\n    rank,\n    cfg,\n    epoch=1,\n):\n    \"\"\"saving model via rank0 cpu streaming and full_state_dict\"\"\"\n\n    with FSDP.state_dict_type(\n        model, StateDictType.FULL_STATE_DICT, fullstate_save_policy\n    ):\n        cpu_state = model.state_dict()\n\n        print(f\"saving process: 
rank {rank}  done w model state_dict\\n\")\n\n    if rank == 0:\n        print(\"--> saving model ...\")\n        # create save path\n        folder_name = (\n            cfg.dist_checkpoint_root_folder\n            + \"/\"\n            + cfg.dist_checkpoint_folder\n            + \"-\"\n            + cfg.model_name\n        )\n        save_dir = Path.cwd() / folder_name\n        save_dir.mkdir(parents=True, exist_ok=True)\n        save_name = cfg.model_name + \"-\" + str(epoch) + \".pt\"\n        save_full_path = str(save_dir) + \"/\" + save_name\n\n        # save model\n        torch.save(cpu_state, save_full_path)\n\n        print(f\"model checkpoint saved for epoch {epoch} at {save_full_path}\\n\")\n\n\ndef load_model_checkpoint(model, rank, cfg):\n    \"\"\"load local checkpoint to rank0 cpu\n    must be called * before * passing to FSDP\"\"\"\n\n    if rank != 0:\n        return\n\n    # where is the checkpoint at...\n    full_state_dict_model_path = (\n        Path.cwd() / cfg.checkpoint_folder / cfg.checkpoint_model_filename\n    )\n    # is it present...\n    if not full_state_dict_model_path.is_file():\n        print(\n            f\"model checkpoint {full_state_dict_model_path} not present. 
Returning...\"\n        )\n        return\n\n    model_checkpoint = torch.load(full_state_dict_model_path)\n    # integrate into loaded model\n    model.load_state_dict(model_checkpoint)\n\n    print(\"model checkpoint loaded to rank0 cpu\")\n\n\ndef save_optimizer_checkpoint(model, optimizer, rank, cfg, epoch=1):\n    \"\"\"save optimizer state via full state dict\"\"\"\n\n    print(f\"--> optim state call on rank {rank}\\n\")\n\n    # pull all sharded optimizer states to rank0 cpu...\n\n    optim_state = FSDP.full_optim_state_dict(model, optimizer)\n\n    print(f\"optim state dict ready on {rank} and len of {len(optim_state)}\\n\")\n\n    if rank == 0:\n        folder_name = (\n            cfg.dist_checkpoint_root_folder\n            + \"/\"\n            + cfg.dist_checkpoint_folder\n            + \"-\"\n            + cfg.model_name\n        )\n        save_dir = Path.cwd() / folder_name\n        save_dir.mkdir(parents=True, exist_ok=True)\n\n        opt_save_name = \"optimizer\" + \"-\" + cfg.model_name + \"-\" + str(epoch) + \".pt\"\n        opt_save_full_path = save_dir / opt_save_name\n\n        print(\"--> saving optimizer state...\")\n\n        torch.save(optim_state, opt_save_full_path)\n\n        print(f\"--> saved {opt_save_full_path} to disk\")\n\n\ndef load_optimizer_checkpoint(model, optimizer_checkpoint_path, rank):\n    \"\"\"load an fsdp optimizer full_state checkpoint using scatter method\n    this ensures only rank 0 loads the optimizer state dict and scatters to other ranks\n    \"\"\"\n\n    if not optimizer_checkpoint_path.is_file():\n        print(\n            f\"warning - optimizer checkpoint not present {optimizer_checkpoint_path}. Returning. 
\"\n        )\n        return\n\n    full_osd = None\n\n    if rank == 0:\n        full_osd = torch.load(optimizer_checkpoint_path)\n\n    # called from all ranks, though only rank0 has a valid param for full_osd\n    sharded_osd = FSDP.scatter_full_optim_state_dict(full_osd, model)\n\n    print(f\"optimizer shard loaded on rank {rank}\")\n\n\ndef load_sharded_model_single_gpu(model, model_path):\n    reader = FileSystemReader(model_path)\n\n    state_dict = {\"model\": model.state_dict()}\n\n    dist_cp.load_state_dict(\n        state_dict=state_dict,\n        storage_reader=FileSystemReader(model_path),\n        no_dist=True,\n    )\n\n    model.load_state_dict(state_dict[\"model\"])\n\n    print(f\"Sharded state checkpoint loaded from {model_path}\")\n    return model\n"
  },
  {
    "path": "llama_recipes/multi_node.slurm",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the GNU General Public License version 3.\n\n\n#!/bin/bash\n\n#SBATCH --job-name=Nano-2d-trainer-20b-8nodes\n\n#SBATCH --ntasks=2\n#SBATCH --nodes=2\n#SBATCH --gpus-per-task=4\n#SBATCH --partition=train \nnodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) )\nnodes_array=($nodes)\nhead_node=${nodes_array[0]}\nhead_node_ip=$(srun --nodes=1 --ntasks=1 -w \"$head_node\" hostname --ip-address)\n# Enable for A100\nexport FI_PROVIDER=\"efa\"\n\necho Node IP: $head_node_ip\nexport LOGLEVEL=INFO\n# debugging flags (optional)\nexport NCCL_DEBUG=WARN\nexport NCCL_DEBUG_SUBSYS=WARN\nexport PYTHONFAULTHANDLER=1\nexport LD_LIBRARY_PATH=/opt/amazon/efa/lib:$LD_LIBRARY_PATH\nexport LD_LIBRARY_PATH=/usr/local/lib/:$LD_LIBRARY_PATH\nexport CUDA_LAUNCH_BLOCKING=0\n\n# on your cluster you might need these:\n# set the network interface\nexport NCCL_SOCKET_IFNAME=\"ens\"\nexport FI_EFA_USE_DEVICE_RDMA=1\n\nsrun  torchrun --nproc_per_node 4 --rdzv_id $RANDOM --rdzv_backend c10d --rdzv_endpoint $head_node_ip:29500 llama_finetuning.py  --enable_fsdp --use_peft --peft_method lora\n\n"
  },
  {
    "path": "llama_recipes/policies/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .mixed_precision import *\nfrom .wrapping import *\nfrom .activation_checkpointing_functions import apply_fsdp_checkpointing\nfrom .anyprecision_optimizer import AnyPrecisionAdamW\n"
  },
  {
    "path": "llama_recipes/policies/activation_checkpointing_functions.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom torch.distributed.algorithms._checkpoint.checkpoint_wrapper import (\n    checkpoint_wrapper,\n    CheckpointImpl,\n    apply_activation_checkpointing,\n)\n\nfrom transformers.models.llama.modeling_llama import LlamaDecoderLayer\nfrom functools import partial\n\nnon_reentrant_wrapper = partial(\n    checkpoint_wrapper,\n    checkpoint_impl=CheckpointImpl.NO_REENTRANT,\n)\n\ncheck_fn = lambda submodule: isinstance(submodule, LlamaDecoderLayer)\n\n\ndef apply_fsdp_checkpointing(model):\n    \"\"\"apply activation checkpointing to model\n    returns None as model is updated directly\n    \"\"\"\n    print(\"--> applying fsdp activation checkpointing...\")\n\n    apply_activation_checkpointing(\n        model, checkpoint_wrapper_fn=non_reentrant_wrapper, check_fn=check_fn\n    )\n"
  },
  {
    "path": "llama_recipes/policies/anyprecision_optimizer.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n# AnyPrecisionAdamW: a flexible precision AdamW optimizer\n# with optional Kahan summation for high precision weight updates.\n# Allows direct control over momentum, variance and auxiliary compensation\n# buffer dtypes.\n# Optional Kahan summation is used to offset precision reduction for\n# the weight updates. This allows full training in BFloat16 (equal or\n# better than FP32 results in many cases) due to high precision weight upates.\n\nimport torch\nfrom torch.optim.optimizer import Optimizer\n\n\nclass AnyPrecisionAdamW(Optimizer):\n    def __init__(\n        self,\n        params,\n        lr=1e-3,\n        betas=(0.9, 0.999),\n        eps=1e-8,\n        weight_decay=0.0,\n        use_kahan_summation=False,\n        momentum_dtype=torch.bfloat16,\n        variance_dtype=torch.bfloat16,\n        compensation_buffer_dtype=torch.bfloat16,\n    ):\n        \"\"\"\n        Args:\n                params (iterable): iterable of parameters to optimize or dicts defining\n                    parameter groups\n                lr (float, optional): learning rate (default: 1e-3)\n                betas (Tuple[float, float], optional): coefficients used for computing\n                    running averages of gradient and its square (default: (0.9, 0.999))\n                eps (float, optional): term added to the denominator to improve\n                    numerical stability (default: 1e-8)\n                weight_decay (float, optional): weight decay coefficient (default: 1e-2)\n\n                # Any Precision specific\n                use_kahan_summation = creates auxiliary buffer to ensure high precision\n                model param updates (default: False)\n                momentum_dtype = dtype for momentum  (default: BFloat32)\n                variance_dtype = dtype for uncentered 
variance (default: BFloat16)\n                compensation_buffer_dtype  = dtype for Kahan summation\n                                             buffer (default: BFloat16)\n\n                # Usage\n                This optimizer implements optimizer states, and Kahan summation\n                for high precision updates, all in user controlled dtypes.\n                Defaults are variance and momentum in BF16.\n                This can be run in FSDP mixed precision, amp, or full precision,\n                depending on what training pipeline you wish to work with.\n\n                Setting to use_kahan_summation = False, and changing momentum and\n                variance dtypes to FP32, reverts this to a standard AdamW optimizer.\n\n        \"\"\"\n        defaults = dict(\n            lr=lr,\n            betas=betas,\n            eps=eps,\n            weight_decay=weight_decay,\n            use_kahan_summation=use_kahan_summation,\n            momentum_dtype=momentum_dtype,\n            variance_dtype=variance_dtype,\n            compensation_buffer_dtype=compensation_buffer_dtype,\n        )\n\n        super().__init__(params, defaults)\n\n    @torch.no_grad()\n    def step(self, closure=None):\n        \"\"\"Performs a single optimization step.\n        Args:\n            closure (callable, optional): A closure that reevaluates the model\n                and returns the loss.\n        \"\"\"\n\n        if closure is not None:\n            with torch.enable_grad():\n                # to fix linter, we do not keep the returned loss for use atm.\n                closure()\n\n        for group in self.param_groups:\n            beta1, beta2 = group[\"betas\"]\n            lr = group[\"lr\"]\n            weight_decay = group[\"weight_decay\"]\n            eps = group[\"eps\"]\n            use_kahan_summation = group[\"use_kahan_summation\"]\n\n            momentum_dtype = group[\"momentum_dtype\"]\n            variance_dtype = group[\"variance_dtype\"]\n 
           compensation_buffer_dtype = group[\"compensation_buffer_dtype\"]\n\n            for p in group[\"params\"]:\n                if p.grad is None:\n                    continue\n\n                if p.grad.is_sparse:\n                    raise RuntimeError(\n                        \"AnyPrecisionAdamW does not support sparse gradients\"\n                    )\n\n                state = self.state[p]\n\n                # State initialization\n                if len(state) == 0:\n                    state[\"step\"] = torch.tensor(0.0)\n\n                    # momentum - EMA of gradient values\n                    state[\"exp_avg\"] = torch.zeros_like(\n                        p,\n                        dtype=momentum_dtype,\n                    )\n\n                    # variance uncentered - EMA of squared gradient values\n                    state[\"exp_avg_sq\"] = torch.zeros_like(\n                        p,\n                        dtype=variance_dtype,\n                    )\n\n                    # optional Kahan summation - accumulated error tracker\n                    if use_kahan_summation:\n                        state[\"compensation\"] = torch.zeros_like(\n                            p,\n                            dtype=compensation_buffer_dtype,\n                        )\n\n                # main processing -------------------------\n\n                # update the steps for each param group update\n                state[\"step\"] += 1\n                step = state[\"step\"]\n\n                exp_avg = state[\"exp_avg\"]\n                exp_avg_sq = state[\"exp_avg_sq\"]\n\n                grad = p.grad\n\n                # weight decay, AdamW style\n                if weight_decay:\n                    p.data.mul_(1 - lr * weight_decay)\n\n                # update momentum\n                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)\n\n                # update uncentered variance\n                exp_avg_sq.mul_(beta2).addcmul_(grad, 
grad, value=1 - beta2)\n\n                # adjust using bias1\n                bias_correction1 = 1 - beta1**step\n\n                step_size = lr / bias_correction1\n\n                # adjust using bias2\n                denom_correction = (1 - beta2**step) ** 0.5  # avoids math import\n\n                centered_variance = (exp_avg_sq.sqrt() / denom_correction).add_(\n                    eps, alpha=1\n                )\n\n                # lr update to compensation\n                if use_kahan_summation:\n                    compensation = state[\"compensation\"]\n\n                    compensation.addcdiv_(exp_avg, centered_variance, value=-step_size)\n\n                    # update weights with compensation (Kahan summation)\n                    # save error back to compensation for next iteration\n                    temp_buffer = p.detach().clone()\n                    p.data.add_(compensation)\n                    compensation.add_(temp_buffer.sub_(p.data))\n\n                else:\n                    # usual AdamW updates\n                    p.data.addcdiv_(exp_avg, centered_variance, value=-step_size)\n"
  },
  {
    "path": "llama_recipes/policies/mixed_precision.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nimport torch\n\nfrom torch.distributed.fsdp import (\n    # FullyShardedDataParallel as FSDP,\n    # CPUOffload,\n    MixedPrecision,\n    # BackwardPrefetch,\n    # ShardingStrategy,\n)\n\n# requires grad scaler in main loop\nfpSixteen = MixedPrecision(\n    param_dtype=torch.float16,\n    # Gradient communication precision.\n    reduce_dtype=torch.float16,\n    # Buffer precision.\n    buffer_dtype=torch.float16,\n)\n\nbfSixteen = MixedPrecision(\n    param_dtype=torch.bfloat16,\n    # Gradient communication precision.\n    reduce_dtype=torch.bfloat16,\n    # Buffer precision.\n    buffer_dtype=torch.bfloat16,\n    cast_forward_inputs=True,\n)\n\nbfSixteen_mixed = MixedPrecision(\n    param_dtype=torch.float32,\n    reduce_dtype=torch.bfloat16,\n    buffer_dtype=torch.bfloat16,\n)\n\nfp32_policy = MixedPrecision(\n    param_dtype=torch.float32,\n    reduce_dtype=torch.float32,\n    buffer_dtype=torch.float32,\n)\n"
  },
  {
    "path": "llama_recipes/policies/wrapping.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n\nfrom transformers.models.llama.modeling_llama import LlamaDecoderLayer\n\nfrom torch.distributed.fsdp.wrap import (\n    transformer_auto_wrap_policy,\n    size_based_auto_wrap_policy,\n)\n\nimport functools\n\n\ndef get_size_policy(min_params=1e8):\n    num_wrap_policy = functools.partial(\n        size_based_auto_wrap_policy, min_num_params=min_params\n    )\n    return num_wrap_policy\n\n\ndef get_llama_wrapper():\n    \"\"\"we register our main layer class and use the fsdp transformer wrapping policy\n    ensures embedding layers are in the root fsdp unit for shared access and that fsdp units map to transformer layers\n    \"\"\"\n    # ====   use new transformer wrapper\n\n    llama_auto_wrap_policy = functools.partial(\n        transformer_auto_wrap_policy,\n        transformer_layer_cls={\n            LlamaDecoderLayer,\n        },\n    )\n\n    return llama_auto_wrap_policy\n"
  },
  {
    "path": "llama_recipes/quickstart.ipynb",
    "content": "{\n \"cells\": [\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"Copyright (c) Meta Platforms, Inc. and affiliates.\\n\",\n    \"This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"## Quick Start Notebook\\n\",\n    \"\\n\",\n    \"This notebook shows how to train a Llama 2 model on a single GPU (e.g. A10 with 24GB) using int8 quantization and LoRA.\\n\",\n    \"\\n\",\n    \"### Step 0: Install pre-requirements and convert checkpoint\\n\",\n    \"\\n\",\n    \"The example uses the Hugging Face trainer and model which means that the checkpoint has to be converted from its original format into the dedicated Hugging Face format.\\n\",\n    \"The conversion can be achieved by running the `convert_llama_weights_to_hf.py` script provided with the transformer package.\\n\",\n    \"Given that the original checkpoint resides under `models/7B` we can install all requirements and convert the checkpoint with:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"# %%bash\\n\",\n    \"# pip install transformers datasets accelerate sentencepiece protobuf==3.20 py7zr scipy peft bitsandbytes fire torch_tb_profiler ipywidgets\\n\",\n    \"# TRANSFORM=`python -c \\\"import transformers;print('/'.join(transformers.__file__.split('/')[:-1])+'/models/llama/convert_llama_weights_to_hf.py')\\\"`\\n\",\n    \"# python ${TRANSFORM} --input_dir models --model_size 7B --output_dir models_hf/7B\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 1: Load the model\\n\",\n    \"\\n\",\n    \"Point model_id to model weight folder\"\n   ]\n  },\n  {\n   \"cell_type\": 
\"code\",\n   \"execution_count\": 2,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"/data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\\n\",\n      \"  from .autonotebook import tqdm as notebook_tqdm\\n\"\n     ]\n    },\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"\\n\",\n      \"===================================BUG REPORT===================================\\n\",\n      \"Welcome to bitsandbytes. For bug reports, please run\\n\",\n      \"\\n\",\n      \"python -m bitsandbytes\\n\",\n      \"\\n\",\n      \" and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\\n\",\n      \"================================================================================\\n\",\n      \"bin /data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda112.so\\n\",\n      \"CUDA SETUP: CUDA runtime path found: /usr/local/cuda/targets/x86_64-linux/lib/libcudart.so\\n\",\n      \"CUDA SETUP: Highest compute capability among GPUs detected: 8.0\\n\",\n      \"CUDA SETUP: Detected CUDA version 112\\n\",\n      \"CUDA SETUP: Loading binary /data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda112.so...\\n\"\n     ]\n    },\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"/data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /data/home/mreso/miniconda3/envs/llama did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! 
Searching further paths...\\n\",\n      \"  warn(msg)\\n\",\n      \"/data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/usr/local/cuda/efa/lib')}\\n\",\n      \"  warn(msg)\\n\",\n      \"The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\\n\",\n      \"Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.09s/it]\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"import torch\\n\",\n    \"from transformers import LlamaForCausalLM, LlamaTokenizer\\n\",\n    \"\\n\",\n    \"model_id=\\\"./models_hf/7B\\\"\\n\",\n    \"\\n\",\n    \"tokenizer = LlamaTokenizer.from_pretrained(model_id)\\n\",\n    \"\\n\",\n    \"model =LlamaForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map='auto', torch_dtype=torch.float16)\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 2: Load the preprocessed dataset\\n\",\n    \"\\n\",\n    \"We load and preprocess the samsum dataset which consists of curated pairs of dialogs and their summarization:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 3,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"Found cached dataset samsum (/data/home/mreso/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e)\\n\",\n      \"Loading cached processed dataset at 
/data/home/mreso/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-b14554a76c1c7ecd.arrow\\n\",\n      \"Loading cached processed dataset at /data/home/mreso/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-e40e61e15ebeb527.arrow\\n\",\n      \"Loading cached processed dataset at /data/home/mreso/.cache/huggingface/datasets/samsum/samsum/0.0.0/f1d7c6b7353e6de335d444e424dc002ef70d1277109031327bc9cc6af5d3d46e/cache-e08ac9e1b792e7ba.arrow\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"from pathlib import Path\\n\",\n    \"import os\\n\",\n    \"import sys\\n\",\n    \"from utils.dataset_utils import get_preprocessed_dataset\\n\",\n    \"from configs.datasets import samsum_dataset\\n\",\n    \"\\n\",\n    \"train_dataset = get_preprocessed_dataset(tokenizer, samsum_dataset, 'train')\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 3: Check base model\\n\",\n    \"\\n\",\n    \"Run the base model on an example input:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 4,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"\\n\",\n      \"Summarize this dialog:\\n\",\n      \"A: Hi Tom, are you busy tomorrow’s afternoon?\\n\",\n      \"B: I’m pretty sure I am. What’s up?\\n\",\n      \"A: Can you go with me to the animal shelter?.\\n\",\n      \"B: What do you want to do?\\n\",\n      \"A: I want to get a puppy for my son.\\n\",\n      \"B: That will make him so happy.\\n\",\n      \"A: Yeah, we’ve discussed it many times. I think he’s ready now.\\n\",\n      \"B: That’s good. Raising a dog is a tough issue. 
Like having a baby ;-) \\n\",\n      \"A: I'll get him one of those little dogs.\\n\",\n      \"B: One that won't grow up too big;-)\\n\",\n      \"A: And eat too much;-))\\n\",\n      \"B: Do you know which one he would like?\\n\",\n      \"A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\\n\",\n      \"B: I bet you had to drag him away.\\n\",\n      \"A: He wanted to take it home right away ;-).\\n\",\n      \"B: I wonder what he'll name it.\\n\",\n      \"A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))\\n\",\n      \"---\\n\",\n      \"Summary:\\n\",\n      \"A: Hi Tom, are you busy tomorrow’s afternoon?\\n\",\n      \"B: I’m pretty sure I am. What’s up?\\n\",\n      \"A: Can you go with me to the animal shelter?.\\n\",\n      \"B: What do you want to do?\\n\",\n      \"A: I want to get a puppy for my son.\\n\",\n      \"B: That will make him so happy.\\n\",\n      \"A: Yeah, we’ve discussed it many times. I think he’s ready now.\\n\",\n      \"B\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"eval_prompt = \\\"\\\"\\\"\\n\",\n    \"Summarize this dialog:\\n\",\n    \"A: Hi Tom, are you busy tomorrow’s afternoon?\\n\",\n    \"B: I’m pretty sure I am. What’s up?\\n\",\n    \"A: Can you go with me to the animal shelter?.\\n\",\n    \"B: What do you want to do?\\n\",\n    \"A: I want to get a puppy for my son.\\n\",\n    \"B: That will make him so happy.\\n\",\n    \"A: Yeah, we’ve discussed it many times. I think he’s ready now.\\n\",\n    \"B: That’s good. Raising a dog is a tough issue. Like having a baby ;-) \\n\",\n    \"A: I'll get him one of those little dogs.\\n\",\n    \"B: One that won't grow up too big;-)\\n\",\n    \"A: And eat too much;-))\\n\",\n    \"B: Do you know which one he would like?\\n\",\n    \"A: Oh, yes, I took him there last Monday. 
He showed me one that he really liked.\\n\",\n    \"B: I bet you had to drag him away.\\n\",\n    \"A: He wanted to take it home right away ;-).\\n\",\n    \"B: I wonder what he'll name it.\\n\",\n    \"A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))\\n\",\n    \"---\\n\",\n    \"Summary:\\n\",\n    \"\\\"\\\"\\\"\\n\",\n    \"\\n\",\n    \"model_input = tokenizer(eval_prompt, return_tensors=\\\"pt\\\").to(\\\"cuda\\\")\\n\",\n    \"\\n\",\n    \"model.eval()\\n\",\n    \"with torch.no_grad():\\n\",\n    \"    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"We can see that the base model only repeats the conversation.\\n\",\n    \"\\n\",\n    \"### Step 4: Prepare model for PEFT\\n\",\n    \"\\n\",\n    \"Let's prepare the model for Parameter Efficient Fine Tuning (PEFT):\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 5,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"trainable params: 4194304 || all params: 6742609920 || trainable%: 0.06220594176090199\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"model.train()\\n\",\n    \"\\n\",\n    \"def create_peft_config(model):\\n\",\n    \"    from peft import (\\n\",\n    \"        get_peft_model,\\n\",\n    \"        LoraConfig,\\n\",\n    \"        TaskType,\\n\",\n    \"        prepare_model_for_int8_training,\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"    peft_config = LoraConfig(\\n\",\n    \"        task_type=TaskType.CAUSAL_LM,\\n\",\n    \"        inference_mode=False,\\n\",\n    \"        r=8,\\n\",\n    \"        lora_alpha=32,\\n\",\n    \"        lora_dropout=0.05,\\n\",\n    \"        target_modules = [\\\"q_proj\\\", \\\"v_proj\\\"]\\n\",\n    \"    )\\n\",\n    
\"\\n\",\n    \"    # prepare int-8 model for training\\n\",\n    \"    model = prepare_model_for_int8_training(model)\\n\",\n    \"    model = get_peft_model(model, peft_config)\\n\",\n    \"    model.print_trainable_parameters()\\n\",\n    \"    return model, peft_config\\n\",\n    \"\\n\",\n    \"# create peft config\\n\",\n    \"model, lora_config = create_peft_config(model)\\n\",\n    \"\\n\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {\n    \"editable\": true,\n    \"slideshow\": {\n     \"slide_type\": \"\"\n    },\n    \"tags\": []\n   },\n   \"source\": [\n    \"### Step 5: Define an optional profiler\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 6,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"from transformers import TrainerCallback\\n\",\n    \"from contextlib import nullcontext\\n\",\n    \"enable_profiler = False\\n\",\n    \"output_dir = \\\"tmp/llama-output\\\"\\n\",\n    \"\\n\",\n    \"config = {\\n\",\n    \"    'lora_config': lora_config,\\n\",\n    \"    'learning_rate': 1e-4,\\n\",\n    \"    'num_train_epochs': 1,\\n\",\n    \"    'gradient_accumulation_steps': 2,\\n\",\n    \"    'per_device_train_batch_size': 2,\\n\",\n    \"    'gradient_checkpointing': False,\\n\",\n    \"}\\n\",\n    \"\\n\",\n    \"# Set up profiler\\n\",\n    \"if enable_profiler:\\n\",\n    \"    wait, warmup, active, repeat = 1, 1, 2, 1\\n\",\n    \"    total_steps = (wait + warmup + active) * (1 + repeat)\\n\",\n    \"    schedule =  torch.profiler.schedule(wait=wait, warmup=warmup, active=active, repeat=repeat)\\n\",\n    \"    profiler = torch.profiler.profile(\\n\",\n    \"        schedule=schedule,\\n\",\n    \"        on_trace_ready=torch.profiler.tensorboard_trace_handler(f\\\"{output_dir}/logs/tensorboard\\\"),\\n\",\n    \"        record_shapes=True,\\n\",\n    \"        profile_memory=True,\\n\",\n    \"        with_stack=True)\\n\",\n    \"    \\n\",\n    \"  
  class ProfilerCallback(TrainerCallback):\\n\",\n    \"        def __init__(self, profiler):\\n\",\n    \"            self.profiler = profiler\\n\",\n    \"            \\n\",\n    \"        def on_step_end(self, *args, **kwargs):\\n\",\n    \"            self.profiler.step()\\n\",\n    \"\\n\",\n    \"    profiler_callback = ProfilerCallback(profiler)\\n\",\n    \"else:\\n\",\n    \"    profiler = nullcontext()\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 6: Fine tune the model\\n\",\n    \"\\n\",\n    \"Here, we fine tune the model for a single epoch which takes a bit more than an hour on a A100.\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 7,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stderr\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...\\n\",\n      \"/data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:321: UserWarning: MatMul8bitLt: inputs will be cast from torch.float32 to float16 during quantization\\n\",\n      \"  warnings.warn(f\\\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\\\")\\n\",\n      \"/data/home/mreso/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:321: UserWarning: MatMul8bitLt: inputs will be cast from torch.bfloat16 to float16 during quantization\\n\",\n      \"  warnings.warn(f\\\"MatMul8bitLt: inputs will be cast from {A.dtype} to float16 during quantization\\\")\\n\"\n     ]\n    },\n    {\n     \"data\": {\n      \"text/html\": [\n       \"\\n\",\n       \"    <div>\\n\",\n       \"      \\n\",\n       \"      <progress value='389' max='389' style='width:300px; height:20px; vertical-align: middle;'></progress>\\n\",\n       \"      [389/389 1:12:06, Epoch 1/1]\\n\",\n    
   \"    </div>\\n\",\n       \"    <table border=\\\"1\\\" class=\\\"dataframe\\\">\\n\",\n       \"  <thead>\\n\",\n       \" <tr style=\\\"text-align: left;\\\">\\n\",\n       \"      <th>Step</th>\\n\",\n       \"      <th>Training Loss</th>\\n\",\n       \"    </tr>\\n\",\n       \"  </thead>\\n\",\n       \"  <tbody>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>10</td>\\n\",\n       \"      <td>1.965000</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>20</td>\\n\",\n       \"      <td>1.845600</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>30</td>\\n\",\n       \"      <td>1.801100</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>40</td>\\n\",\n       \"      <td>1.780900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>50</td>\\n\",\n       \"      <td>1.715400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>60</td>\\n\",\n       \"      <td>1.697800</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>70</td>\\n\",\n       \"      <td>1.707600</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>80</td>\\n\",\n       \"      <td>1.713300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>90</td>\\n\",\n       \"      <td>1.663900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>100</td>\\n\",\n       \"      <td>1.702700</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>110</td>\\n\",\n       \"      <td>1.658800</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>120</td>\\n\",\n       \"      <td>1.692400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>130</td>\\n\",\n       \"      <td>1.644900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"     
 <td>140</td>\\n\",\n       \"      <td>1.687900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>150</td>\\n\",\n       \"      <td>1.686600</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>160</td>\\n\",\n       \"      <td>1.649600</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>170</td>\\n\",\n       \"      <td>1.666900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>180</td>\\n\",\n       \"      <td>1.709200</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>190</td>\\n\",\n       \"      <td>1.670400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>200</td>\\n\",\n       \"      <td>1.662700</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>210</td>\\n\",\n       \"      <td>1.681300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>220</td>\\n\",\n       \"      <td>1.685500</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>230</td>\\n\",\n       \"      <td>1.663400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>240</td>\\n\",\n       \"      <td>1.638300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>250</td>\\n\",\n       \"      <td>1.627400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>260</td>\\n\",\n       \"      <td>1.654300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>270</td>\\n\",\n       \"      <td>1.640900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>280</td>\\n\",\n       \"      <td>1.674700</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>290</td>\\n\",\n       \"      <td>1.657300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    
<tr>\\n\",\n       \"      <td>300</td>\\n\",\n       \"      <td>1.660200</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>310</td>\\n\",\n       \"      <td>1.666600</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>320</td>\\n\",\n       \"      <td>1.674500</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>330</td>\\n\",\n       \"      <td>1.656200</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>340</td>\\n\",\n       \"      <td>1.684300</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>350</td>\\n\",\n       \"      <td>1.667900</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>360</td>\\n\",\n       \"      <td>1.661400</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>370</td>\\n\",\n       \"      <td>1.676800</td>\\n\",\n       \"    </tr>\\n\",\n       \"    <tr>\\n\",\n       \"      <td>380</td>\\n\",\n       \"      <td>1.628100</td>\\n\",\n       \"    </tr>\\n\",\n       \"  </tbody>\\n\",\n       \"</table><p>\"\n      ],\n      \"text/plain\": [\n       \"<IPython.core.display.HTML object>\"\n      ]\n     },\n     \"metadata\": {},\n     \"output_type\": \"display_data\"\n    }\n   ],\n   \"source\": [\n    \"from transformers import default_data_collator, Trainer, TrainingArguments\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"\\n\",\n    \"# Define training args\\n\",\n    \"training_args = TrainingArguments(\\n\",\n    \"    output_dir=output_dir,\\n\",\n    \"    overwrite_output_dir=True,\\n\",\n    \"    bf16=True,  # Use BF16 if available\\n\",\n    \"    # logging strategies\\n\",\n    \"    logging_dir=f\\\"{output_dir}/logs\\\",\\n\",\n    \"    logging_strategy=\\\"steps\\\",\\n\",\n    \"    logging_steps=10,\\n\",\n    \"    save_strategy=\\\"no\\\",\\n\",\n    \"    optim=\\\"adamw_torch_fused\\\",\\n\",\n    \"   
 max_steps=total_steps if enable_profiler else -1,\\n\",\n    \"    **{k:v for k,v in config.items() if k != 'lora_config'}\\n\",\n    \")\\n\",\n    \"\\n\",\n    \"with profiler:\\n\",\n    \"    # Create Trainer instance\\n\",\n    \"    trainer = Trainer(\\n\",\n    \"        model=model,\\n\",\n    \"        args=training_args,\\n\",\n    \"        train_dataset=train_dataset,\\n\",\n    \"        data_collator=default_data_collator,\\n\",\n    \"        callbacks=[profiler_callback] if enable_profiler else [],\\n\",\n    \"    )\\n\",\n    \"\\n\",\n    \"    # Start training\\n\",\n    \"    trainer.train()\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 7:\\n\",\n    \"Save model checkpoint\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 8,\n   \"metadata\": {},\n   \"outputs\": [],\n   \"source\": [\n    \"model.save_pretrained(output_dir)\"\n   ]\n  },\n  {\n   \"attachments\": {},\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"### Step 8:\\n\",\n    \"Try the fine tuned model on the same example again to see the learning progress:\"\n   ]\n  },\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 9,\n   \"metadata\": {},\n   \"outputs\": [\n    {\n     \"name\": \"stdout\",\n     \"output_type\": \"stream\",\n     \"text\": [\n      \"\\n\",\n      \"Summarize this dialog:\\n\",\n      \"A: Hi Tom, are you busy tomorrow’s afternoon?\\n\",\n      \"B: I’m pretty sure I am. What’s up?\\n\",\n      \"A: Can you go with me to the animal shelter?.\\n\",\n      \"B: What do you want to do?\\n\",\n      \"A: I want to get a puppy for my son.\\n\",\n      \"B: That will make him so happy.\\n\",\n      \"A: Yeah, we’ve discussed it many times. I think he’s ready now.\\n\",\n      \"B: That’s good. Raising a dog is a tough issue. 
Like having a baby ;-) \\n\",\n      \"A: I'll get him one of those little dogs.\\n\",\n      \"B: One that won't grow up too big;-)\\n\",\n      \"A: And eat too much;-))\\n\",\n      \"B: Do you know which one he would like?\\n\",\n      \"A: Oh, yes, I took him there last Monday. He showed me one that he really liked.\\n\",\n      \"B: I bet you had to drag him away.\\n\",\n      \"A: He wanted to take it home right away ;-).\\n\",\n      \"B: I wonder what he'll name it.\\n\",\n      \"A: He said he’d name it after his dead hamster – Lemmy  - he's  a great Motorhead fan :-)))\\n\",\n      \"---\\n\",\n      \"Summary:\\n\",\n      \"A wants to get a puppy for his son. He took him to the animal shelter last Monday. He showed him one that he really liked. A will name it after his dead hamster - Lemmy.\\n\"\n     ]\n    }\n   ],\n   \"source\": [\n    \"model.eval()\\n\",\n    \"with torch.no_grad():\\n\",\n    \"    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))\\n\"\n   ]\n  }\n ],\n \"metadata\": {\n  \"kernelspec\": {\n   \"display_name\": \"Python 3 (ipykernel)\",\n   \"language\": \"python\",\n   \"name\": \"python3\"\n  },\n  \"language_info\": {\n   \"codemirror_mode\": {\n    \"name\": \"ipython\",\n    \"version\": 3\n   },\n   \"file_extension\": \".py\",\n   \"mimetype\": \"text/x-python\",\n   \"name\": \"python\",\n   \"nbconvert_exporter\": \"python\",\n   \"pygments_lexer\": \"ipython3\",\n   \"version\": \"3.10.11\"\n  },\n  \"vscode\": {\n   \"interpreter\": {\n    \"hash\": \"2d58e898dde0263bc564c6968b04150abacfd33eed9b19aaa8e45c040360e146\"\n   }\n  }\n },\n \"nbformat\": 4,\n \"nbformat_minor\": 4\n}\n"
  },
  {
    "path": "llama_recipes/requirements.txt",
    "content": "-f https://download.pytorch.org/whl/torch_stable.html \ntorch==2.0.1+cu118\naccelerate\nappdirs\nloralib\nbitsandbytes==0.39.1\nblack\nblack[jupyter]\ndatasets\nfire\ngit+https://github.com/huggingface/peft.git\ntransformers>=4.31.0\nsentencepiece\npy7zr\nscipy\n\n"
  },
  {
    "path": "llama_recipes/scripts/markdown_link_check_config.json",
    "content": "{\n  \"retryOn429\": true,\n  \"retryCount\": 5,\n  \"fallbackRetryDelay\": \"10s\",\n  \"httpHeaders\": [\n    {\n      \"urls\": [\n        \"https://docs.github.com/\",\n        \"https://help.github.com/\"\n      ],\n      \"headers\": {\n        \"Accept-Encoding\": \"zstd, br, gzip, deflate\"\n      }\n    }\n  ],\n  \"ignorePatterns\": [\n    {\n      \"pattern\": \"^http(s)?://127.0.0.1.*\"\n    },\n    {\n      \"pattern\": \"^http(s)?://localhost.*\"\n    },\n    {\n      \"pattern\": \"https://www.intel.com/content/www/us/en/developer/articles/news/llama2.html\"\n    }\n  ]\n}\n"
  },
  {
    "path": "llama_recipes/scripts/spellcheck.sh",
    "content": "\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n# Source: https://github.com/pytorch/torchx/blob/main/scripts/spellcheck.sh\nset -ex\nsudo apt-get install aspell\n\nif [[ -z \"$@\" ]]; then\n    sources=$(find -name '*.md')\nelse\n    sources=$@\nfi\n\nsources_arg=\"\"\nfor src in $sources; do\n        sources_arg=\"${sources_arg} -S $src\"\ndone\n\nif [ ! \"$sources_arg\" ]; then\n\techo \"No files to spellcheck\"\nelse\n\tpyspelling -c scripts/spellcheck_conf/spellcheck.yaml --name Markdown $sources_arg\nfi\n"
  },
  {
    "path": "llama_recipes/scripts/spellcheck_conf/spellcheck.yaml",
    "content": "matrix:\n- name: Markdown\n  apsell:\n    lang: en\n    d: en_US\n  dictionary:\n    wordlists:\n    - scripts/spellcheck_conf/wordlist.txt\n    output: scripts/spellcheck_conf/wordlist.dic\n    encoding: utf-8\n  pipeline:\n  - pyspelling.filters.context:\n      context_visible_first: true\n      delimiters:\n      - open: '(?s)^ *(?P<open>`{3,})[a-z0-9]*?$'\n        close: '^(?P=open)$'\n      - open: ''\n        content: 'https?://[-a-zA-Z0-9.]+?\\.[a-z]{2,6}[-?=&%.0-9a-zA-Z/_#]*'\n        close: ''\n  - pyspelling.filters.markdown:\n      markdown_extensions:\n      - markdown.extensions.extra:\n"
  },
  {
    "path": "llama_recipes/scripts/spellcheck_conf/wordlist.txt",
    "content": "BaseHandler\nImageNet\nRGB\nTorchServe\narchiver\ndataset\ngithub\nhref\nhttps\njson\nli\npy\npytorch\nsegmenter\ntorchvision\nul\nusecase\nCUDA\nJDK\nNVIDIA\nWSL\nbashrc\ncd\ngithubusercontent\nhtml\nmicrosoft\nol\nopenjdk\nOpenJDK\npre\npsutil\nsentencepiece\nsrc\nsudo\ntorchtext\nubuntu\nwget\nAPIs\nEg\nMilliSeconds\nURI\nYAML\ndataflow\nfunc\nlt\nmd\nparams\npostprocess\npostprocessing\npreprocess\npreprocessing\nserializable\ntbody\ntd\nth\nthead\nunregister\nurl\nCONFIG\nMNIST\nREADME\nhotdogs\nncs\nsqueezenet\nvgg\nTorchServe's\ncfg\nconfigs\nruntime\nyyyyMMddHHmmssSSS\nAWS\nBenchmarking\nCaptum\nGrafana\nHuggingFace\nJMeter\nKMS\nKubeflow\nKubernetes\nMMF\ncontrib\nddb\ngRPC\nipynb\nmlflow\nnmt\nperformant\ntorschripted\nAPI's\nASG\nDjango\nDockerfile\nELB\nLoadBalancer\nOpenAPI\nPyPi\nSDK\nSageMaker\nblockquote\ncli\ncloudformation\ncmd\ndev\ndir\nio\nissuecomment\nlxning\nnetty\nperf\npresigned\ntagname\ntxt\nConfigManager\nGPL\nNVSMI\nPowershell\nRedistributable\nenv\nexe\nfrontend\nmsi\nnodejs\nnpm\nprebuilt\nsmi\nstackoverflow\nutil\nAlexNet\nDeepLabV\nDensenet\nFCN\nRCNN\nResNet\nTorchscripted\nfastrcnn\njpg\nmaskrcnn\npng\nKFServing\nSeldon\nai\nanalytics\norchestrator\nPMD\nbackend\ncheckstyle\ncov\ngradlew\nhtmlcov\nnode.js\npylint\npylintrc\npytest\nrcfile\ntcort\nut\nlocalhost\nmyworkflow\nwfpredict\nBytearray\nCN\nCORS\nEventLoopGroup\nEventLoops\nGPUs\nJVM\nMaxDirectMemorySize\nOU\nOpenSSL\nPCI\nPIL\nPKCS\nPYTHONPATH\nPalo\nRSA\nSSL\nWorkerThread\namazonaws\nasync\nbatchSize\nchangeit\ndalay\ndefaultVersion\ndep\ndname\nenvvars\ngenkey\ngpu\ngz\nkeyalg\nkeyout\nkeysize\nkeystore\nkeytool\nlivebook\nmarName\nmaxBatchDelay\nmaxWorkers\nminWorkers\nmodelName\nmsec\nmycert\nmykey\nnatively\nnewkey\nnoop\nparameterName\nparameterNameN\nparameterValue\nparameterValueN\npathname\npem\npreflight\nreadthedocs\nreq\nresponseTimeout\nscalability\nstorepass\nstoretype\nurls\nutf\nvmargs\nwlm\nwww\nyourdomain\nnextPageToken\nsubfolder\nunregi
stering\nworkflowDag\nworkflowName\nworkflowUrl\nJavascript\nRESTful\ncodegen\nArgs\nCustomImageClassifier\nDefaultHandlerClass\nImageClassifier\nInit\nLayerIntegratedGradients\nModelHandler\nNDArray\nPredictionException\nPreprocessed\nRuntimeError\nWaveglow\ncpu\nembeddings\nfp\nie\nisfile\nisinstance\njit\nkwargs\nos\nparam\npred\npth\npyt\nserializedFile\nstr\ntacotron\nutils\nvCPUs\nwaveglowpyt\nDL\nLJO\nMiB\ncv\ndockerd\nentrypoint\ngpuId\ngpuUsage\ninferencing\nloadedAtStartup\nmemoryUsage\nmilli\nmodelUrl\nmodelVersion\npid\nstartTime\nCaptum's\nInferenceAPIsService\nModelServer\nbr\nkf\nproto\nCPUUtilization\nDiskAvailable\nDiskUsage\nDiskUsed\nDiskUtilization\nDistanceInKM\nHostName\nInferenceTime\nJSONLayout\nLoopCount\nMemoryAvailable\nMemoryUsed\nMemoryUtilization\nMetricName\nSizeOfImage\nStatsD\nappender\ndimN\netsy\nformatter\nidx\nimg\nkB\nDescribeModel\nListModels\nRegisterModel\nScaleWorker\nSetDefault\nUnregisterModel\ngRPCs\ngrpcio\nmkdir\nprotobuf\nprotoc\nrepo\nBackendWorker\nConversionPattern\nDlog\nMaxBackupIndex\nMaxFileSize\nPatternLayout\nRollingFileAppender\nWorkerLifeCycle\napache\nnnvm\nstderr\nstdout\nConflictStatusException\nDownloadModelException\nInvalidSnapshotException\nModelNotFoundException\nNoSuchMethodError\nServiceUnavailableException\nlang\nmb\nntl\nPrometheusServer\ngloboff\nnoopversioned\nsystemctl\nuuid\nyml\nAWSS\nAmazonS\nIAM\nManagementAPIsService\nReadOnlyAccess\nUserGuide\nUsingKMSEncryption\nacknowledgement\nmacOS\nsse\nfairseq\nlibs\nmv\npretrained\npublically\nready-made\ntmp\ntorchscript\ntorchvision's\nhanderl\nBitte\nBonjour\nHallo\nHause\nIch\nIhnen\nIhren\nJe\nNamen\nSie\nTransformerEn\nUnd\nWMT\nWie\nallez\narxiv\nauf\nbien\nchez\ndanke\ndataclasses\ndich\ndu\nenglish\nerinnere\net\nfb\ngeht\ngerman\nkomm\nkommst\nle\nleid\nläuft\nm'excuser\nmerci\nmich\nmir\nmonde\nmöglich\nnFine\nnIt’s\nnPlease\nnach\nne\nnicht\nnom\nprie\nquand\nrentrerez\nselbst\nsich\nsind\nsouviens\ntôt\nva\nvenir\nvotre\nvous\nwann\nw
arte\nÇa\nBERTQA\nBERTSeqClassification\nBERTTokenClassification\nMFreidank\nRoBERTA\nXLM\ndistilbert\ndoes't\nfinetuning\nnum\ntc\ntokenizer\nvidhya\nvocabs\nAutoConfig\nHuggingface's\nScriptFunction\ntransfomers\nBBM\nBaseDataset\nBaseDatasetBuilder\nBaseModel\nFNSio\nMMFTransformer\nMultiModal\nOmegaConfing\nPyav\nREU\nTextCaps\nTextVQA\nTochserve\ncsv\ndatasets\nfacebook\nfacebookresearch\nfbclid\ngetitem\nlables\nlen\nmc\nmmfartifacts\nEmbeddingBag\nTextHandler\noverriden\nDBUILD\nDCMAKE\nDSM\nEFFT\nFasterTransformer\nNGC\nTransfomer\nbytedance\ncmake\ncp\ngeforce\nlibpyt\nnvcr\noauthtoken\nturing\nvolta\nxlarge\nDeepLearningExamples\nSpeechSynthesis\nWaveGlow's\nlibrosa\nnumpy\nrb\nscipy\nunidecode\nwav\nwb\nInteroperability\nMtail\nSart\nchmod\ncnn\nmtailtarget\nprogs\nrc\ntimeseries\nxvzf\ncuda\njdk\nnvidia\ntorchserve\nwsl\nyaml\napi\nconfig\nhttp\nmnist\nresnet\nHuggingface\nPyTorch\nbenchmarking\nbert\ncaptum\ngrpc\nkubeflow\nkubernetes\nTorchserve's\nasg\naws\nelb\nreadme\nsdk\napis\npowershell\nalexnet\ndeeplabv\ndensenet\nfcn\nkfserving\nseldon\nexcuted\nfindbugs\nHTTPs\ncors\nopenssl\nprometheus\nrsa\nssl\ngpus\ninit\nwaveglow\nhostname\nstatsd\ngrafana\nkms\nuserguide\nreadymade\ntorchscripted\nrcnn\nroberta\nxlm\nBasedataset\nmmf\nmultimodal\npreprocessed\nbatchsize\ndownload\nfastertransformer\nngc\ndeeplearningexamples\nmtail\nscarpe\nNVidia\nWaveGlow\nhuggingface\ntorchServe\nCProfile\nKSERVE\napachelounge\nargs\njmeter\nkserve\nlatencies\nsnakeviz\ncodec\nloadbalancer\ntorchserves\nxml\nConda\nautoscaling\nconda\nGPUMemoryUsed\nGPUMemoryUtilization\nGPUUtilization\nJSONPatternLayout\nMXNetModelServer\nQLog\nQLogLayout\nQLogsetupModelDependencies\nabc\ndda\npatternlayout\nqlog\nIPEX\nORT\nPROFILER\nTensorRT\nValueToSet\nkineto\nprofiler\npypi\nruntimes\ntorchprep\nGPT\nKServe\nLMHeadModel\nParallelize\nTextgeneration\ngpt\nkserve\nparallelize\ntx\nxl\nDCGAN\nDLRM\nGAN\nNN\nRecommender\nScriptModule\nScriptable\nTorchRec\nTorchScript\nTorchrec\ndc
gan\ndlrm\nfashiongen\nFashionGen\nfashionGen\ngan\nnn\nscriptable\ntorchrec\nAVX\nAllocator\nBLOCKTIME\nBertModel\nCONDA\nJeMalloc\nKMP\nLD\nNUMA\nNuma\nOMP\nOpenMP\nPRELOAD\nPTMalloc\nTCMalloc\nXeon\nafeeb\naffinitized\nallocator\nargs\neval\ngif\nhyperthreaded\nhyperthreading\ninplace\ninputPath\nintel\niomp\nipex\niter\njemalloc\nlibiomp\nlibtcmalloc\nnuma\nnumactl\npdt\nqconfig\nrandint\nrandn\ntcmalloc\ntunable\nunix\nunutilized\nusr\nCONTAINERD\nDaemonSet\nGKE\nGcloud\nGi\nGoogleCloudPlatform\nKi\nNFS\nPV\nPersistentVolume\nRWX\nSTORAGECLASS\nVPC\nallocatable\nauth\nautoupgrade\nbcc\ncidr\nclusterIP\ncreationTimestamp\ndaemonset\ndrwx\ndrwxr\nfsSL\ngcloud\nggc\ngke\ngoogleapis\nip\nipv\njsonpath\nkubeconfig\nkubectl\nlR\nmynfs\nnamespaces\nnfs\nnodePools\npersistentvolume\npersistentvolumeclaim\npo\npreloaded\nprovisioner\npv\npvc\nquickstart\nrw\nsvc\ntesla\ntty\nunformatted\nAAAAAElFTkSuQmCC\nAutoscaler\nBUILDKIT\nGOR\nInferenceService\nKnative\nRollout\ninferenceservice\ningressgateway\nistio\nkfs\nknative\nloadBalancer\nmnt\nmodelCount\nreadmes\nrollout\nserverless\nrecommender\nHandlerTime\ncustomizedMetadata\nenviron\nContentType\nkservev\ntobytes\nCustomHandler\nGH\nOSS\nPRs\nctx\nonnx\nClusterConfig\nEBS\nEFS\nEKS\napiVersion\ndesiredCapacity\nefs\neks\neksctl\ninstanceTypes\ninstancesDistribution\nmaxSize\nminSize\nnamespace\nng\nnodeGroups\nonDemandBaseCapacity\nonDemandPercentageAboveBaseCapacity\npvpod\nspotInstancePools\nstoragehttps\nsubnet\nsubnets\nvpc\nMMS\ncommandline\nfilepath\njmx\nrampup\nrequestdefaults\nscaleup\ntearDown\ntestplan\nJProfiler\nJProfiler's\nSqueezeNet\nTSBenchmark\napos\ncProfile\ndockerhub\nfilesystem\nfilterresults\ngradle\nhomebrew\nimageFilePath\njpgc\nlinuxbrew\nmergeresults\nmodelN\nperfmon\nurlN\nArg\nKFserving\narg\nauthn\nauthz\ndicts\ndockerfiles\nenum\neventloop\nhashmap\nlifecycles\nsagemaker\nstartServer\nthreadpool\nmGPU\nsocio\ngridfs\nNLP\nTorchScript's\nMeta's\ncriteo\npersonalization\nNMTBackTranslate\nN
MTDualTranslate\nnlp\nDogCatBreed\nDogCatBreedClassification\nCloudWatch\nLogGroup\nTorchServeInferenceURL\nTorchServeManagementURL\ncloudwatch\nkeypair\nspinup\nReactApp\nlogdir\ntensorboard\nDenseNet\npytorchbot\nValidator\ncomparator\nvalidator\nvalidators\nDatafile\nUI\nbuildspec\ncmds\nAKS\nPVCs\nDockerHub\njq\nHPA\nHPG\ntargetValue\ntotensor\nKFServer\nTSModelRepository\nTorchserveModel\nTorchservemodel\nkfserve\nkfserver\nKFModel\nmarfile\nAKS\nBalancer\nEFK\nLiveness\nautoscale\ndatasource\nhelmignore\nlookingup\nmountpath\nAz\nVM\naks\naz\nds\neastus\nmyAKSCluster\nmyResourceGroup\nsc\nvm\nCODEBUILD\nCodeBuild\nDockerfiles\nbt\nbuildtype\ncodebuild\ncudaversion\ncudnn\nmemlock\nshm\nulimit\nCresta's\nDAGs\nDynabench\nDynaboard\nMLFlow\nMLOps\nMLflow\nOperationalize\nSagemaker\nStreamlit\nInferentia\nopensource\noperationalising\nWadhwani\nmodelarchive\neagermode\nAttributeName\nAttributeType\nDDBEndPoint\nDDBSnapshotSerializer\nDefaultCredentialsProvider\nFS\nIndexName\nKeySchema\nKeyType\nPluginsManager\nProjectionType\nProvisionedThroughput\nReadCapacityUnits\nSDKs\nWriteCapacityUnits\ncreatedOn\ncreatedOnMonth\ndynamodb\nimpl\nserializer\nservingsdk\nsnapshotName\nbehaviour\nteardown\ntg\nudv\ndataN\nbackendgroup\nsexualized\necbe\ngrayscale\nbz\nmarsgen\nefft\nenvvar\nRoadmap\nfff\npvd\nwhl\nss\ndn\nrn\nDe\nec\nVQA\nxxxx\nAffero\nMinIO\nfs\nfsspec\nminioadmin\npythonic\nDeepSpeed\nMII\ndeepspeed\nmii\nDiffusers\ndiffusers\nAzureML\nLargemodels\nbigscience\nmem\nsharded\nNVfuser\nfuser\nort\nsess\ndali\nBetterTransformer\nTransformerEncoder\nInferenceTimeInMS\nMetricTypes\nMetricsCache\nTIMM\nbackends\ninductor\nIntegrations\nintegrations\nUseCases\nusecases\nExplainability\nTorchData\npx\nsvg\nnvfuser\nnoborder\ndatapipes\ntensorrt\nvec\ntorchdata\nCodeQL\nDependabot\nSnyk\npythonversion\nStreamPredictions\nLLMs\nMPS\nmps\ndeviceIds\nrpc\npippy\nMBS\nMicroBatching\nMicroBatchingHandler\nQPS\nPiPPy\nMicrobatching\nMicro-batching\nmicrobatch\nmicrobatchin
g\nDeviceId\nPredictionTime\nQueueTime\nWorkerLoadTime\nWorkerName\nWorkerThreadTime\nMicroSoft\nlmi\ntorchrun\nnproc\nlargemodels\ntorchpippy\nInferenceSession\nmaxRetryTimeoutInSec\nneuronx\nAMI\nDLAMI\nXLA\ninferentia\nActionSLAM\nstatins\nci\nchatGPT\nLlama\nPEFT\nLORA\nFSDP\nAuditNLG\nfinetune\nfsdp\nineference\nlora\npeft\nsamsum\nvLLM\nTGI\nvLLM\nvLLM's\nOOM\nRTX\nSKU\nTPUs\ncheckpointing\nenviroment\nfragmentations\nintra\nnightlies\nrecenly\nuncomment"
  },
  {
    "path": "llama_recipes/utils/__init__.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nfrom .memory_utils import MemoryTrace\nfrom .dataset_utils import *\nfrom .fsdp_utils import fsdp_auto_wrap_policy\nfrom .train_utils import *\n"
  },
  {
    "path": "llama_recipes/utils/config_utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nimport inspect\nfrom dataclasses import fields\nfrom peft import (\n    LoraConfig,\n    AdaptionPromptConfig,\n    PrefixTuningConfig,\n)\n\nfrom transformers import BitsAndBytesConfig\n\nimport configs.datasets as datasets\nfrom configs import (\n    lora_config,\n    llama_adapter_config,\n    prefix_config,\n    train_config,\n    qlora_config,\n    bitsandbytes_config,\n)\nfrom .dataset_utils import DATASET_PREPROC\n\n\ndef update_config(config, **kwargs):\n    if isinstance(config, (tuple, list)):\n        for c in config:\n            update_config(c, **kwargs)\n    else:\n        for k, v in kwargs.items():\n            if hasattr(config, k):\n                setattr(config, k, v)\n            elif \".\" in k:\n                # allow --some_config.some_param=True\n                config_name, param_name = k.split(\".\")\n                if type(config).__name__ == config_name:\n                    if hasattr(config, param_name):\n                        setattr(config, param_name, v)\n                    else:\n                        # In case of specialized config we can warm user\n                        print(f\"Warning: {config_name} does not accept parameter: {k}\")\n            elif isinstance(config, train_config):\n                print(f\"Warning: unknown parameter {k}\")\n\n\ndef generate_peft_config(peft_method, kwargs):\n    # Config mapping for train_config.peft_method to its corresponding config class\n    config_mapping = {\n        \"lora\": lora_config,\n        \"llama_adapter\": llama_adapter_config,\n        \"prefix\": prefix_config,\n        \"bitsandbytes_config\": bitsandbytes_config,\n        \"qlora\": qlora_config,\n        # Add other mappings as needed\n    }\n\n    # Mapping from config class to its corresponding PEFT config\n    
peft_config_mapping = {\n        lora_config: LoraConfig,\n        llama_adapter_config: AdaptionPromptConfig,\n        prefix_config: PrefixTuningConfig,\n        bitsandbytes_config: BitsAndBytesConfig,\n        qlora_config: LoraConfig,\n        # Add other mappings as needed\n    }\n\n    # Step 2: Updated assertion\n    assert peft_method in config_mapping.keys(), f\"Peft config not found: {peft_method}\"\n\n    # Step 3: Fetch the correct configuration class based on train_config.peft_method\n    config = config_mapping[peft_method]\n    update_config(config, **kwargs)\n    params = {k.name: getattr(config, k.name) for k in fields(config)}\n\n    # Step 5: Fetch the correct PEFT config based on the configuration class\n    peft_config_class = peft_config_mapping[config]\n    peft_config = peft_config_class(**params)\n\n    return peft_config\n\n\n# def generate_peft_config(train_config, kwargs):\n#     configs = (lora_config, llama_adapter_config, prefix_config, qlora_config)\n#     peft_configs = (LoraConfig, AdaptionPromptConfig, PrefixTuningConfig)\n#     names = tuple(c.__name__.rstrip(\"_config\") for c in configs)\n\n#     assert train_config.peft_method in names, f\"Peft config not found: {train_config.peft_method}\"\n\n#     config = configs[names.index(train_config.peft_method)]\n#     update_config(config, **kwargs)\n#     params = {k.name: getattr(config, k.name) for k in fields(config)}\n#     peft_config = peft_configs[names.index(train_config.peft_method)](**params)\n\n#     return peft_config\n\n\ndef generate_dataset_config(train_config, kwargs):\n    names = tuple(DATASET_PREPROC.keys())\n\n    assert train_config.dataset in names, f\"Unknown dataset: {train_config.dataset}\"\n\n    dataset_config = {k: v for k, v in inspect.getmembers(datasets)}[\n        train_config.dataset\n    ]\n    update_config(dataset_config, **kwargs)\n\n    return dataset_config\n"
  },
  {
    "path": "llama_recipes/utils/dataset_utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nimport torch\n\nfrom functools import partial\n\n\nfrom ft_datasets import (\n    get_grammar_dataset,\n    get_alpaca_dataset,\n    get_samsum_dataset,\n    get_completion_dataset,\n)\n\n\nDATASET_PREPROC = {\n    \"alpaca_dataset\": partial(get_alpaca_dataset, max_words=224),\n    \"grammar_dataset\": get_grammar_dataset,\n    \"samsum_dataset\": get_samsum_dataset,\n    \"completion\": get_completion_dataset,\n}\n\n\ndef get_preprocessed_dataset(\n    tokenizer, dataset_config, split: str = \"train\"\n) -> torch.utils.data.Dataset:\n    if dataset_config.dataset not in DATASET_PREPROC:\n        raise NotImplementedError(f\"{dataset_config.dataset} is not (yet) implemented\")\n\n    def get_split():\n        return (\n            dataset_config.train_split\n            if split == \"train\"\n            else dataset_config.test_split\n        )\n\n    return DATASET_PREPROC[dataset_config.dataset](\n        dataset_config,\n        tokenizer,\n        get_split(),\n    )\n"
  },
  {
    "path": "llama_recipes/utils/fsdp_utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\n\ndef fsdp_auto_wrap_policy(model, transformer_layer_name):\n    import functools\n\n    from torch.distributed.fsdp.wrap import (\n        _or_policy,\n        lambda_auto_wrap_policy,\n        transformer_auto_wrap_policy,\n    )\n\n    from peft.tuners import PrefixEncoder, PromptEmbedding, PromptEncoder\n\n    def lambda_policy_fn(module):\n        if (\n            len(list(module.named_children())) == 0\n            and getattr(module, \"weight\", None) is not None\n            and module.weight.requires_grad\n        ):\n            return True\n        return False\n\n    lambda_policy = functools.partial(\n        lambda_auto_wrap_policy, lambda_fn=lambda_policy_fn\n    )\n    transformer_wrap_policy = functools.partial(\n        transformer_auto_wrap_policy,\n        transformer_layer_cls=(\n            PrefixEncoder,\n            PromptEncoder,\n            PromptEmbedding,\n            transformer_layer_name,\n            # FullyShardedDataParallelPlugin.get_module_class_from_name(\n            #     model, transformer_layer_name\n            # ),\n        ),\n    )\n\n    auto_wrap_policy = functools.partial(\n        _or_policy, policies=[lambda_policy, transformer_wrap_policy]\n    )\n    return auto_wrap_policy\n"
  },
  {
    "path": "llama_recipes/utils/memory_utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\nimport gc\nimport threading\n\nimport psutil\nimport torch\n\n\ndef byte2gb(x):\n    return int(x / 2**30)\n\n\n# This context manager is used to track the peak memory usage of the process\nclass MemoryTrace:\n    def __enter__(self):\n        gc.collect()\n        torch.cuda.empty_cache()\n        torch.cuda.reset_max_memory_allocated()  # reset the peak gauge to zero\n        self.begin = byte2gb(torch.cuda.memory_allocated())\n        self.process = psutil.Process()\n        self.cpu_begin = byte2gb(self.cpu_mem_used())\n        self.peak_monitoring = True\n        peak_monitor_thread = threading.Thread(target=self.peak_monitor_func)\n        peak_monitor_thread.daemon = True\n        peak_monitor_thread.start()\n        return self\n\n    def cpu_mem_used(self):\n        \"\"\"get resident set size memory for the current process\"\"\"\n        return self.process.memory_info().rss\n\n    def peak_monitor_func(self):\n        self.cpu_peak = -1\n\n        while True:\n            self.cpu_peak = max(self.cpu_mem_used(), self.cpu_peak)\n\n            # can't sleep or will not catch the peak right (this comment is here on purpose)\n            # time.sleep(0.001) # 1msec\n\n            if not self.peak_monitoring:\n                break\n\n    def __exit__(self, *exc):\n        self.peak_monitoring = False\n\n        gc.collect()\n        torch.cuda.empty_cache()\n        self.end = byte2gb(torch.cuda.memory_allocated())\n        self.peak = byte2gb(torch.cuda.max_memory_allocated())\n        cuda_info = torch.cuda.memory_stats()\n        self.peak_active_gb = byte2gb(cuda_info[\"active_bytes.all.peak\"])\n        self.cuda_malloc_retires = cuda_info.get(\"num_alloc_retries\", 0)\n        self.peak_active_gb = byte2gb(cuda_info[\"active_bytes.all.peak\"])\n        self.m_cuda_ooms = 
cuda_info.get(\"num_ooms\", 0)\n        self.used = byte2gb(self.end - self.begin)\n        self.peaked = byte2gb(self.peak - self.begin)\n        self.max_reserved = byte2gb(torch.cuda.max_memory_reserved())\n\n        self.cpu_end = self.cpu_mem_used()\n        self.cpu_used = byte2gb(self.cpu_end - self.cpu_begin)\n        self.cpu_peaked = byte2gb(self.cpu_peak - self.cpu_begin)\n        # print(f\"delta used/peak {self.used:4d}/{self.peaked:4d}\")\n"
  },
  {
    "path": "llama_recipes/utils/train_utils.py",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.\n\nimport os\nimport sys\nimport yaml\n\nimport torch\nfrom tqdm import tqdm\n\n\"\"\"\nUnused imports:\nimport torch.nn as nn\nimport bitsandbytes as bnb\n\"\"\"\nfrom transformers import LlamaTokenizer\nfrom torch.distributed.fsdp import StateDictType\nimport torch.distributed as dist\nfrom pkg_resources import packaging\nfrom .memory_utils import MemoryTrace\nimport model_checkpointing\nimport torch.cuda.nccl as nccl\nfrom torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler\nfrom pathlib import Path\n\nsys.path.append(str(Path(__file__).resolve().parent.parent))\nfrom policies import fpSixteen, bfSixteen_mixed, get_llama_wrapper\n\n\ndef set_tokenizer_params(tokenizer: LlamaTokenizer):\n    tokenizer.pad_token_id = 0\n    tokenizer.padding_side = \"left\"\n\n\n# Converting Bytes to Megabytes\ndef byte2mb(x):\n    return int(x / 2**20)\n\n\ndef train(\n    model,\n    train_dataloader,\n    eval_dataloader,\n    tokenizer,\n    optimizer,\n    lr_scheduler,\n    gradient_accumulation_steps,\n    train_config,\n    fsdp_config=None,\n    local_rank=None,\n    rank=None,\n):\n    \"\"\"\n    Trains the model on the given dataloader\n\n    Args:\n        model: The model to be trained\n        train_dataloader: The dataloader containing the training data\n        optimizer: The optimizer used for training\n        lr_scheduler: The learning rate scheduler\n        gradient_accumulation_steps: The number of steps to accumulate gradients before performing a backward/update operation\n        num_epochs: The number of epochs to train for\n        local_rank: The rank of the current node in a distributed setting\n        train_config: The training configuration\n        eval_dataloader: The dataloader containing the eval data\n        tokenizer: tokenizer used in the eval for 
decoding the predictions\n\n    Returns: results dictionary containing average training and validation perplexity and loss\n    \"\"\"\n    # Create a gradient scaler for fp16\n    if train_config.use_fp16 and train_config.enable_fsdp:\n        scaler = ShardedGradScaler()\n    elif train_config.use_fp16 and not train_config.enable_fsdp:\n        scaler = torch.cuda.amp.GradScaler()\n    if train_config.enable_fsdp:\n        world_size = int(os.environ[\"WORLD_SIZE\"])\n    train_prep = []\n    train_loss = []\n    val_prep = []\n    val_loss = []\n    results = {}\n    best_val_loss = float(\"inf\")\n    for epoch in range(train_config.num_epochs):\n        with MemoryTrace() as memtrace:  # track the memory usage\n            model.train()\n            total_loss = 0.0\n            for step, batch in enumerate(\n                tqdm(train_dataloader, colour=\"blue\", desc=f\"Training Epoch{epoch}\")\n            ):\n                for key in batch.keys():\n                    if train_config.enable_fsdp:\n                        batch[key] = batch[key].to(local_rank)\n                    else:\n                        batch[key] = batch[key].to(\"cuda:0\")\n\n                loss = model(**batch).loss\n                loss = loss / gradient_accumulation_steps\n                total_loss += loss.detach().float()\n                if train_config.use_fp16:\n                    # if fp16 is enabled, use gradient scaler to handle gradient update\n                    scaler.scale(loss).backward()\n                    if (step + 1) % gradient_accumulation_steps == 0 or step == len(\n                        train_dataloader\n                    ) - 1:\n                        scaler.step(optimizer)\n                        scaler.update()\n                        optimizer.zero_grad()\n                else:\n                    # regular backpropagation when fp16 is not used\n                    loss.backward()\n                    if (step + 1) % 
gradient_accumulation_steps == 0 or step == len(\n                        train_dataloader\n                    ) - 1:\n                        optimizer.step()\n                        optimizer.zero_grad()\n                if train_config.enable_fsdp:\n                    if rank == 0:\n                        print(\n                            f\"\\n step {step} is completed and loss is {loss.detach().float()}\"\n                        )\n                else:\n                    print(\n                        f\"\\n step {step} is completed and loss is {loss.detach().float()}\"\n                    )\n\n        # Reducing total_loss across all devices if there's more than one CUDA device\n        if torch.cuda.device_count() > 1 and train_config.enable_fsdp:\n            dist.all_reduce(total_loss, op=dist.ReduceOp.SUM)\n        train_epoch_loss = total_loss / len(train_dataloader)\n        if train_config.enable_fsdp:\n            train_epoch_loss = train_epoch_loss / world_size\n        train_perplexity = torch.exp(train_epoch_loss)\n\n        train_prep.append(train_perplexity)\n        train_loss.append(train_epoch_loss)\n        if train_config.enable_fsdp:\n            if rank == 0:\n                print(f\"Max CUDA memory allocated was {memtrace.peak} GB\")\n                print(f\"Max CUDA memory reserved was {memtrace.max_reserved} GB\")\n                print(f\"Peak active CUDA memory was {memtrace.peak_active_gb} GB\")\n                print(f\"Cuda Malloc retires : {memtrace.cuda_malloc_retires}\")\n                print(\n                    f\"CPU Total Peak Memory consumed during the train (max): {memtrace.cpu_peaked + memtrace.cpu_begin} GB\"\n                )\n        else:\n            print(f\"Max CUDA memory allocated was {memtrace.peak} GB\")\n            print(f\"Max CUDA memory reserved was {memtrace.max_reserved} GB\")\n            print(f\"Peak active CUDA memory was {memtrace.peak_active_gb} GB\")\n            print(f\"Cuda 
Malloc retires : {memtrace.cuda_malloc_retires}\")\n            print(\n                f\"CPU Total Peak Memory consumed during the train (max): {memtrace.cpu_peaked + memtrace.cpu_begin} GB\"\n            )\n\n        # Update the learning rate as needed\n        lr_scheduler.step()\n\n        if train_config.run_validation:\n            eval_ppl, eval_epoch_loss = evaluation(\n                model, train_config, eval_dataloader, rank, tokenizer\n            )\n            if train_config.save_model and eval_epoch_loss < best_val_loss:\n                if train_config.enable_fsdp:\n                    dist.barrier()\n                if train_config.use_peft:\n                    if train_config.enable_fsdp:\n                        if rank == 0:\n                            print(\"we are about to save the PEFT modules\")\n                    else:\n                        print(\"we are about to save the PEFT modules\")\n                    model.save_pretrained(train_config.output_dir)\n                    if train_config.enable_fsdp:\n                        if rank == 0:\n                            print(\n                                f\"PEFT modules are saved in {train_config.output_dir} directory\"\n                            )\n                    else:\n                        print(\n                            f\"PEFT modules are saved in {train_config.output_dir} directory\"\n                        )\n\n                else:\n                    if (\n                        not train_config.use_peft\n                        and fsdp_config.checkpoint_type == StateDictType.FULL_STATE_DICT\n                    ):\n                        model_checkpointing.save_model_checkpoint(\n                            model, optimizer, rank, train_config, epoch=epoch\n                        )\n                    elif (\n                        not train_config.use_peft\n                        and fsdp_config.checkpoint_type\n                        == 
StateDictType.SHARDED_STATE_DICT\n                    ):\n                        print(\n                            \" Saving the FSDP model checkpoints using SHARDED_STATE_DICT\"\n                        )\n                        print(\"=====================================================\")\n\n                        model_checkpointing.save_model_and_optimizer_sharded(\n                            model, rank, train_config\n                        )\n                        if train_config.save_optimizer:\n                            model_checkpointing.save_model_and_optimizer_sharded(\n                                model, rank, train_config, optim=optimizer\n                            )\n                            print(\n                                \" Saving the FSDP model checkpoints and optimizer using SHARDED_STATE_DICT\"\n                            )\n                            print(\n                                \"=====================================================\"\n                            )\n\n                    if not train_config.use_peft and train_config.save_optimizer:\n                        model_checkpointing.save_optimizer_checkpoint(\n                            model, optimizer, rank, train_config, epoch=epoch\n                        )\n                        print(\n                            \" Saving the FSDP model checkpoints and optimizer using FULL_STATE_DICT\"\n                        )\n                        print(\"=====================================================\")\n                if train_config.enable_fsdp:\n                    dist.barrier()\n\n            if eval_epoch_loss < best_val_loss:\n                best_val_loss = eval_epoch_loss\n                if train_config.enable_fsdp:\n                    if rank == 0:\n                        print(f\"best eval loss on epoch {epoch} is {best_val_loss}\")\n                else:\n                    print(f\"best eval loss on epoch {epoch} is 
{best_val_loss}\")\n            val_loss.append(best_val_loss)\n            val_prep.append(eval_ppl)\n\n        if train_config.enable_fsdp:\n            if rank == 0:\n                print(\n                    f\"Epoch {epoch+1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}\"\n                )\n        else:\n            print(\n                f\"Epoch {epoch+1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}\"\n            )\n\n    avg_train_prep = sum(train_prep) / len(train_prep)\n    avg_train_loss = sum(train_loss) / len(train_loss)\n    if train_config.run_validation:\n        avg_eval_prep = sum(val_prep) / len(val_prep)\n        avg_eval_loss = sum(val_loss) / len(val_loss)\n\n    results[\"avg_train_prep\"] = avg_train_prep\n    results[\"avg_train_loss\"] = avg_train_loss\n    if train_config.run_validation:\n        results[\"avg_eval_prep\"] = avg_eval_prep\n        results[\"avg_eval_loss\"] = avg_eval_loss\n\n    # saving the training params including fsdp setting for reference.\n    if train_config.enable_fsdp and not train_config.use_peft:\n        save_train_params(train_config, fsdp_config, rank)\n\n    if train_config.use_peft and not train_config.run_validation:\n        if train_config.enable_fsdp:\n            if rank == 0:\n                print(\"we are about to save the PEFT modules\")\n        else:\n            print(\"we are about to save the PEFT modules\")\n        model.save_pretrained(train_config.output_dir)\n        if train_config.enable_fsdp:\n            if rank == 0:\n                print(f\"PEFT modules are saved in {train_config.output_dir} directory\")\n        else:\n            print(f\"PEFT modules are saved in {train_config.output_dir} directory\")\n\n    return results\n\n\ndef evaluation(\n    model, train_config, eval_dataloader, local_rank, tokenizer, prompt=None\n):\n    \"\"\"\n    Evaluates the model on the given dataloader\n\n    Args:\n 
       model: The model to evaluate\n        eval_dataloader: The dataloader containing the evaluation data\n        local_rank: The rank of the current node in a distributed setting\n        tokenizer: The tokenizer used to decode predictions\n\n    Returns: eval_ppl, eval_epoch_loss\n    \"\"\"\n    if train_config.enable_fsdp:\n        world_size = int(os.environ[\"WORLD_SIZE\"])\n    model.eval()\n    eval_preds = []\n    eval_loss = 0.0  # Initialize evaluation loss\n\n    with MemoryTrace() as memtrace:\n        for step, batch in enumerate(\n            tqdm(eval_dataloader, colour=\"green\", desc=\"evaluating Epoch\")\n        ):\n            for key in batch.keys():\n                if train_config.enable_fsdp:\n                    batch[key] = batch[key].to(local_rank)\n                else:\n                    batch[key] = batch[key].to(\"cuda:0\")\n            # Ensure no gradients are computed for this scope to save memory\n            with torch.no_grad():\n                # Forward pass and compute loss\n                outputs = model(**batch)\n                loss = outputs.loss\n                eval_loss += loss.detach().float()\n\n            # Decode predictions and add to evaluation predictions list\n            preds = torch.argmax(outputs.logits, -1)\n            eval_preds.extend(\n                tokenizer.batch_decode(\n                    preds.detach().cpu().numpy(), skip_special_tokens=True\n                )\n            )\n\n    # If there's more than one CUDA device, reduce evaluation loss across all devices\n\n    if torch.cuda.device_count() > 1 and train_config.enable_fsdp:\n        dist.all_reduce(eval_loss, op=dist.ReduceOp.SUM)\n\n    # Compute average loss and perplexity\n    eval_epoch_loss = eval_loss / len(eval_dataloader)\n    if train_config.enable_fsdp:\n        eval_epoch_loss = eval_epoch_loss / world_size\n    eval_ppl = torch.exp(eval_epoch_loss)\n\n    # Print evaluation metrics\n    if 
train_config.validation_prompt:\n        input_ids = tokenizer(train_config.validation_prompt, return_tensors=\"pt\")[\n            \"input_ids\"\n        ].to(local_rank)\n\n        output_ids = model.generate(\n            inputs=input_ids,\n            max_length=50,\n            do_sample=True,\n            top_k=250,\n            top_p=0.8,\n            temperature=0.75,\n        )\n\n        generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)\n\n    if train_config.enable_fsdp:\n        if local_rank == 0:\n            print(f\" {eval_ppl=} {eval_epoch_loss=}\")\n            if train_config.validation_prompt:\n                print(\n                    f\"\\n\\n---- Generated Response ----\\n\\n{generated_text}\\n----------\\n\"\n                )\n\n    else:\n        if train_config.validation_prompt:\n            print(f\"\\n\\n---- Generated Response ----\\n\\n{generated_text}\\n----------\\n\")\n        print(f\" {eval_ppl=} {eval_epoch_loss=}\")\n\n    return eval_ppl, eval_epoch_loss\n\n\ndef freeze_transformer_layers(model, num_layer):\n    for i, layer in enumerate(model.model.layers):\n        if i < num_layer:\n            for param in layer.parameters():\n                param.requires_grad = False\n\n\ndef check_frozen_layers_peft_model(model):\n    for i, layer in enumerate(model.base_model.model.model.layers):\n        for name, param in layer.named_parameters():\n            print(f\"Layer {i}, parameter {name}: requires_grad = {param.requires_grad}\")\n\n\ndef setup():\n    \"\"\"Initialize the process group for distributed training\"\"\"\n    dist.init_process_group(\"nccl\")\n\n\ndef setup_environ_flags(rank):\n    \"\"\"Set environment flags for debugging purposes\"\"\"\n    os.environ[\"TORCH_SHOW_CPP_STACKTRACES\"] = str(1)\n    os.environ[\"NCCL_ASYNC_ERROR_HANDLING\"] = str(1)\n    # os.environ[\"TORCH_DISTRIBUTED_DEBUG\"] = \"DETAIL\"\n    # This flag will help with CUDA memory fragmentations that can lead into 
OOM in some cases.\n    # Note this is only available in PyTorch Nightlies (as of July 30 2023)\n    # os.environ['PYTORCH_CUDA_ALLOC_CONF']='expandable_segments:True'\n    if rank == 0:\n        print(\"--> Running with torch dist debug set to detail\")\n\n\ndef cleanup():\n    \"\"\"Clean up the process group after training\"\"\"\n    dist.destroy_process_group()\n\n\ndef clear_gpu_cache(rank=None):\n    \"\"\"Clear the GPU cache for all ranks\"\"\"\n    if rank == 0:\n        print(\"Clearing GPU cache for all ranks\")\n    torch.cuda.empty_cache()\n\n\ndef get_parameter_dtypes(model):\n    \"\"\"Get the data types of model parameters\"\"\"\n    parameter_dtypes = {}\n    for name, parameter in model.named_parameters():\n        parameter_dtypes[name] = parameter.dtype\n    return parameter_dtypes\n\n\ndef print_model_size(model, config, rank: int = 0) -> None:\n    \"\"\"\n    Print model name, the number of trainable parameters and initialization time.\n\n    Args:\n        model: The PyTorch model.\n        model_name (str): Name of the model.\n        init_time_start (float): Initialization start time.\n        init_time_end (float): Initialization end time.\n        rank (int, optional): Current process's rank. 
Defaults to 0.\n    \"\"\"\n    if rank == 0:\n        print(f\"--> Model {config.model_name}\")\n        total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n        print(f\"\\n--> {config.model_name} has {total_params / 1e6} Million params\\n\")\n\n\ndef get_policies(cfg, rank):\n    \"\"\"Get the policies for mixed precision and fsdp wrapping\"\"\"\n\n    verify_bfloat_support = (\n        torch.version.cuda\n        and torch.cuda.is_bf16_supported()\n        and packaging.version.parse(torch.version.cuda).release >= (11, 0)\n        and dist.is_nccl_available()\n        and nccl.version() >= (2, 10)\n    )\n\n    mixed_precision_policy = None\n    wrapping_policy = None\n\n    # Mixed precision\n    if cfg.mixed_precision:\n        bf16_ready = verify_bfloat_support\n\n        if bf16_ready and not cfg.use_fp16:\n            mixed_precision_policy = bfSixteen_mixed\n            if rank == 0:\n                print(\"bFloat16 enabled for mixed precision - using bfSixteen policy\")\n        elif cfg.use_fp16:\n            mixed_precision_policy = fpSixteen\n            if rank == 0:\n                print(\"FP16 enabled\")\n        else:\n            print(\"bFloat16 support not present. 
Using FP32, and not mixed precision\")\n    wrapping_policy = get_llama_wrapper()\n    return mixed_precision_policy, wrapping_policy\n\n\ndef save_train_params(train_config, fsdp_config, rank):\n    \"\"\"\n    This function saves the train_config and FSDP config into a train_params.yaml.\n    This will be used by converter script in the inference folder to fetch the HF model name or path.\n    It also would be helpful as a log for future references.\n    \"\"\"\n    # Convert the train_config and fsdp_config objects to dictionaries,\n    # converting all values to strings to ensure they can be serialized into a YAML file\n    train_config_dict = {\n        k: str(v) for k, v in vars(train_config).items() if not k.startswith(\"__\")\n    }\n    fsdp_config_dict = {\n        k: str(v) for k, v in vars(fsdp_config).items() if not k.startswith(\"__\")\n    }\n    # Merge the two dictionaries into one\n    train_params_dict = {**train_config_dict, **fsdp_config_dict}\n    # Construct the folder name (following FSDP checkpointing style) using properties of the train_config object\n    folder_name = (\n        train_config.dist_checkpoint_root_folder\n        + \"/\"\n        + train_config.dist_checkpoint_folder\n        + \"-\"\n        + train_config.model_name\n    )\n\n    save_dir = Path.cwd() / folder_name\n    # If the directory does not exist, create it\n    if not os.path.exists(save_dir):\n        os.makedirs(save_dir)\n    # Convert the dictionary to a YAML string\n    config_yaml = yaml.dump(train_params_dict, indent=4)\n    file_name = os.path.join(save_dir, \"train_params.yaml\")\n\n    # Check if there's a directory with the same name as the file\n    if os.path.isdir(file_name):\n        print(f\"Error: {file_name} is a directory, not a file.\")\n    else:\n        # Write the YAML string to the file\n        with open(file_name, \"w\") as f:\n            f.write(config_yaml)\n        if rank == 0:\n            print(f\"training params are saved in 
{file_name}\")\n"
  },
  {
    "path": "mistral-schema.json",
    "content": "{\n  \"openapi\": \"3.0.2\",\n  \"info\": {\n    \"title\": \"Cog\",\n    \"version\": \"0.1.0\"\n  },\n  \"paths\": {\n    \"/\": {\n      \"get\": {\n        \"summary\": \"Root\",\n        \"operationId\": \"root__get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Root  Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/health-check\": {\n      \"get\": {\n        \"summary\": \"Healthcheck\",\n        \"operationId\": \"healthcheck_health_check_get\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Healthcheck Health Check Get\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions\": {\n      \"post\": {\n        \"summary\": \"Predict\",\n        \"description\": \"Run a single prediction on the model\",\n        \"operationId\": \"predict_predictions_post\",\n        \"parameters\": [\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"$ref\": \"#/components/schemas/PredictionRequest\"\n              }\n            }\n          }\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n  
              \"schema\": {\n                  \"$ref\": \"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}\": {\n      \"put\": {\n        \"summary\": \"Predict Idempotent\",\n        \"description\": \"Run a single prediction on the model (idempotent creation).\",\n        \"operationId\": \"predict_idempotent_predictions__prediction_id__put\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          },\n          {\n            \"required\": false,\n            \"schema\": {\n              \"title\": \"Prefer\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prefer\",\n            \"in\": \"header\"\n          }\n        ],\n        \"requestBody\": {\n          \"content\": {\n            \"application/json\": {\n              \"schema\": {\n                \"title\": \"Prediction Request\",\n                \"allOf\": [\n                  {\n                    \"$ref\": \"#/components/schemas/PredictionRequest\"\n                  }\n                ]\n              }\n            }\n          },\n          \"required\": true\n        },\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": 
\"#/components/schemas/PredictionResponse\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/predictions/{prediction_id}/cancel\": {\n      \"post\": {\n        \"summary\": \"Cancel\",\n        \"description\": \"Cancel a running prediction\",\n        \"operationId\": \"cancel_predictions__prediction_id__cancel_post\",\n        \"parameters\": [\n          {\n            \"required\": true,\n            \"schema\": {\n              \"title\": \"Prediction ID\",\n              \"type\": \"string\"\n            },\n            \"name\": \"prediction_id\",\n            \"in\": \"path\"\n          }\n        ],\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"title\": \"Response Cancel Predictions  Prediction Id  Cancel Post\"\n                }\n              }\n            }\n          },\n          \"422\": {\n            \"description\": \"Validation Error\",\n            \"content\": {\n              \"application/json\": {\n                \"schema\": {\n                  \"$ref\": \"#/components/schemas/HTTPValidationError\"\n                }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"/shutdown\": {\n      \"post\": {\n        \"summary\": \"Start Shutdown\",\n        \"operationId\": \"start_shutdown_shutdown_post\",\n        \"responses\": {\n          \"200\": {\n            \"description\": \"Successful Response\",\n            \"content\": {\n              \"application/json\": {\n             
   \"schema\": {\n                  \"title\": \"Response Start Shutdown Shutdown Post\"\n                }\n              }\n            }\n          }\n        }\n      }\n    }\n  },\n  \"components\": {\n    \"schemas\": {\n      \"HTTPValidationError\": {\n        \"title\": \"HTTPValidationError\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"detail\": {\n            \"title\": \"Detail\",\n            \"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/ValidationError\"\n            }\n          }\n        }\n      },\n      \"Input\": {\n        \"title\": \"Input\",\n        \"required\": [\n          \"prompt\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"prompt\": {\n            \"title\": \"Prompt\",\n            \"type\": \"string\",\n            \"description\": \"Prompt to send to the model.\",\n            \"x-order\": 0\n          },\n          \"system_prompt\": {\n            \"title\": \"System Prompt\",\n            \"type\": \"string\",\n            \"description\": \"System prompt to send to the model. This is prepended to the prompt and helps guide system behavior. Should not be blank.\",\n            \"default\": \"Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity. \",\n            \"x-order\": 1\n          },\n          \"max_new_tokens\": {\n            \"title\": \"Max New Tokens\",\n            \"minimum\": 1,\n            \"type\": \"integer\",\n            \"description\": \"Maximum number of tokens to generate. 
A word is generally 2-3 tokens\",\n            \"default\": 128,\n            \"x-order\": 2\n          },\n          \"min_new_tokens\": {\n            \"title\": \"Min New Tokens\",\n            \"minimum\": -1,\n            \"type\": \"integer\",\n            \"description\": \"Minimum number of tokens to generate. To disable, set to -1. A word is generally 2-3 tokens.\",\n            \"default\": -1,\n            \"x-order\": 3\n          },\n          \"temperature\": {\n            \"title\": \"Temperature\",\n            \"maximum\": 5,\n            \"minimum\": 0.01,\n            \"type\": \"number\",\n            \"description\": \"Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\",\n            \"default\": 0.7,\n            \"x-order\": 4\n          },\n          \"top_p\": {\n            \"title\": \"Top P\",\n            \"maximum\": 1,\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens\",\n            \"default\": 0.95,\n            \"x-order\": 5\n          },\n          \"repetition_penalty\": {\n            \"title\": \"Repetition Penalty\",\n            \"minimum\": 0,\n            \"type\": \"number\",\n            \"description\": \"A parameter that controls how repetitive text can be. Lower means more repetitive, while higher means less repetitive. Set to 1.0 to disable.\",\n            \"default\": 1.15,\n            \"x-order\": 6\n          },\n          \"stop_sequences\": {\n            \"title\": \"Stop Sequences\",\n            \"type\": \"string\",\n            \"description\": \"A comma-separated list of sequences to stop generation at. 
For example, '<end>,<stop>' will stop generation at the first instance of '<end>' or '<stop>'.\",\n            \"x-order\": 7\n          },\n          \"seed\": {\n            \"title\": \"Seed\",\n            \"type\": \"integer\",\n            \"description\": \"Random seed. Leave blank to randomize the seed\",\n            \"x-order\": 8\n          },\n          \"debug\": {\n            \"title\": \"Debug\",\n            \"type\": \"boolean\",\n            \"description\": \"provide debugging output in logs\",\n            \"default\": false,\n            \"x-order\": 9\n          },\n          \"prompt_template\": {\n            \"title\": \"Prompt Template\",\n            \"type\": \"string\",\n            \"description\": \"Template for formatting the prompt\",\n            \"default\": \"[INST] {system_prompt}{prompt} [/INST]\",\n            \"x-order\": 10\n          },\n          \"replicate_weights\": {\n            \"title\": \"Replicate Weights\",\n            \"type\": \"string\",\n            \"description\": \"Path to fine-tuned weights produced by a Replicate fine-tune job.\",\n            \"x-order\": 11\n          }\n        }\n      },\n      \"Output\": {\n        \"title\": \"Output\",\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"string\"\n        },\n        \"x-cog-array-type\": \"iterator\",\n        \"x-cog-array-display\": \"concatenate\"\n      },\n      \"PredictionRequest\": {\n        \"title\": \"PredictionRequest\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"output_file_prefix\": {\n            \"title\": \"Output File Prefix\",\n     
       \"type\": \"string\"\n          },\n          \"webhook\": {\n            \"title\": \"Webhook\",\n            \"maxLength\": 65536,\n            \"minLength\": 1,\n            \"type\": \"string\",\n            \"format\": \"uri\"\n          },\n          \"webhook_events_filter\": {\n            \"type\": \"array\",\n            \"items\": {\n              \"$ref\": \"#/components/schemas/WebhookEvent\"\n            },\n            \"default\": [\n              \"start\",\n              \"output\",\n              \"logs\",\n              \"completed\"\n            ]\n          }\n        }\n      },\n      \"PredictionResponse\": {\n        \"title\": \"PredictionResponse\",\n        \"type\": \"object\",\n        \"properties\": {\n          \"input\": {\n            \"$ref\": \"#/components/schemas/Input\"\n          },\n          \"output\": {\n            \"$ref\": \"#/components/schemas/Output\"\n          },\n          \"id\": {\n            \"title\": \"Id\",\n            \"type\": \"string\"\n          },\n          \"version\": {\n            \"title\": \"Version\",\n            \"type\": \"string\"\n          },\n          \"created_at\": {\n            \"title\": \"Created At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"started_at\": {\n            \"title\": \"Started At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"completed_at\": {\n            \"title\": \"Completed At\",\n            \"type\": \"string\",\n            \"format\": \"date-time\"\n          },\n          \"logs\": {\n            \"title\": \"Logs\",\n            \"type\": \"string\",\n            \"default\": \"\"\n          },\n          \"error\": {\n            \"title\": \"Error\",\n            \"type\": \"string\"\n          },\n          \"status\": {\n            \"$ref\": \"#/components/schemas/Status\"\n          },\n          \"metrics\": {\n           
 \"title\": \"Metrics\",\n            \"type\": \"object\"\n          }\n        }\n      },\n      \"Status\": {\n        \"title\": \"Status\",\n        \"enum\": [\n          \"starting\",\n          \"processing\",\n          \"succeeded\",\n          \"canceled\",\n          \"failed\"\n        ],\n        \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      },\n      \"ValidationError\": {\n        \"title\": \"ValidationError\",\n        \"required\": [\n          \"loc\",\n          \"msg\",\n          \"type\"\n        ],\n        \"type\": \"object\",\n        \"properties\": {\n          \"loc\": {\n            \"title\": \"Location\",\n            \"type\": \"array\",\n            \"items\": {\n              \"anyOf\": [\n                {\n                  \"type\": \"string\"\n                },\n                {\n                  \"type\": \"integer\"\n                }\n              ]\n            }\n          },\n          \"msg\": {\n            \"title\": \"Message\",\n            \"type\": \"string\"\n          },\n          \"type\": {\n            \"title\": \"Error Type\",\n            \"type\": \"string\"\n          }\n        }\n      },\n      \"WebhookEvent\": {\n        \"title\": \"WebhookEvent\",\n        \"enum\": [\n          \"start\",\n          \"output\",\n          \"logs\",\n          \"completed\"\n        ],\n        \"type\": \"string\",\n        \"description\": \"An enumeration.\"\n      }\n    }\n  }\n}\n"
  },
  {
    "path": "model_templates/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We are trying our very best to no longer have different inference code paths\n# for trained and untrained weights :)\n#\n# INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = \n\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs. \n# -------------------------------\n\n\nENGINE = \nENGINE_KWARGS = {}\n\n# DEFAULT INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the default inference configuration, which may differ from\n# how we implement inference for a trained model.\n# -------------------------------\n\n\nLOCAL_DEFAULT_INFERENCE_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nREMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", \n    \"remote/path/to/your/weights/here\",\n\n)\n\n# N_SHARDS = 2\n# REMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n#     f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n#     for i in range(N_SHARDS)\n# ]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD = #[\"gptq_model-4bit-128g.safetensors\"]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n    
\"quantize_config.json\",\n]\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\", \n    default_value=\"remote/path/to/your/weights/here\"\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\", \n    default_value=\"remote/path/to/your/weights/here\"\n)\n\nN_SHARDS = 2\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n    f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n    for i in range(N_SHARDS)\n]\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"model.safetensors.index.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n]\n"
  },
  {
    "path": "models/dockerignore",
    "content": "*pdf\n*docx\nflan-t5**\ncheckpoints/**\nexamples/**\nweights_13/**\ntmp/**\n**.jsonl\nunconverted-weights\nunconverted-weights/\nweights\nweights/\nllama_weights/\nllama_weights\n*/**/*.safetensors\n*/**/*.tensors\n**/.git/lfs/objects/**\n*.tensors\ndefault_base_weights/\nllama.tensors\ncode\ntests\n**/*ipynb\n.ruff/**\n.mypy_cache\ntests\n\n# generated by replicate/cog\n__pycache__\n*.pyc\n*.pyo\n*.pyd\n.Python\nenv\npip-log.txt\npip-delete-this-directory.txt\n.tox\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.log\n.git\n**/.git\n.mypy_cache\n**/.mypy_cache\n.pytest_cache\n.hypothesis\n\nmodels/*/\n"
  },
  {
    "path": "models/llama-2-13b/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-13b\"\n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We are trying our very best to no longer have different inference code paths\n# for trained and untrained weights :)\n\n#\n# INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = False\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs.\n# -------------------------------\n\nfrom src.inference_engines.exllama import ExllamaEngine\n\nENGINE = ExllamaEngine\nENGINE_KWARGS = {\n    \"fused_attn\": True,\n}\n\n# WEIGHTS CONFIGURATION\n# -------------------------------\n# Which base weights do we use for inference with this model?\n# -------------------------------\n\nLOCAL_DEFAULT_INFERENCE_WEIGHTS_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n)\n\nREMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\",\n    \"remote/path/to/your/weights/here\",\n)\n\n# N_SHARDS = 2\n# REMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n#     f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n#     for i in range(N_SHARDS)\n# ]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD = [\"gptq_model-4bit-32g.safetensors\"]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    
\"tokenizer.model\",\n    \"quantize_config.json\",\n]\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nN_SHARDS = 3\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n    f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n    for i in range(N_SHARDS)\n]\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"model.safetensors.index.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n]\n"
  },
  {
    "path": "models/llama-2-13b-chat/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-13b-chat\"\n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We sometimes implement inference differently for models that have not\n# been trained/fine-tuned vs. those that have been trained/fine-tuned. We refer to the\n# former as \"default\" and the latter as \"trained\". Below, you can\n# set your \"default inference configuration\" and your \"trained\n# inference configuration\".\n#\n# GENERAL INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = True\n\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs.\n# -------------------------------\n\nfrom src.inference_engines.exllama import ExllamaEngine\n\nENGINE = ExllamaEngine\nENGINE_KWARGS = {\n    \"fused_attn\": True,\n}\n\n# WEIGHTS CONFIGURATION\n# -------------------------------\n# Which base weights do we use for inference with this model?\n# -------------------------------\n\n\nLOCAL_DEFAULT_INFERENCE_WEIGHTS_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n)\n\nREMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\",\n    \"remote/path/to/your/weights/here\",\n)\n\n# N_SHARDS = 2\n# REMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n#     f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n#     for i in range(N_SHARDS)\n# ]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD = 
[\"gptq_model-4bit-128g.safetensors\"]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n    \"quantize_config.json\",\n]\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nN_SHARDS = 3\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n    f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n    for i in range(N_SHARDS)\n]\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"model.safetensors.index.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n]\n"
  },
  {
    "path": "models/llama-2-13b-chat-hf-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-13b-chat-hf-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(\n    model_name=\"Llama-2-13b-chat-hf-q4f16_1\", n_shards=163\n)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nnum_vllm_shards = 3\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(num_vllm_shards),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = True\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(num_vllm_shards)\n"
  },
  {
    "path": "models/llama-2-13b-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-13b-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"llama-2-13b-hf-q0f16\", n_shards=163)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nnum_vllm_shards = 3\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(num_vllm_shards),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = False\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(num_vllm_shards)\n"
  },
  {
    "path": "models/llama-2-70b/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-70b\"\n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We sometimes implement inference differently for models that have not\n# been trained/fine-tuned vs. those that have been trained/fine-tuned. We refer to the\n# former as \"default\" and the latter as \"trained\". Below, you can\n# set your \"default inference configuration\" and your \"trained\n# inference configuration\".\n#\n# GENERAL INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = False\n\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs.\n# -------------------------------\n\nfrom src.inference_engines.exllama import ExllamaEngine\n\nENGINE = ExllamaEngine\nENGINE_KWARGS = {\n    \"fused_attn\": True,\n}\n\n# WEIGHTS CONFIGURATION\n# -------------------------------\n# Which base weights do we use for inference with this model?\n# -------------------------------\n\n\nLOCAL_DEFAULT_INFERENCE_WEIGHTS_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n)\n\nREMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\",\n    \"remote/path/to/your/weights/here\",\n)\n\n# N_SHARDS = 2\n# REMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n#     f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n#     for i in range(N_SHARDS)\n# ]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD = 
[\"gptq_model-4bit-32g.safetensors\"]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n    \"quantize_config.json\",\n]\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nN_SHARDS = 15\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n    f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n    for i in range(N_SHARDS)\n]\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"model.safetensors.index.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n]\n"
  },
  {
    "path": "models/llama-2-70b/model_artifacts/tokenizer/special_tokens_map.json",
    "content": "{}"
  },
  {
    "path": "models/llama-2-70b/model_artifacts/tokenizer/tokenizer_checklist.chk",
    "content": "eeec4125e9c7560836b4873b6f8e3025  tokenizer.model\n"
  },
  {
    "path": "models/llama-2-70b/model_artifacts/tokenizer/tokenizer_config.json",
    "content": "{\"bos_token\": \"\", \"eos_token\": \"\", \"model_max_length\": 4096, \"tokenizer_class\": \"LlamaTokenizer\", \"unk_token\": \"\"}"
  },
  {
    "path": "models/llama-2-70b-chat/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-70b-chat\"\n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We are trying our very best to no longer have different inference code paths\n# for trained and untrained weights :)\n\n#\n# INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = True\n\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs.\n# -------------------------------\n\nfrom src.inference_engines.exllama import ExllamaEngine\n\nENGINE = ExllamaEngine\nENGINE_KWARGS = {\n    \"fused_attn\": True,\n}\n\n# WEIGHTS CONFIGURATION\n# -------------------------------\n# Which base weights do we use for inference with this model?\n# -------------------------------\n\n\nLOCAL_DEFAULT_INFERENCE_WEIGHTS_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n)\n\nREMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\",\n    \"remote/path/to/your/weights/here\",\n)\n\n# N_SHARDS = 2\n# REMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n#     f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n#     for i in range(N_SHARDS)\n# ]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD = [\"gptq_model-4bit--1g.safetensors\"]\n\nREMOTE_DEFAULT_INFERENCE_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    
\"tokenizer.model\",\n    \"quantize_config.json\",\n]\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nN_SHARDS = 15\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = [\n    f\"model-{str(i+1).zfill(5)}-of-{str(N_SHARDS).zfill(5)}.safetensors\"\n    for i in range(N_SHARDS)\n]\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD += [\n    \"config.json\",\n    \"generation_config.json\",\n    \"model.safetensors.index.json\",\n    \"special_tokens_map.json\",\n    \"tokenizer_config.json\",\n    \"tokenizer.json\",\n    \"tokenizer.model\",\n]\n"
  },
  {
    "path": "models/llama-2-70b-chat-hf-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-70b-chat-hf-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(\n    model_name=\"Llama-2-70b-chat-hf-q4f16_1\", n_shards=483\n)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(15),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = True\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False, num_shards=4),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(15)\n"
  },
  {
    "path": "models/llama-2-70b-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-70b-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"llama-2-70b-q0f16\", n_shards=323)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(15),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = False\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False, num_shards=4),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(15)\n"
  },
  {
    "path": "models/llama-2-7b/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    exllama_kwargs,\n    get_fp16_file_list,\n    get_gptq_file_list,\n    vllm_kwargs,\n)\nfrom src.inference_engines.vllm_exllama_engine import ExllamaVllmEngine\n\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b\"\n\n\n# Inference weights\n\nexllama_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_gptq_file_list(\"gptq_model-4bit-128g.safetensors\"),\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n# Inference config\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\nUSE_SYSTEM_PROMPT = False\n\nENGINE = ExllamaVllmEngine\nexllama_kw = exllama_kwargs(exllama_weights)\nvllm_kw = vllm_kwargs(vllm_weights)\n\nENGINE_KWARGS = {\n    \"exllama_args\": exllama_kw,\n    \"vllm_args\": vllm_kw,\n}\n\n\n# Training config\n\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n\n# -------------------------------\n\nDEFAULT_PAD_TOKEN = \"[PAD]\"\nDEFAULT_EOS_TOKEN = 
\"</s>\"\nDEFAULT_BOS_TOKEN = \"<s>\"\nDEFAULT_UNK_TOKEN = \"</s>\"\n"
  },
  {
    "path": "models/llama-2-7b-chat/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    exllama_kwargs,\n    get_fp16_file_list,\n    get_gptq_file_list,\n    vllm_kwargs,\n    Weights,\n)\nfrom src.utils import get_env_var_or_default\n\nfrom src.inference_engines.vllm_exllama_engine import ExllamaVllmEngine\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b-chat\"\n\n# Inference weights\n\nexllama_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_gptq_file_list(\"gptq_model-4bit-32g.safetensors\"),\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n\n# Inference config\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\nUSE_SYSTEM_PROMPT = True\n\nENGINE = ExllamaVllmEngine\nexllama_kw = exllama_kwargs(exllama_weights)\nvllm_kw = vllm_kwargs(vllm_weights)\n\nENGINE_KWARGS = {\n    \"exllama_args\": exllama_kw,\n    \"vllm_args\": vllm_kw,\n}\n\n# Training config\n\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "models/llama-2-7b-chat-hf-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b-chat-hf-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"Llama-2-7b-chat-hf-q4f16_1\", n_shards=115)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n\n# Inference config\nUSE_SYSTEM_PROMPT = True\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "models/llama-2-7b-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"llama-2-7b-hf-q0f16\", n_shards=131)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = False\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "models/llama-2-7b-transformers/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import Weights, get_fp16_file_list\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b-transformers\"\n# INFERENCE CONFIGURATION\n#######################################################################\n# --------------------Notes--------------------------------------------\n# We are trying our very best to no longer have different inference code paths\n# for trained and untrained weights :)\n#\n# INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the general inference configuration,\n# which is used for both trained and untrained models.\n# -------------------------------\n\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/tokenizer\"\nUSE_SYSTEM_PROMPT = False\n\n\n# ENGINE CONFIGURATION\n# -------------------------------\n# Here we define the specific inference engine we intend to use for inference, and all appropriate kwargs.\n# -------------------------------\n\nfrom src.inference_engines.transformers_engine import TransformersEngine\n\n# todo - this is probably wrong - now that different engines have different tokenizers, should we eliminate load_tokenizer & handle it all within the engine? 
I ...think so\nfrom functools import partial\nfrom src.more_utils import load_tokenizer\n\nweights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\nENGINE = TransformersEngine\nENGINE_KWARGS = {\n    \"weights\": weights,\n    \"tokenizer_func\": partial(load_tokenizer, TOKENIZER_PATH),\n}\n\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "models/llama-2-7b-transformers/model_artifacts/tokenizer/special_tokens_map.json",
    "content": "{}"
  },
  {
    "path": "models/llama-2-7b-transformers/model_artifacts/tokenizer/tokenizer_checklist.chk",
    "content": "eeec4125e9c7560836b4873b6f8e3025  tokenizer.model\n"
  },
  {
    "path": "models/llama-2-7b-transformers/model_artifacts/tokenizer/tokenizer_config.json",
    "content": "{\"bos_token\": \"\", \"eos_token\": \"\", \"model_max_length\": 4096, \"tokenizer_class\": \"LlamaTokenizer\", \"unk_token\": \"\"}"
  },
  {
    "path": "models/llama-2-7b-vllm/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import Weights, get_fp16_file_list, vllm_kwargs\n\n\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"llama-2-7b-vllm\"\n\n# Inference config\n\nweights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\",\n    remote_path=get_env_var_or_default(\n        \"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\",\n        \"remote/path/to/your/weights/here\",\n    ),\n    remote_files=get_fp16_file_list(2),\n)\n\nLOAD_IN_4BIT = False\nTOKENIZER_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\nUSE_SYSTEM_PROMPT = False\nUSE_EXLLAMA_FOR_UNTRAINED_WEIGHTS = False\n\n# Engine config\n\nfrom src.inference_engines.vllm_engine import vLLMEngine\n\n\nENGINE = vLLMEngine\nENGINE_KWARGS = vllm_kwargs(weights)\n\n\n# TRAINED INFERENCE CONFIGURATION\n# -------------------------------\n# This section defines the inference configuration for fine-tuned models\n# -------------------------------\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\n\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\n\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=\"remote/path/to/your/weights/here\",\n)\n\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n\n\n# -------------------------------\n\nDEFAULT_PAD_TOKEN = \"[PAD]\"\nDEFAULT_EOS_TOKEN = \"</s>\"\nDEFAULT_BOS_TOKEN = \"<s>\"\nDEFAULT_UNK_TOKEN = \"</s>\"\n"
  },
  {
    "path": "models/mistral-7b-instruct-v0.1-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"mistral-7b-instruct-v0.1-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"Mistral-7B-Instruct-v0.1-q4f16_1\", n_shards=107)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = True\n\n# from mistral: \"<s>[INST] + Instruction [/INST] Model answer</s>[INST] Follow-up instruction [/INST]\"\nPROMPT_TEMPLATE = \"[INST] {system_prompt}{prompt} [/INST]\"\nDEFAULT_SYSTEM_PROMPT = \"Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity. 
\"\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "models/mistral-7b-v0.1-mlc/config.py",
    "content": "from dotenv import load_dotenv\nfrom src.config_utils import (\n    Weights,\n    get_fp16_file_list,\n    get_mlc_file_list,\n    mlc_kwargs,\n    vllm_kwargs,\n)\nfrom src.inference_engines.mlc_vllm_engine import MLCvLLMEngine\nfrom src.utils import get_env_var_or_default\n\nload_dotenv()\n\nMODEL_NAME = \"mistral-7b-v0.1-mlc\"\n\n# Inference weights\nmlc_file_list = get_mlc_file_list(model_name=\"Mistral-7B-v0.1-q4f16_1\", n_shards=107)\n\nLOCAL_PATH = f\"models/{MODEL_NAME}/model_artifacts/default_inference_weights\"\n\nmlc_weights = Weights(\n    local_path=LOCAL_PATH,\n    remote_path=get_env_var_or_default(\"REMOTE_DEFAULT_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=mlc_file_list,\n)\n\nvllm_weights = Weights(\n    local_path=f\"models/{MODEL_NAME}/model_artifacts/lora_inference_weights\",\n    remote_path=get_env_var_or_default(\"REMOTE_VLLM_INFERENCE_WEIGHTS_PATH\", None),\n    remote_files=get_fp16_file_list(2),\n)\n\n# Inference config\nUSE_SYSTEM_PROMPT = False\n\nENGINE = MLCvLLMEngine\nENGINE_KWARGS = {\n    \"mlc_args\": mlc_kwargs(mlc_weights, is_chat=False),\n    \"vllm_args\": vllm_kwargs(vllm_weights),\n}\n\n# Training config\nLOAD_IN_4BIT = False\n\nLOCAL_TRAINING_WEIGHTS_PATH = f\"models/{MODEL_NAME}/model_artifacts/training_weights\"\nREMOTE_TRAINING_WEIGHTS_PATH = get_env_var_or_default(\n    \"REMOTE_TRAINING_WEIGHTS_PATH\",\n    None,\n)\nLOCAL_TRAINING_WEIGHTS_CONFIG_PATH = (\n    f\"models/{MODEL_NAME}/model_artifacts/training_weights/config.json\"\n)\nREMOTE_TRAINING_WEIGHTS_CONFIG_PATH = get_env_var_or_default(\n    var_name=\"REMOTE_TRAINING_WEIGHTS_CONFIG_PATH\",\n    default_value=None,\n)\nREMOTE_TRAINING_FILES_TO_DOWNLOAD = get_fp16_file_list(2)\n"
  },
  {
    "path": "notes/new_model_notes.md",
    "content": "# `cog-llama-template` Model Management\n\nThe `cog-llama-template` repo decomposes model management into four constructs:\n\n* **Templates.** We store templates in the `./model_templates/` directory. For our purposes, a template includes the following model-specific artifacts: `cog.yaml`, `config.py`, `predict.py`. \n\n* **Models.** We store artifacts for initialized models in the `./models/` directory. These artifacts are copied from a template and then updated with model-specific information. \n\n* **Shared code.** Models defined in `cog-llama-template` share code, e.g. implementations of training and inference methods. Shared code is maintained in the `./src/` directory. \n\n* **Active model.** To build, run, or push a specific model, its artifacts must be copied from its associated `./models/` directory to the root of this project. We do this so that `./src/` code is available at build time. We refer to this copying process as model *selection*.\n\nTo help users manage and interact with these constructs, we provide a `Makefile` with commands that streamline the model development process. Below is a step-by-step demonstration of how you can use the `Makefile` to develop a model. \n\n**1. Initialize a new model.**\n\nYou can initialize a new model by setting the environment variable `SELECTED_MODEL` to the name of the model you want to initialize. The name is arbitrary and there are no forced naming conventions; however, our in-house style is lowercase dash-case. \n\nThe `SELECTED_MODEL` environment variable will be referenced for all subsequent make commands. However, you can also specify the argument `name=<model_name>` instead of setting an environment variable. \n\nFinally, `make init` will copy a model template from `model_templates` to `./models/<model-name>/`.\n\n```\nexport SELECTED_MODEL=llama-2-70b-chat\nmake init\n```\n\n**2. 
Update model details.**\n\nCurrently, you need to manually update model details in `config.py`, as well as possibly in `predict.py`. Specifically, you need to provide values for global config variables that determine inference logic and files that should be downloaded. \n\nWe assume that model artifacts are stored in an accessible and external location. During `setup` or training initialization, model artifacts specified in `config.py` will be downloaded. \n\nHowever, in some cases, it is preferable to not expose the locations of model artifacts in `config.py`. In such cases, you can store information in a `.env` file in your model's directory. At runtime, those environment variables will be initialized and their values will be used by `config.py`. \n\nFor example, we store paths to model artifacts in `.env` and load this at runtime.\n\n**3. Select model.**\n\nTo interact with a model, its artifacts need to be copied to the root of `cog-llama-templates`. You can do this like:\n\n ```make select``` \n\n or \n\n ```make select model=<model-name>```\n\n This will copy the model artifacts to the root and run `cog build`. \n\n**Local testing.**\n\nOur `Makefile` provides easy access to a rudimentary test suite that supports local and staged testing.\n\nAssuming you've set the `SELECTED_MODEL` environment variable, you can just call:\n\n`make test-local`\n\nAppending `verbose=true` will run tests with `-s` so that output will be printed.\n\n**Staging.**\n\nWe also provide a staging workflow via `make stage` and `make test-stage-<...>`. To use the staging commands, you must specify your Replicate user account (we default to `replicate-internal`) and create a Replicate model in the specified account with the naming convention `staging-<$SELECTED_MODEL>`. Accordingly, if your selected model is `llama-2-7b`, you would create a model called `staging-llama-2-7b`. 
\n\nYou also need to log in via `cog login` and set the `REPLICATE_API_TOKEN` environment variable to your account's API token. \n\nCalling `make stage` will push the selected model to the associated staging model. Then you can call `make test-stage`. \n\n\n\n\n\n"
  },
  {
    "path": "predict.py",
    "content": "import functools\nimport inspect\nimport os\nimport socket\nimport time\nimport zipfile\nfrom typing import Any, Callable, Optional\n\nimport torch\nfrom cog import BasePredictor, ConcatenateIterator, Input, Path\nimport config\nfrom config import ENGINE, ENGINE_KWARGS, USE_SYSTEM_PROMPT\nfrom src.download import Downloader\nfrom src.utils import seed_all, delay_prints\n\n# This prompt formatting was copied from the original Llama v2 repo:\n# https://github.com/facebookresearch/llama/blob/6c7fe276574e78057f917549435a2554000a876d/llama/generation.py#L44\n\n# These are components of the prompt that should not be changed by the users\nB_INST, E_INST = \"[INST]\", \"[/INST]\"\nB_SYS, E_SYS = \"<<SYS>>\\n\", \"\\n<</SYS>>\\n\\n\"\n# normally this would start with <s>, but MLC adds it\nPROMPT_TEMPLATE = f\"{B_INST} {B_SYS}{{system_prompt}}{E_SYS}{{prompt}} {E_INST}\"\nif not USE_SYSTEM_PROMPT:\n    PROMPT_TEMPLATE = \"{prompt}\"\nPROMPT_TEMPLATE = getattr(config, \"PROMPT_TEMPLATE\", PROMPT_TEMPLATE)\n\n# Users may want to change the system prompt, but we use the recommended system prompt by default\nDEFAULT_SYSTEM_PROMPT = \"\"\"You are a helpful, respectful and honest assistant.\"\"\"\nDEFAULT_SYSTEM_PROMPT = getattr(config, \"DEFAULT_SYSTEM_PROMPT\", DEFAULT_SYSTEM_PROMPT)\n\n# Temporary hack to disable Top K from the API. 
We should get rid of this once engines + configs are better standardized.\nUSE_TOP_K = ENGINE.__name__ not in (\"MLCEngine\", \"MLCvLLMEngine\")\n\n\nclass Predictor(BasePredictor):\n    def setup(self, weights: Optional[Path] = None):\n        print(\"Starting setup\")\n        self.downloader = Downloader()\n        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n\n        self.engine = ENGINE(**ENGINE_KWARGS)\n\n        if weights is not None and weights.name == \"weights\":\n            # bugfix\n            weights = None\n        if weights:\n            # If weights are passed in, they are LoRa weights\n            # so we need to download the fp16 weights and load with peft\n            self.initialize_peft(weights)\n        else:\n            print(\"Not using old-style COG_WEIGHTS LoRA weights\")\n\n    # todo: adaptive cache like CLOCK\n    @functools.lru_cache(maxsize=10)\n    def get_lora(self, replicate_weights: str) -> Any:\n        if \"http\" in str(replicate_weights):  # weights are in the cloud\n            print(\"Downloading peft weights\")\n            st = time.time()\n            buffer = self.downloader.sync_download_file(str(replicate_weights))\n            print(f\"Downloaded peft weights in {time.time() - st:.3f}\")\n        else:\n            # zipfile accepts either a file-like or path-like object\n            buffer = replicate_weights\n        st = time.time()\n        with zipfile.ZipFile(buffer, \"r\") as zip_ref:\n            data = {name: zip_ref.read(name) for name in zip_ref.namelist()}\n        print(f\"Unzipped peft weights in {time.time() - st:.3f}\")\n        st = time.time()\n        lora = self.engine.load_lora(data)\n        del data, zip_ref\n        print(f\"Initialized peft model in {time.time() - st:.3f}\")\n        return lora\n\n    current_path: str | None = None\n\n    def initialize_peft(self, replicate_weights: str) -> None:\n        if self.current_path != replicate_weights:\n            
print(f\"previous weights were different, switching to {replicate_weights}\")\n            self.engine.set_lora(self.get_lora(replicate_weights))\n\n            self.current_path = replicate_weights\n        else:\n            print(\"correct lora is already loaded\")\n\n    def delete_lora(self):\n        self.current_path = None\n        self.engine.delete_lora()\n\n    # currently, outputs including tokens and logs are throttled to 50ms\n    # because of this, printing before outputing tokens is bad\n    # so this patches print to not only print until after we leave this function\n    # eventually that will be fixed and this can be removed\n    def predict(\n        self,\n        prompt: str = Input(description=\"Prompt to send to the model.\"),\n        system_prompt: str = Input(\n            description=\"System prompt to send to the model. This is prepended to the prompt and helps guide system behavior. Should not be blank.\",\n            default=DEFAULT_SYSTEM_PROMPT,\n        ),\n        max_new_tokens: int = Input(\n            description=\"Maximum number of tokens to generate. A word is generally 2-3 tokens\",\n            ge=1,\n            default=128,\n        ),\n        min_new_tokens: int = Input(\n            description=\"Minimum number of tokens to generate. To disable, set to -1. 
A word is generally 2-3 tokens.\",\n            ge=-1,\n            default=-1,\n        ),\n        temperature: float = Input(\n            description=\"Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic, 0.75 is a good starting value.\",\n            ge=0.01,\n            le=5,\n            default=0.7,\n        ),\n        top_p: float = Input(\n            description=\"When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens\",\n            ge=0.0,\n            le=1.0,\n            default=0.95,\n        ),\n        top_k: int = Input(\n            description=\"When decoding text, samples from the top k most likely tokens; lower to ignore less likely tokens\",\n            ge=-1,\n            default=-1,\n        ),\n        repetition_penalty: float = Input(\n            description=\"A parameter that controls how repetitive text can be. Lower means more repetitive, while higher means less repetitive. Set to 1.0 to disable.\",\n            ge=0.0,\n            default=1.15,\n        ),\n        stop_sequences: str = Input(\n            description=\"A comma-separated list of sequences to stop generation at. For example, '<end>,<stop>' will stop generation at the first instance of '<end>' or '<stop>'.\",\n            default=None,\n        ),\n        seed: int = Input(\n            description=\"Random seed. Leave blank to randomize the seed\",\n            default=None,\n        ),\n        debug: bool = Input(\n            description=\"provide debugging output in logs\", default=False\n        ),\n        prompt_template: str = Input(\n            description=\"Template for formatting the prompt\",\n            default=PROMPT_TEMPLATE,\n        ),\n        # return_logits: bool = Input(\n        # description=\"if set, only return logits for the first token. 
only useful for testing, etc.\",\n        # default=False,\n        # ),\n        replicate_weights: str = Input(\n            description=\"Path to fine-tuned weights produced by a Replicate fine-tune job.\",\n            default=None,\n        ),\n    ) -> ConcatenateIterator[str]:\n        with delay_prints() as print:\n            if stop_sequences:\n                stop_sequences = stop_sequences.split(\",\")\n            # we must apply a prompt template if it is passed even for base models\n            if prompt_template:\n                # very rough hack to catch mistral-instruct / no SYS token\n                # this is supposed to not proc for the default template, but actually always procs when prompt_template={prompt}\n                # however if you're doing that, it doesn't matter\n                if USE_SYSTEM_PROMPT and B_SYS not in prompt_template:\n                    if system_prompt.strip() and not system_prompt.endswith(\" \"):\n                        # mistral doesn't have a SYS token, there's just a space between the system prompt and\n                        system_prompt = system_prompt.strip() + \" \"\n                        print(\"Added a space to your system prompt\")\n                prompt = prompt_template.format(\n                    system_prompt=system_prompt, prompt=prompt\n                )\n            # MLC adds BOS token\n            prompt = prompt.removeprefix(\"<s>\")\n            print(f\"Your formatted prompt is: \\n{prompt}\")\n\n            if replicate_weights:\n                start = time.time()\n                self.initialize_peft(replicate_weights)\n                print(f\"Overall initialize_peft took {time.time() - start:.3f}\")\n            else:\n                if \"COG_WEIGHTS\" not in os.environ:\n                    self.delete_lora()\n                    print(\"Not using LoRA\")\n\n            if seed is not None:\n                print(f\"Setting seed to {seed}\")\n                
seed_all(seed)\n\n            n_tokens = 0\n            st = time.time()\n\n            # if return_logits:\n            # logits = self.engine.get_logits(prompt)\n            # # serializing so we aren't returning a massive json\n            # logits_path = \"logits.pt\"\n            # torch.save(logits, logits_path)\n            # yield Path(logits_path)\n\n            # # todo: may need to do something clever with kwargs if/when we add more engines.\n            # else:\n            generated_text = \"\"\n            for decoded_token in self.engine(\n                prompt,\n                temperature=temperature,\n                top_p=top_p,\n                top_k=top_k,\n                repetition_penalty=repetition_penalty,\n                max_new_tokens=max_new_tokens,\n                min_new_tokens=min_new_tokens,\n                stop_sequences=stop_sequences,\n            ):\n                n_tokens += 1\n                yield decoded_token\n                generated_text += decoded_token\n                if n_tokens == 1 and debug:\n                    second_start = time.time()\n                if seed is not None:\n                    torch.manual_seed(seed)\n            et = time.time()\n            t = et - st\n            print(f\"hostname: {socket.gethostname()}\")\n            if debug:\n                print(\"generated text:\", generated_text)\n                print(f\"after initialization, first token took {second_start - st:.3f}\")\n                print(f\"Tokens per second: {n_tokens / t:.2f}\")\n                print(\n                    f\"Tokens per second not including time to first token: {(n_tokens -1) / (et - second_start):.2f}\"\n                )\n                print(f\"cur memory: {torch.cuda.memory_allocated()}\")\n                print(f\"max allocated: {torch.cuda.max_memory_allocated()}\")\n                print(f\"peak memory: {torch.cuda.max_memory_reserved()}\")\n\n    def remove(f: Callable, defaults: dict[str, 
Any]) -> Callable:\n        # pylint: disable=no-self-argument\n        def wrapper(self, *args, **kwargs):\n            kwargs.update(defaults)\n            return f(self, *args, **kwargs)\n\n        # Update wrapper attributes for documentation, etc.\n        functools.update_wrapper(wrapper, f)\n\n        # for the purposes of inspect.signature as used by predictor.get_input_type,\n        # remove the argument (system_prompt)\n        sig = inspect.signature(f)\n        params = [p for name, p in sig.parameters.items() if name not in defaults]\n        wrapper.__signature__ = sig.replace(parameters=params)\n\n        # Return partialmethod, wrapper behaves correctly when part of a class\n        return functools.partialmethod(wrapper)\n\n    args_to_remove: dict[str, Any] = {}\n    if not USE_SYSTEM_PROMPT:\n        # this removes system_prompt from the Replicate API for non-chat models.\n        args_to_remove[\"system_prompt\"] = None\n    if not USE_TOP_K:\n        args_to_remove[\"top_k\"] = None\n    if args_to_remove:\n        predict = remove(predict, args_to_remove)\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[build-system]\nrequires = [\"setuptools\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"cog-llama-template\"\nversion = \"0.0.0\"\noptional-dependencies = { dev = [\"ruff>=0.1.3\"] }\n"
  },
  {
    "path": "requirements-dev.txt",
    "content": "#\n# This file is autogenerated by pip-compile with Python 3.11\n# by the following command:\n#\n#    pip-compile --extra=dev --output-file=requirements-dev.txt --resolver=backtracking pyproject.toml\n#\nruff==0.1.3\n    # via cog-llama-template (pyproject.toml)\n"
  },
  {
    "path": "scripts/benchmark_token_latency.py",
    "content": "import time\nimport json\nimport random\nimport torch\nimport argparse\nfrom abc import ABC, abstractmethod\n\n# Number of runs for each combination of model, prompt length, and output length.\nnum_runs = 5\n\n\nclass AbstractInferenceModel(ABC):\n    @abstractmethod\n    def __init__(self, model_name_or_path, tokenizer_name_or_path):\n        self.model_name_or_path = model_name_or_path\n        self.tokenizer_name_or_path = tokenizer_name_or_path\n        self.model = self._load_model()\n        self.tokenizer = self._load_tokenizer()\n\n    @abstractmethod\n    def _load_model(self):\n        pass\n\n    @abstractmethod\n    def _load_tokenizer(self):\n        pass\n\n    @abstractmethod\n    def generate_tokens(self, input_ids, prompt_length, output_length):\n        pass\n\n\nclass LlamaBnB4Bit(AbstractInferenceModel):\n    def __init__(self, model_name_or_path, tokenizer_name_or_path, some_other_arg):\n        super().__init__(model_name_or_path, tokenizer_name_or_path)\n\n    def _load_model(self):\n        from transformers import LlamaForCausalLM\n\n        model = LlamaForCausalLM.from_pretrained(\n            self.model_name_or_path,\n            cache_dir=\"pretrained_weights\",\n            device_map={\"\": 0},\n            load_in_4bit=True,\n        )\n\n        return model\n\n    def _load_tokenizer(self):\n        from transformers import LlamaTokenizer\n\n        DEFAULT_PAD_TOKEN = \"[PAD]\"\n        DEFAULT_EOS_TOKEN = \"</s>\"\n        DEFAULT_BOS_TOKEN = \"<s>\"\n        DEFAULT_UNK_TOKEN = \"</s>\"\n\n        tok = LlamaTokenizer.from_pretrained(self.tokenizer_name_or_path, legacy=False)\n        tok.add_special_tokens(\n            {\n                \"eos_token\": DEFAULT_EOS_TOKEN,\n                \"bos_token\": DEFAULT_BOS_TOKEN,\n                \"unk_token\": DEFAULT_UNK_TOKEN,\n                \"pad_token\": DEFAULT_PAD_TOKEN,\n            }\n        )\n        return tok\n\n    def generate_tokens(self, input_ids, 
prompt_length, output_length):\n        generated = self.model.generate(\n            input_ids, max_length=prompt_length + output_length, do_sample=False\n        )\n        return generated\n\n\ndef measure_latency(inference_model, prompt_length, output_length):\n    # Generate a random prompt\n    prompt = \" \".join([random.choice(\"a\") for _ in range(prompt_length)])\n\n    # Tokenize the prompt\n    input_ids = inference_model.tokenizer.encode(prompt, return_tensors=\"pt\")\n\n    # Set the random seed for reproducibility\n    torch.manual_seed(0)\n\n    # Maximum number of attempts to generate the correct number of tokens.\n    max_attempts = 10\n\n    # Generate response and ensure the response length is as expected\n    for _ in range(max_attempts):\n        # Time the model's response\n        start_time = time.time()\n\n        output = inference_model.generate_tokens(\n            input_ids, prompt_length, output_length\n        )\n\n        end_time = time.time()\n        elapsed_time = end_time - start_time\n\n        if len(output[0]) == prompt_length + output_length:\n            break\n    else:\n        raise RuntimeError(\n            f\"Failed to generate output with correct length after {max_attempts} attempts.\"\n        )\n\n    tokens_per_second = output_length / elapsed_time\n\n    return tokens_per_second\n\n\ndef benchmark_model(model_name, inference_model, prompt_lengths, output_lengths):\n    results = {}\n    results[model_name] = {}\n\n    for prompt_length in prompt_lengths:\n        for output_length in output_lengths:\n            latencies = []\n\n            print(\n                f\"\\n--- Benchmarking Model: {model_name}, Prompt Length: {prompt_length}, Output Length: {output_length} ---\"\n            )\n            for i in range(num_runs):\n                tokens_per_second = measure_latency(\n                    inference_model, prompt_length, output_length\n                )\n                
latencies.append(tokens_per_second)\n\n                print(f\"Run {i+1} - Tokens/sec: {tokens_per_second}\")\n\n            avg_tokens_per_second = sum(latencies) / num_runs\n\n            results[model_name][\n                f\"{prompt_length}_{output_length}\"\n            ] = avg_tokens_per_second\n\n            print(f\"Average tokens/sec over {num_runs} runs: {avg_tokens_per_second}\")\n\n    # Write results to a JSON file\n    with open(f\"{model_name}_benchmark_results.json\", \"w\") as f:\n        json.dump(results, f)\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(description=\"Benchmark a Language Model.\")\n    parser.add_argument(\n        \"--model_name\", type=str, help=\"The name of the model to benchmark.\"\n    )\n    parser.add_argument(\n        \"--model_name_or_path\",\n        type=str,\n        help=\"Path to weights or info needed to trigger downloads.\",\n    )\n    parser.add_argument(\n        \"--tokenizer_name_or_path\",\n        type=str,\n        default=None,\n        help=\"The name or path of the tokenizer to use. If not provided, uses the same as the model.\",\n    )\n    parser.add_argument(\n        \"--prompt_lengths\",\n        nargs=\"+\",\n        type=int,\n        default=[25, 50, 100, 250, 500, 1000],\n        help=\"The lengths of the prompts to be used.\",\n    )\n    parser.add_argument(\n        \"--output_lengths\",\n        nargs=\"+\",\n        type=int,\n        default=[25, 50, 100],\n        help=\"The lengths of the output sequences to be generated.\",\n    )\n\n    args = parser.parse_args()\n\n    tokenizer_name_or_path = args.tokenizer_name_or_path or args.model_name_or_path\n    inference_model = LlamaBnB4Bit(\n        args.model_name_or_path, tokenizer_name_or_path, None\n    )\n\n    benchmark_model(\n        args.model_name, inference_model, args.prompt_lengths, args.output_lengths\n    )\n"
  },
  {
    "path": "scripts/load_secrets.sh",
    "content": "if [ ! -d \"../official-models\" ]; then\n    pushd ..\n    git clone git@github.com:replicate/official-models\n    popd\nfi\n\ncp ../official-models/model_secrets/llama-2-13b/.env models/llama-2-13b/\ncp ../official-models/model_secrets/llama-2-13b-chat/.env models/llama-2-13b-chat/\ncp ../official-models/model_secrets/llama-2-70b/.env models/llama-2-70b/\ncp ../official-models/model_secrets/llama-2-70b-chat/.env models/llama-2-70b-chat/\ncp ../official-models/model_secrets/llama-2-7b/.env models/llama-2-7b/\ncp ../official-models/model_secrets/llama-2-7b-chat/.env models/llama-2-7b-chat/\n"
  },
  {
    "path": "scripts/test_fast_llama.py",
    "content": "import zipfile\nfrom dataclasses import dataclass\nfrom enum import Enum\nfrom io import BytesIO\nfrom typing import Any\nimport time\nimport numpy as np\n\nimport replicate\nfrom termcolor import cprint\nfrom transformers import AutoTokenizer\n\nfrom src.download import Downloader\n\n\nclass Engine(Enum):\n    REPLICATE = \"replicate\"\n    VLLM = \"vllm\"\n\n\n@dataclass\nclass LoraAdapter:\n    path: str\n    model: Any\n\n\nclass SpeedyReplicateGonzalez:\n    def __init__(self):\n        # setup\n        self.max_new_tokens = 1024\n        self.engine_kwargs = {\n            \"max_new_tokens\": self.max_new_tokens,\n            \"temperature\": 1.0,\n            \"top_p\": 0.9,\n            \"top_k\": 50,\n        }\n        MODEL_PATH = \"models/llama-2-7b-vllm/model_artifacts/default_inference_weights\"\n        self.current_engine = None\n        self.downloader = Downloader()\n        # self.vllm_engine = vLLMEngine(model_path=MODEL_PATH,\n        # tokenizer_path=MODEL_PATH, dtype=\"auto\")\n        self.tokenizer = AutoTokenizer.from_pretrained(\"huggyllama/llama-7b\")\n\n        # get SQL lora\n        self.sql_lora_path = (\n            \"https://pub-df34620a84bb4c0683fae07a260df1ea.r2.dev/sql.zip\"\n        )\n        self.sql_lora_model = self.get_lora(self.sql_lora_path)\n        self.sql_lora = LoraAdapter(model=self.sql_lora_model, path=self.sql_lora_path)\n\n        # get summary lora\n        self.summary_lora_path = (\n            \"https://storage.googleapis.com/dan-scratch-public/tmp/samsum-lora.zip\"\n        )\n        self.summary_lora_model = self.get_lora(self.summary_lora_path)\n        self.summary_lora = LoraAdapter(\n            model=self.summary_lora_model, path=self.summary_lora_path\n        )\n\n        self._replicate_model_name = \"meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e\"\n\n    @property\n    def replicate_model_name(self):\n        return 
self._replicate_model_name\n\n    @replicate_model_name.setter\n    def replicate_model_name(self, model_name):\n        print(\"Setting replicate model name to\", model_name)\n        self._replicate_model_name = model_name\n\n    def get_lora(self, lora_path):\n        return None\n        buffer = self.downloader.sync_download_file(lora_path)\n        with zipfile.ZipFile(buffer, \"r\") as zip_ref:\n            data = {name: zip_ref.read(name) for name in zip_ref.namelist()}\n        adapter_config, adapter_model = (\n            data[\"adapter_config.json\"],\n            BytesIO(data[\"adapter_model.bin\"]),\n        )\n        return self.engine.load_lora(\n            adapter_config=adapter_config, adapter_model=adapter_model\n        )\n\n    def generate_replicate(self, prompt, lora):\n        lora_path = lora.path if lora else \"\"\n        output = replicate.run(\n            self.replicate_model_name,\n            input={\n                \"prompt\": prompt,\n                \"replicate_weights\": lora_path,\n                \"max_new_tokens\": self.max_new_tokens,\n            },\n        )\n        generated_text = \"\"\n        for item in output:\n            generated_text += item\n        return generated_text\n\n    def generate_vllm(self, prompt, lora):\n        lora_model = lora.model if lora else \"\"\n        self.engine_kwargs[\"prompt\"] = prompt\n        base_generation = \"\"\n        if self.engine.is_lora_active():\n            self.engine.delete_lora()\n        if lora:\n            self.engine.set_lora(lora.model)\n\n        generation = \"\".join(list(self.engine(**self.engine_kwargs)))\n        return generation\n\n    def set_engine(self, engine):\n        engines_registry = {\n            Engine.REPLICATE: self.generate_replicate,\n            Engine.VLLM: self.generate_vllm,\n        }\n        if engine in engines_registry:\n            self.generate = engines_registry[engine]\n            self.generate_func = 
engines_registry[engine]\n            self.current_engine = engine\n        else:\n            raise ValueError(f\"Engine {engine} not found in {engines_registry.keys()}\")\n\n    def timing_decorator(self, prompt, lora):\n        start_time = time.time()\n        generated_text = self.generate_func(prompt, lora)\n        end_time = time.time()\n        time_elapsed = end_time - start_time\n        tokens_generated = len(self.tokenizer(generated_text)[\"input_ids\"])\n        self.tps = tokens_generated / time_elapsed\n        print(\n            f\"Generated {tokens_generated} tokens in {time_elapsed:.2f} seconds at {self.tps:.2f} tokens per second\"\n        )\n\n    def enable_timing(self, verbose: bool = False):\n        self.generate = self.timing_decorator\n        self.tps = None\n\n    def disable_timing(self):\n        self.generate = self.generate_func\n\n    def run_long_generation(self):\n        long_gen_prompt = \"[INST] <<SYS>> You are a literary writer. Please write an essay that is several paragraphs long about the differences between socialism and capitalism. Please cite your sources and nuances on these opinions. 
<</SYS>> [/INST]\"\n        base_generation = self.generate(long_gen_prompt, \"\")\n        # cprint(\"Long gen output:\", \"blue\")\n        # cprint(f\"Base model output: {base_generation}\", \"blue\")\n\n    def run_base(self):\n        # generate vanilla output that should be screwed up by a lora\n        sql_prompt = \"What is the meaning of life?\"\n        base_generation = self.generate(sql_prompt, \"\")\n\n        sql_generation = self.generate(sql_prompt, self.sql_lora)\n        lora_expected_generation = \"What is the meaning of life?\"\n        cprint(\"Philosophy output:\", \"blue\")\n        cprint(f\"Base model output: {base_generation}\", \"blue\")\n        cprint(f\"LoRA output: {sql_generation}\", \"blue\")\n        # assert base_generation != lora_expected_generation\n        # assert sql_generation == lora_expected_generation\n\n    def run_sql(self):\n        # generate SQL\n        sql_prompt = \"\"\"You are a powerful text-to-SQL model. Your job is to answer questions about a database. 
You are given a question and context regarding one or more tables.\n\n        You must output the SQL query that answers the question.\n\n        ### Input:\n        What is the total number of decile for the redwood school locality?\n\n        ### Context:\n        CREATE TABLE table_name_34 (decile VARCHAR, name VARCHAR)\n\n        ### Response:\"\"\"\n\n        base_generation = self.generate(sql_prompt, \"\")\n        sql_generation = self.generate(sql_prompt, self.sql_lora)\n        base_generation = base_generation.strip()\n        sql_generation = sql_generation.strip()\n        lora_expected_generation = (\n            'SELECT COUNT(decile) FROM table_name_34 WHERE name = \"redwood school\"'\n        )\n        cprint(\"SQL output:\", \"green\")\n        cprint(f\"Base model output: {base_generation}\", \"green\")\n        cprint(f\"LoRA output: {sql_generation}\", \"green\")\n        # assert base_generation != lora_expected_generation\n        # assert sql_generation == lora_expected_generation\n\n    def run_summary(self):\n        # generate summaries\n        summary_prompt = \"\"\"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<</SYS>>\nInput:\nLiam: did you see that new movie that just came out?\nLiam: \"Starry Skies\" I think it's called\nAva: oh yeah, I heard about it\nLiam: it's about this astronaut who gets lost in space\nLiam: and he has to find his way back to earth\nAva: sounds intense\nLiam: it was! there were so many moments where I thought he wouldn't make it\nAva: i need to watch it then, been looking for a good movie\nLiam: highly recommend it!\nAva: thanks for the suggestion Liam!\nLiam: anytime, always happy to share good movies\nAva: let's plan to watch it together sometime\nLiam: sounds like a plan! 
[/INST]\"\"\"\n\n        base_generation = self.generate(summary_prompt, \"\")\n        summary_generation = self.generate(summary_prompt, self.summary_lora)\n        lora_expected_generation = (\n            '\\nSummary: Liam recommends the movie \"Starry Skies\" to Ava.'\n        )\n        cprint(\"Summary output:\", \"blue\")\n        cprint(f\"Base model output: {base_generation}\", \"blue\")\n        cprint(f\"LoRA output: {summary_generation}\", \"blue\")\n        # assert base_generation != lora_expected_generation\n        # assert summary_generation == lora_expected_generation\n\n\nif __name__ == \"__main__\":\n    tester = SpeedyReplicateGonzalez()\n    tester.set_engine(Engine.REPLICATE)\n    tester.enable_timing()\n    tester.replicate_model_name = \"moinnadeem/vllm-engine-llama-7b:04bca4ff7a051e666f17a2c62a35d834e0e6fbfbd22ee212c7ba579d243450e1\"\n    vllm_tps = []\n    for idx in range(10):\n        tester.run_long_generation()\n        vllm_tps.append(tester.tps)\n        print(\"-\" * 20)\n\n    print(\"=\" * 40)\n    # tester.replicate_model_name = \"meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e\"\n    tester.replicate_model_name = \"meta/llama-2-7b:527827021d8756c7ab79fde0abbfaac885c37a3ed5fe23c7465093f0878d55ef\"\n    exllama_tps = []\n    for idx in range(10):\n        tester.run_long_generation()\n        exllama_tps.append(tester.tps)\n        print(\"-\" * 20)\n\n    tester.replicate_model_name = \"meta/llama-2-7b-chat:8e6975e5ed6174911a6ff3d60540dfd4844201974602551e10e9e87ab143d81e\"\n    exllama_chat_tps = []\n    for idx in range(10):\n        tester.run_long_generation()\n        exllama_chat_tps.append(tester.tps)\n        print(\"-\" * 20)\n\n    print(\"=\" * 40)\n    print(f\"vLLM speed: {np.mean(vllm_tps)} (std: {np.std(vllm_tps)})\")\n    print(f\"exllama speed: {np.mean(exllama_tps)} (std: {np.std(exllama_tps)})\")\n    print(\n        f\"exllama chat speed: {np.mean(exllama_chat_tps)} (std: 
{np.std(exllama_chat_tps)})\"\n    )\n"
  },
  {
    "path": "scripts/test_load_unload_lora.py",
    "content": "import zipfile\nfrom io import BytesIO\n\nimport replicate\nfrom termcolor import cprint\n\nfrom src.download import Downloader\nfrom src.inference_engines.vllm_engine import vLLMEngine\n\n\nclass vLLMLoraTest:\n    def __init__(self):\n        # setup\n        self.downloader = Downloader()\n        self.sql_lora_path = (\n            \"https://pub-df34620a84bb4c0683fae07a260df1ea.r2.dev/sql.zip\"\n        )\n        self.summary_lora_path = (\n            \"https://storage.googleapis.com/dan-scratch-public/tmp/samsum-lora.zip\"\n        )\n\n        self.engine_kwargs = {\n            \"max_new_tokens\": 128,\n            \"temperature\": 1.0,\n            \"top_p\": 0.9,\n            \"top_k\": 50,\n        }\n        MODEL_PATH = \"models/llama-2-7b-vllm/model_artifacts/default_inference_weights\"\n        self.engine = vLLMEngine(\n            model_path=MODEL_PATH, tokenizer_path=MODEL_PATH, dtype=\"auto\"\n        )\n        self.sql_lora = self.get_lora(self.sql_lora_path)\n        self.summary_lora = self.get_lora(self.summary_lora_path)\n\n    def get_lora(self, lora_path):\n        buffer = self.downloader.sync_download_file(lora_path)\n        with zipfile.ZipFile(buffer, \"r\") as zip_ref:\n            data = {name: zip_ref.read(name) for name in zip_ref.namelist()}\n        adapter_config, adapter_model = (\n            data[\"adapter_config.json\"],\n            BytesIO(data[\"adapter_model.bin\"]),\n        )\n        return self.engine.load_lora(\n            adapter_config=adapter_config, adapter_model=adapter_model\n        )\n\n    def generate_replicate(self, prompt, lora_path):\n        output = replicate.run(\n            \"moinnadeem/vllm-engine-llama-7b:15ec772e3ae45cf5afd629a766774ad7cc2a80894d23848e840f926e8b5868c4\",\n            input={\"prompt\": prompt, \"replicate_weights\": lora_path},\n        )\n        generated_text = \"\"\n        for item in output:\n            generated_text += item\n        return 
generated_text\n\n    def generate(self, prompt, lora):\n        self.engine_kwargs[\"prompt\"] = prompt\n        base_generation = \"\"\n        if self.engine.is_lora_active():\n            self.engine.delete_lora()\n        if lora:\n            self.engine.set_lora(lora)\n\n        generation = \"\".join(list(self.engine(**self.engine_kwargs)))\n        return generation\n\n    def run_base(self):\n        # generate vanilla output that should be screwed up by a lora\n        sql_prompt = \"What is the meaning of life?\"\n        base_generation = self.generate_replicate(sql_prompt, \"\")\n\n        sql_generation = self.generate_replicate(sql_prompt, self.sql_lora_path)\n        lora_expected_generation = \"What is the meaning of life?\"\n        cprint(\"Philosophy output:\", \"blue\")\n        cprint(f\"Base model output: {base_generation}\", \"blue\")\n        cprint(f\"LoRA output: {sql_generation}\", \"blue\")\n        # assert base_generation != lora_expected_generation\n        # assert sql_generation == lora_expected_generation\n\n    def run_sql(self):\n        # generate SQL\n        sql_prompt = \"\"\"You are a powerful text-to-SQL model. Your job is to answer questions about a database. 
You are given a question and context regarding one or more tables.\n\n        You must output the SQL query that answers the question.\n\n        ### Input:\n        What is the total number of decile for the redwood school locality?\n\n        ### Context:\n        CREATE TABLE table_name_34 (decile VARCHAR, name VARCHAR)\n\n        ### Response:\"\"\"\n\n        base_generation = self.generate_replicate(sql_prompt, \"\")\n        sql_generation = self.generate_replicate(sql_prompt, self.sql_lora_path)\n        base_generation = base_generation.strip()\n        sql_generation = sql_generation.strip()\n        lora_expected_generation = (\n            'SELECT COUNT(decile) FROM table_name_34 WHERE name = \"redwood school\"'\n        )\n        cprint(\"SQL output:\", \"green\")\n        cprint(f\"Base model output: {base_generation}\", \"green\")\n        cprint(f\"LoRA output: {sql_generation}\", \"green\")\n        # assert base_generation != lora_expected_generation\n        # assert sql_generation == lora_expected_generation\n\n    def run_summary(self):\n        # generate summaries\n        summary_prompt = \"\"\"[INST] <<SYS>>\nUse the Input to provide a summary of a conversation.\n<</SYS>>\nInput:\nLiam: did you see that new movie that just came out?\nLiam: \"Starry Skies\" I think it's called\nAva: oh yeah, I heard about it\nLiam: it's about this astronaut who gets lost in space\nLiam: and he has to find his way back to earth\nAva: sounds intense\nLiam: it was! there were so many moments where I thought he wouldn't make it\nAva: i need to watch it then, been looking for a good movie\nLiam: highly recommend it!\nAva: thanks for the suggestion Liam!\nLiam: anytime, always happy to share good movies\nAva: let's plan to watch it together sometime\nLiam: sounds like a plan! 
[/INST]\"\"\"\n\n        base_generation = self.generate_replicate(summary_prompt, \"\")\n        summary_generation = self.generate_replicate(\n            summary_prompt, self.summary_lora_path\n        )\n        lora_expected_generation = (\n            '\\nSummary: Liam recommends the movie \"Starry Skies\" to Ava.'\n        )\n        cprint(\"Summary output:\", \"blue\")\n        cprint(f\"Base model output: {base_generation}\", \"blue\")\n        cprint(f\"LoRA output: {summary_generation}\", \"blue\")\n        # assert base_generation != lora_expected_generation\n        # assert summary_generation == lora_expected_generation\n\n\nif __name__ == \"__main__\":\n    tester = vLLMLoraTest()\n    # tester.run_base()\n    # tester.run_summary()\n    for idx in range(10):\n        print(f\"SQL Test #{idx}:\")\n        tester.run_sql()\n        print(\"-\" * 10)\n        print(f\"Summary Test #{idx}:\")\n        tester.run_summary()\n        print(\"=\" * 20)\n"
  },
  {
    "path": "scripts/train_multi_gpu.sh",
    "content": "#!/bin/bash\n\npython train.py \\\n    --train_data 70k_samples_prompt.jsonl \\\n    --num_train_epochs 1 \\\n    --learning_rate 2e-5 \\\n    --train_batch_size 2 \\\n    --gradient_accumulation_steps 4 \\\n    --logging_steps 2 \\\n    --warmup_ratio 0.03 \\\n    --weights /src/weights_13 "
  },
  {
    "path": "scripts/train_single_gpu.sh",
    "content": "#!/bin/bash\n\npython train.py \\\n    --model_name_or_path google/flan-t5-base \\\n    --data_path ./replicate_alpaca_data.json \\\n    --num_train_epochs 3 \\\n    --learning_rate 3e-4 \\\n    --train_batch_size 8 \\\n    --warmup_ratio 0.03 \\\n    --max_steps 10 # number of steps before returning, mostly useful for testing performance\n"
  },
  {
    "path": "src/__init__.py",
    "content": ""
  },
  {
    "path": "src/config_utils.py",
    "content": "\"\"\"\nAn entirely self-contained config parsing util that should, if all goes well, dramatically simplify our configuration.\n\"\"\"\nfrom typing import List, Optional\n\nfrom pydantic import BaseModel\n\n\nclass Weights(BaseModel):\n    local_path: str\n    remote_path: str\n    remote_files: List[str]\n\n\ndef get_fp16_file_list(n_shards: int):\n    \"\"\"\n    Assumes safetensors\n    \"\"\"\n    base_files = [\n        f\"model-{str(val).zfill(5)}-of-{str(n_shards).zfill(5)}.safetensors\"\n        for val in range(1, n_shards + 1)\n    ]\n    base_files += [\n        \"config.json\",\n        \"generation_config.json\",\n        \"special_tokens_map.json\",\n        \"tokenizer_config.json\",\n        \"tokenizer.json\",\n        \"tokenizer.model\",\n        \"model.safetensors.index.json\",\n    ]\n    return base_files\n\n\ndef get_gptq_file_list(base_model_name: str):\n    \"\"\"\n    name of <model>.safetensors varies\n    \"\"\"\n    base_files = [base_model_name]\n    base_files += [\n        \"config.json\",\n        \"generation_config.json\",\n        \"special_tokens_map.json\",\n        \"tokenizer_config.json\",\n        \"tokenizer.json\",\n        \"tokenizer.model\",\n        \"quantize_config.json\",\n    ]\n    return base_files\n\n\ndef get_mlc_file_list(model_name: str, n_shards: int):\n    files_to_download = [\n        f\"params/params_shard_{shard_idx}.bin\" for shard_idx in range(n_shards)\n    ]\n\n    files_to_download += [\n        f\"{model_name}-cuda.so\",\n        \"mod_cache_before_build.pkl\",\n        \"params/mlc-chat-config.json\",\n        \"params/ndarray-cache.json\",\n        \"params/tokenizer.json\",\n        \"params/tokenizer_config.json\",\n        \"params/tokenizer.model\",\n        \"params/config.json\",\n    ]\n    return files_to_download\n\n\ndef exllama_kwargs(weights: Weights, config_overrides: Optional[dict] = None):\n    exllama_default = {\"weights\": weights, \"fused_attn\": True}\n    
if config_overrides:\n        exllama_default.update(config_overrides)\n    return exllama_default\n\n\ndef vllm_kwargs(weights: Weights, config_overrides: Optional[dict] = None):\n    vllm_default = {\n        \"weights\": weights,\n        \"dtype\": \"auto\",\n    }\n    if config_overrides:\n        vllm_default.update(config_overrides)\n    return vllm_default\n\n\ndef mlc_kwargs(\n    weights: Weights,\n    is_chat: bool,\n    num_shards: int = 1,\n    tokenizer_path: str = None,\n    config_overrides: Optional[dict] = None,\n):\n    mlc_default = {\n        \"weights\": weights,\n        \"tokenizer_path\": tokenizer_path,\n        \"is_chat\": is_chat,\n        \"num_shards\": num_shards,\n    }\n    if config_overrides:\n        mlc_default.update(config_overrides)\n    return mlc_default\n"
  },
  {
    "path": "src/download.py",
    "content": "import asyncio\nimport functools\nimport mmap\nimport os\nimport random\nimport shutil\nimport sys\nimport time\nimport typing as t\nfrom concurrent.futures import ThreadPoolExecutor\nimport aiohttp\nfrom yarl import URL\nfrom .utils import check_files_exist\n\n# some important tricks:\n# 1. os.sched_getaffinity to get an accurate cpu count in containers\n# 2. memoryview for less copies\n# 3. keep redirects from the first head\n# 4. mmap\n# 5. thread for file writes\n\nMIN_CHUNK_SIZE = 1024 * 1024 * 8  # 8mb\n\nglobal_downloader = None\n\n\n# zipfile requires seekable\nclass SeekableMmap(mmap.mmap):\n    def seekable(self) -> bool:\n        return True\n\n\nclass Downloader:\n    def __init__(self, concurrency: int | None = None) -> None:\n        if not concurrency:\n            concurrency = len(os.sched_getaffinity(0))\n        self.concurrency = concurrency\n        self.sem = asyncio.Semaphore(concurrency * 2)\n        self.retries = 0\n        try:\n            self.loop = asyncio.get_running_loop()\n        except RuntimeError:\n            self.loop = asyncio.new_event_loop()\n        global global_downloader\n        global_downloader = self\n\n    _session: aiohttp.ClientSession | None = None\n\n    @property\n    def session(self) -> aiohttp.ClientSession:\n        if not self._session:\n            self._session = aiohttp.ClientSession(\n                connector=aiohttp.TCPConnector(limit_per_host=self.concurrency),\n                loop=self.loop,\n            )\n        return self._session\n\n    _threadpool: ThreadPoolExecutor | None = None\n\n    @property\n    def threadpool(self) -> ThreadPoolExecutor:\n        if not self._threadpool:\n            self._threadpool = ThreadPoolExecutor(2)\n        return self._threadpool\n\n    async def get_remote_file_size(self, url: str | URL) -> \"tuple[URL, int]\":\n        # try:\n        #     direct_url = str(url).replace(\n        #         \"pbxt.replicate.delivery\", 
\"replicate-files.object.lga1.coreweave.com\"\n        #     )\n        #     resp = await self.session.head(direct_url, timeout=5)\n        #     if resp.status == 200:\n        #         if resp.url != url:\n        #             print(f\"using {resp.url} instead of {url}\")\n        #         return resp.url, int(resp.headers[\"Content-Length\"])\n        #     print(f\"direct link not available {resp}\")\n        # except (KeyError, asyncio.TimeoutError, aiohttp.ClientError) as e:\n        #     print(f\"direct link not available: {direct_url} with error {repr(e)}\")\n        for i in range(3):\n            start = time.time()\n            headers = {\"Retry-Count\": str(i)} if i else {}\n            try:\n                response = await self.session.head(\n                    url, allow_redirects=True, headers=headers\n                )\n                if response.status >= 400:\n                    print(\"HEAD failed:\", response, response.headers.items())\n                # https://docs.aiohttp.org/en/stable/client_reference.html#aiohttp.ClientResponse.url\n                # .url is the url of the final request, as opposed to .real_url\n                return response.url, int(response.headers[\"Content-Length\"])\n            except KeyError as e:\n                print(\"HEAD failed\", repr(e))\n                print(response.headers, response)\n            except asyncio.TimeoutError:\n                print(f\"HEAD {url} timed out after {time.time() - start:.4f}\")\n            except aiohttp.ClientError as e:\n                print(f\"HEAD {url} {repr(e)}\")\n            await asyncio.sleep(random.random() / 10)\n        raise ValueError(f\"Failed to HEAD {url} after multiple retries\")\n\n    async def download_chunk(\n        self, url: str | URL, start: int, end: int, buffer_view: memoryview\n    ) -> None:\n        async with self.sem:\n            for i in range(5):\n                headers = {\"Retry-Count\": str(i)} if i else {}\n               
 try:\n                    headers |= {\"Range\": f\"bytes={start}-{end}\"}\n                    async with self.session.get(url, headers=headers) as response:\n                        buffer_view[start : end + 1] = await response.read()\n                        return\n                except (aiohttp.ClientError, asyncio.TimeoutError) as e:\n                    print(f\"Error: {e}\")\n                    await asyncio.sleep(random.random() / 10)  # sleep 0-100ms\n        raise ValueError(f\"Failed to download {url} after multiple retries\")\n\n    files_processed = 0\n    total_size = 0\n\n    async def download_file(self, url: str | URL) -> mmap.mmap:\n        self.retries = 0\n        url, file_size = await self.get_remote_file_size(url)\n        self.total_size += file_size\n        # lower this in proportion to how many files are in flight\n        # when files > concurrency, splitting is bad\n        # # to track requests in flight, except it's either full or 0 when we check:\n        # allowed_concurrency = min(self.sem._value + 1, self.concurrency)\n        # this way is kind of random but the assumption is the more data has gone over\n        # the connection so far, the bigger the TCP window sizes, and the less benefit\n        # from using additional connections\n        allowed_concurrency = max(1, self.concurrency - self.files_processed // 2)\n        self.files_processed += 1\n        max_chunks = file_size // (MIN_CHUNK_SIZE * 1) or 1\n        concurrency = min(allowed_concurrency, max_chunks)\n        chunk_size = file_size // concurrency\n        tasks = []\n        buf = SeekableMmap(-1, file_size)\n        buffer_view = memoryview(buf)\n        start_time = time.time()\n        for i in range(concurrency):\n            start = i * chunk_size\n            end = start + chunk_size - 1 if i != concurrency - 1 else file_size - 1\n            tasks.append(self.download_chunk(url, start, end, buffer_view))\n\n        await asyncio.gather(*tasks)\n      
  buf.seek(0)\n        print(\n            f\"Downloaded {os.path.basename(str(url))} as {concurrency} {chunk_size // 1024}\"\n            f\" kB chunks in {time.time() - start_time:.3f}s with {self.retries} retries\"\n        )\n        self.retries = 0\n        return buf\n\n    async def download_file_to_disk(self, url: str, path: str) -> None:\n        buf = await self.download_file(url)\n        # don't block the event loop for disk io\n        await self.loop.run_in_executor(\n            self.threadpool,\n            lambda: shutil.copyfileobj(buf, open(path, \"wb\"), length=2 << 18),\n        )\n        buf.close()\n\n    async def maybe_download_files_to_disk(\n        self, path: str, remote_path: str, filenames: list[str]\n    ) -> None:\n        remote_path = remote_path.rstrip(\"/\")\n        if not os.path.exists(path):\n            os.makedirs(path, exist_ok=True)\n            missing_files = filenames\n        else:\n            missing_files = check_files_exist(filenames, path)\n        start = time.time()\n        coros = [\n            self.download_file_to_disk(f\"{remote_path}/{f}\", f\"{path}/{f}\")\n            for f in missing_files\n        ]\n        await asyncio.gather(*coros)\n        elapsed = time.time() - start\n        throughput = self.total_size / elapsed / 1024 / 1024\n        print(\n            f\"downloaded {self.total_size / 1024 / 1024:.2f} MB in {elapsed:.3f}s ({throughput:.2f} MB/s)\"\n        )\n        self.total_size = 0\n        self.files_processed = 0  # loras can use a bunch of connections\n\n    def sync(f: t.Callable) -> t.Callable:\n        # pylint: disable=no-self-argument\n        @functools.wraps(f)\n        def wrapper(self: \"Downloader\", *args: t.Any, **kwargs: t.Any) -> t.Any:\n            try:\n                return self.loop.run_until_complete(f(self, *args, **kwargs))\n            except RuntimeError as e:\n                if e.args[0] == \"Event loop is closed\":\n                    print(\"has to 
start a new event loop\")\n                    self.loop = asyncio.new_event_loop()\n                    self._session = None\n                    return self.loop.run_until_complete(f(self, *args, **kwargs))\n                if \"another loop is running\" in e.args[0]:\n                    self.loop = asyncio.get_event_loop()\n                    self._session = None\n                    return self.loop.run_until_complete(f(self, *args, **kwargs))\n                raise e\n\n        return wrapper\n\n    sync_download_file = sync(download_file)\n    sync_maybe_download_files = sync(maybe_download_files_to_disk)\n\n\nif __name__ == \"__main__\":\n    Downloader().sync_download_file(sys.argv[1])\n"
  },
  {
    "path": "src/inference_engines/__init__.py",
    "content": ""
  },
  {
    "path": "src/inference_engines/engine.py",
    "content": "import time\nfrom abc import ABC, abstractmethod\nfrom typing import Any\n\nfrom src.config_utils import Weights\nfrom src.utils import maybe_download_with_pget\n\n\nclass Engine(ABC):\n    \"\"\"\n    WIP - this is what the engine looks like at the moment, outlining this just as an exercise to see what our ABC looks like. It will change.\n    \"\"\"\n\n    def load_weights(self, weights: Weights):\n        start = time.time()\n        maybe_download_with_pget(\n            weights.local_path, weights.remote_path, weights.remote_files\n        )\n        print(f\"downloading weights took {time.time() - start:.3f}s\")\n        return weights.local_path\n\n    @abstractmethod\n    def load_lora(self, lora_data: dict):\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def set_lora(self, lora: Any):\n        \"\"\"\n        given a loaded lora (created w/load_lora), configures the engine to use that lora in combination with the loaded base weights.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def is_lora_active(self) -> bool:\n        \"\"\"\n        Checks whether a LoRA has currently been loaded onto the engine.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def delete_lora(self):\n        \"\"\"\n        Deletes a LoRA.\n        \"\"\"\n        pass\n\n    @abstractmethod\n    def __call__(self, prompt, **kwargs):\n        \"\"\"\n        generation!\n        \"\"\"\n        pass\n"
  },
  {
    "path": "src/inference_engines/exllama.py",
    "content": "import io\nimport os\nimport sys\nimport glob\n\nimport torch\nimport time\nimport typing as tp\n\nfrom src.config_utils import Weights\n\nexllama_path = os.path.abspath(\"exllama\")\nsys.path.insert(0, exllama_path)\n\nfrom exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig\nfrom exllama.lora import ExLlamaLora\nfrom exllama.tokenizer import ExLlamaTokenizer\nfrom exllama.generator import ExLlamaGenerator\n\nfrom src.inference_engines.engine import Engine\nfrom ..utils import StreamingTextStopSequenceHandler\n\ntorch.cuda._lazy_init()\ntorch.set_printoptions(precision=10)\n\n\ndef next_logits(\n    generator, input_ids, apply_lora=None, last_id_only=True, input_mask=None\n):\n    n_logits = generator.model.forward(\n        input_ids, generator.cache, last_id_only, lora=apply_lora, input_mask=input_mask\n    )\n    return n_logits\n\n\ndef begin(generator):\n    if generator.cache is None:\n        generator.cache = ExLlamaCache(generator.model)\n    else:\n        generator.cache.current_seq_len = 0\n    return generator\n\n\ndef timer(name, func):\n    t = time.time()\n    ret = func()\n    t = time.time() - t\n    print(f\" ** Time, {name}: {t:.2f} seconds\")\n    return ret\n\n\nclass ExllamaEngine(Engine):\n    def __init__(self, weights: Weights, fused_attn=True):\n        model_directory = self.load_weights(weights)\n        tokenizer_path = os.path.join(model_directory, \"tokenizer.model\")\n        model_config_path = os.path.join(model_directory, \"config.json\")\n        st_pattern = os.path.join(model_directory, \"*.safetensors\")\n        model_path = glob.glob(st_pattern)[0]\n\n        config = ExLlamaConfig(model_config_path)  # create config from config.json\n        config.model_path = model_path  # supply path to model weights file\n\n        # Override exllam's default settings to use full llama v2 context\n        config.max_seq_len = 2 * 2048\n        config.max_input_len = 2 * 2048\n        config.max_attention_size = 2 
* 2048**2\n        config.fused_attn = fused_attn\n\n        self.model = model = ExLlama(\n            config\n        )  # create ExLlama instance and load the weights\n        tokenizer = ExLlamaTokenizer(\n            tokenizer_path\n        )  # create tokenizer from tokenizer model file\n\n        cache = ExLlamaCache(model)  # create cache for inference\n        generator = ExLlamaGenerator(model, tokenizer, cache)  # create generator\n\n        # warmup kernels\n\n        warmup_ids = torch.randint(0, 31999, (1, 50)).cuda()\n        print(\"warming up exllama kernels...\")\n        for i in range(1, 3):\n            print(f\" -- Warmup pass {i}...\")\n            begin(generator)\n            logits = timer(\"Warmup\", lambda: next_logits(generator, warmup_ids, None))\n\n        self.generator = begin(generator)\n\n    def delete_lora(self):\n        self.generator.lora = None\n        return\n\n    def is_lora_active(self) -> bool:\n        return self.generator.lora is not None\n\n    def load_lora(self, data_ref: dict) -> ExLlamaLora:\n        return ExLlamaLora(\n            self.model,\n            data_ref[\"adapter_config.json\"],\n            io.BytesIO(data_ref[\"adapter_model.bin\"]),\n        )\n\n    def set_lora(self, lora: ExLlamaLora | None) -> None:\n        self.generator.lora = lora\n\n    def __call__(\n        self,\n        prompt: str,\n        repetition_penalty: float = 1.15,\n        repetition_penalty_sustain: int = 256,\n        token_repetition_penalty_decay: float = 128,\n        temperature: float = 0.95,\n        top_p: float = 0.65,\n        top_k: int = 20,\n        max_new_tokens: int = 128,\n        min_new_tokens: int = 0,\n        beams: int = 1,\n        beam_length: int = 1,\n        stop_sequences: tp.List[str] = None,\n    ):\n        if top_k <= 0:\n            top_k = 20\n        generator = begin(self.generator)\n        generator.settings.token_repetition_penalty_max = repetition_penalty\n        
generator.settings.token_repetition_penalty_sustain = repetition_penalty_sustain\n        generator.settings.token_repetition_penalty_decay = (\n            token_repetition_penalty_decay\n        )\n        generator.settings.temperature = temperature\n        generator.settings.top_p = top_p\n        generator.settings.top_k = top_k\n        generator.settings.beams = beams\n        generator.settings.beam_length = beam_length\n\n        in_tokens = generator.tokenizer.encode(prompt)\n        n_in_tokens = in_tokens.shape[-1]\n        if n_in_tokens >= generator.model.config.max_input_len:\n            raise ValueError(\n                f\"Your input is too long. Max input length is {generator.model.config.max_input_len} tokens, but you supplied {n_in_tokens} tokens.\"\n            )\n\n        max_new_tokens = min(\n            max_new_tokens, generator.model.config.max_seq_len - n_in_tokens\n        )\n\n        num_res_tokens = in_tokens.shape[-1]  # Decode from here\n\n        generator.gen_begin(in_tokens)\n        generator.begin_beam_search()\n\n        stop_sequence_handler = StreamingTextStopSequenceHandler(\n            stop_sequences=stop_sequences,\n            eos_token=generator.tokenizer.eos_token,\n        )\n\n        for i in range(max_new_tokens):\n            if i < min_new_tokens:\n                generator.disallow_tokens(\n                    [\n                        generator.tokenizer.newline_token_id,\n                        generator.tokenizer.eos_token_id,\n                    ]\n                )\n            else:\n                generator.disallow_tokens(None)\n\n            gen_token = generator.beam_search()\n            if gen_token.item() == generator.tokenizer.eos_token_id:\n                break\n\n            if gen_token.item() == generator.tokenizer.eos_token_id:\n                generator.replace_last_token(generator.tokenizer.newline_token_id)\n\n            num_res_tokens += 1\n            text = 
generator.tokenizer.decode(\n                generator.sequence_actual[:, -num_res_tokens:][0]\n            )\n            new_text = text[len(prompt):]\n\n            if len(new_text.replace(\"�\", \"\")) == 0:\n                # if we're getting �, then we're halfway through an emoji; ignore it til it's fully generated. \n                continue\n            skip_space = prompt.endswith((\"\\n\", \"[/INST]\")) and new_text.startswith(\n                \" \"\n            )  # Bit prettier console output\n            prompt += new_text\n            if skip_space:\n                new_text = new_text[1:]\n\n            yielded_text = None\n            for yielded_text in stop_sequence_handler(new_text):\n                if yielded_text == stop_sequence_handler.eos_token:\n                    break\n                yield yielded_text\n\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n        for yielded_text in stop_sequence_handler.finalize():\n            yield yielded_text\n"
  },
  {
    "path": "src/inference_engines/mlc_engine.py",
    "content": "import os\n\nfrom cog import ConcatenateIterator\nfrom mlc_chat import ChatConfig, ChatModule, ConvConfig, GenerationConfig\nfrom transformers import AutoTokenizer\n\nfrom src.config_utils import Weights\n\nfrom .engine import Engine\n\nclass MLCEngine(Engine):\n    \"\"\"\n    An inference engine that runs inference w/ vLLM\n    \"\"\"\n\n    def __init__(\n        self, weights: Weights, is_chat: bool, num_shards: int = 1, tokenizer_path: os.PathLike = None\n    ) -> None:\n        weights_path = self.load_weights(weights)\n        self.is_chat = is_chat\n        self.num_shards = num_shards\n\n        if self.is_chat:\n            self.conv_template = \"llama-2\"\n            self.stop_str = \"\"\n            self.stop_tokens = []\n            self.add_bos = None\n        else:\n            self.conv_template = \"LM\"\n            self.stop_str = \"[INST]\"\n            self.stop_tokens = [\n                2,\n            ]\n            self.add_bos = True\n\n        conv_config = ConvConfig(\n            stop_tokens=self.stop_tokens, add_bos=self.add_bos, stop_str=self.stop_str\n        )\n        chat_config = ChatConfig(\n            conv_config=conv_config, conv_template=self.conv_template, num_shards=self.num_shards\n        )\n\n        model_path = os.path.join(weights_path, \"params\")\n        self.cm = ChatModule(model=model_path, chat_config=chat_config)\n\n        # this isn't used!\n        tokenizer_path = os.path.join(weights_path, \"params\")\n        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)\n\n    def load_weights(self, weights: Weights) -> str:\n        \"\"\"\n        Downloads the weights from the given Weights object and returns the path to the downloaded weights.\n\n        Args:\n        - weights (Weights): the weights to download.\n\n        Returns:\n        - weights_path (str): the path to the downloaded weights.\n        \"\"\"\n        # ensure directories exist\n        for path in 
weights.remote_files:\n            path_directory = os.path.dirname(path)\n            if path_directory:\n                path_directory = os.path.join(weights.local_path, path_directory)\n                os.makedirs(path_directory, exist_ok=True)\n\n        return super().load_weights(weights)\n\n    def get_logits(self):\n        \"\"\"\n        Given a prompt, returns the logits from the language model.\n        \"\"\"\n        raise NotImplementedError(\"MLC currently does not support logits.\")\n\n    def load_lora(self):\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n        raise NotImplementedError(\"MLC currently does not support LoRAs.\")\n\n    def is_lora_active(self):\n        \"\"\"\n        Returns True if the engine is currently configured to use a lora, False otherwise.\n        \"\"\"\n        raise NotImplementedError(\"MLC currently does not support LoRAs.\")\n\n    def set_lora(self):\n        \"\"\"\n        Given a loaded lora (created w/ load_lora), configures the engine to use that lora in combination with the loaded base weights.\n        \"\"\"\n        raise NotImplementedError(\"MLC currently does not support LoRAs.\")\n\n    def delete_lora(self):\n        print(\"MLC is currently not using any LoRAs.\")\n\n    def __call__(\n        self,\n        prompt: str,\n        max_new_tokens: int,\n        temperature: float,\n        top_p: float,\n        top_k: int,\n        stop_sequences: str | list[str] = None,\n        stop_token_ids: list[int] = [],\n        repetition_penalty: float = 1.0,\n        incremental_generation: bool = True,\n        *args,\n        **kwargs,\n    ) -> ConcatenateIterator[str]:\n        \"\"\"\n        Given a prompt, runs generation on the language model with MLC.\n\n        Args:\n        - prompt (str): the 
prompt to give the model.\n        - max_new_tokens (int): the maximum number of new tokens to generate.\n        - temperature (float): the parameter to anneal the sampling distribution with.\n        - top_p (float): the amount to truncate the sampling distribution by.\n        - top_k (int): the number of tokens to truncate the sampling distribution by.\n        - stop_sequences (str | list[str]): the string to stop generation at.\n        - stop_token_ids (list[int]): a list of token ids to stop generation at.\n        - repetition_penalty (float): the amount to penalize tokens that have already been generated, higher values penalize more.\n        - incremental_generation: whether to yield the entire generated sequence or the next generated token at each step.\n\n        Yields:\n        - generated_text (str): the generated text, or next token, depending on the value of `incremental_generation`.\n        \"\"\"\n\n        if top_k is not None and top_k > 0:\n            raise ValueError(\n                \"top_k is currently not supported by our generation engine.\"\n            )\n\n        stop_token_ids += self.stop_tokens\n        # stop_sequences = [self.stop_str] + stop_sequences\n\n        # TODO (Moin): add support for the system prompt on chat models\n        conv_config = ConvConfig(\n            stop_tokens=stop_token_ids, add_bos=self.add_bos, stop_str=stop_sequences\n        )\n        chat_config = ChatConfig(\n            temperature=temperature,\n            repetition_penalty=repetition_penalty,\n            top_p=top_p,\n            max_gen_len=max_new_tokens,\n            mean_gen_len=max_new_tokens,\n            conv_config=conv_config,\n            conv_template=self.conv_template,\n            num_shards=self.num_shards\n        )\n        self.cm.reset_chat(chat_config)\n\n        generation_config = GenerationConfig(\n            temperature=temperature,\n            repetition_penalty=repetition_penalty,\n            top_p=top_p,\n     
       max_gen_len=max_new_tokens,\n        )\n        self.cm._prefill(input=prompt, generation_config=generation_config)\n\n        min_new_tokens = kwargs.pop(\"min_new_tokens\", None)\n        if min_new_tokens is not None and min_new_tokens > -1:\n            raise ValueError(\n                \"min_new_tokens is currently not supported by MLC's engine.\"\n            )\n\n        if len(kwargs) > 0:\n            raise ValueError(f\"Unknown keyword arguments: {', '.join(kwargs.keys())}\")\n\n        generation_length = 0\n        while True:\n            if self.cm._stopped():\n                break\n            self.cm._decode(generation_config=generation_config)\n            out = self.cm._get_message()\n            # stops us from yielding half an emoji, which breaks\n            out = out.replace(\"\\N{Replacement Character}\", \"\") \n            if len(out) == generation_length:\n                # don't yield an empty string\n                continue\n            yield out[generation_length:]\n            generation_length = len(out)\n"
  },
  {
    "path": "src/inference_engines/mlc_vllm_engine.py",
    "content": "from typing import Any, Optional, List\nimport os\n\nfrom .engine import Engine\nfrom .vllm_engine import vLLMEngine\n\n\nclass MLCvLLMEngine(Engine):\n    \"\"\"\n    MLC for base models, vllm for fine-tunes.\n    \"\"\"\n\n    def __init__(self, mlc_args: dict, vllm_args: dict) -> None:\n        # checks for old style loras & if this is booted as a fine-tuneable hotswap\n        if os.getenv(\"COG_WEIGHTS\") or os.getenv(\"REPLICATE_HOTSWAP\") == \"1\":\n            self.engine = vLLMEngine(**vllm_args)\n        else:\n            # can't run vllm if MLC is imported\n            from .mlc_engine import MLCEngine\n\n            self.engine = MLCEngine(**mlc_args)\n            self.vllm_args = vllm_args\n\n    def load_lora(self, lora_data: dict) -> Any:\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n        if not isinstance(self.engine, vLLMEngine):\n            # Really we should never need to do this.\n            # print(\"Transitioning from MLC to vLLM\")\n            # del self.engine.cm\n            # del self.engine.tokenizer\n            # del self.engine\n\n            # gc.collect()\n            # torch.cuda.empty_cache()\n            # self.engine = vLLMEngine(**self.vllm_args)\n            raise Exception(\"Loras not supported with MLCEngine\")\n\n        return self.engine.load_lora(lora_data)\n\n    def is_lora_active(self) -> bool:\n        \"\"\"\n        Returns True if the engine is currently configured to use a lora, False otherwise.\n        \"\"\"\n        if isinstance(self.engine, vLLMEngine):\n            return self.engine.is_lora_active()\n        return False\n\n    def set_lora(self, lora: Any) -> None:\n        \"\"\"\n        Given a loaded lora (created w/ load_lora), configures the engine to use that lora in combination with 
the loaded base weights.\n        \"\"\"\n        if not isinstance(self.engine, vLLMEngine):\n            raise Exception(\n                \"Loras not supported with MLC Engine! Invalid state reached.\"\n            )\n        self.engine.set_lora(lora)\n\n    def delete_lora(self) -> None:\n        self.engine.delete_lora()\n\n    def __call__(\n        self,\n        prompt,\n        max_new_tokens: int = 128,\n        min_new_tokens: int = -1,\n        temperature: float = 0.75,\n        top_p: float = 0.9,\n        top_k: int = 50,\n        stop_sequences: Optional[List[str]] = None,\n        **kwargs,\n    ):\n        print(f\"MLC: {not isinstance(self.engine, vLLMEngine)}\")\n        gen = self.engine(\n            prompt,\n            max_new_tokens=max_new_tokens,\n            min_new_tokens=min_new_tokens,\n            temperature=temperature,\n            top_p=top_p,\n            top_k=top_k,\n            stop_sequences=stop_sequences,\n            **kwargs,\n        )\n        for val in gen:\n            yield val\n"
  },
  {
    "path": "src/inference_engines/transformers_engine.py",
    "content": "import os\nimport shutil\nfrom transformers import AutoModelForCausalLM, TextIteratorStreamer, StoppingCriteria\nfrom typing import Optional, List, Tuple, Any\nfrom threading import Thread\nfrom peft import PeftModel, LoraConfig\nfrom peft.utils.save_and_load import set_peft_model_state_dict\n\nimport torch.nn.init\n\nfrom src.config_utils import Weights\n\ntorch.nn.init.kaiming_uniform_ = lambda x, *args, **kwargs: x\ntorch.nn.init.uniform_ = lambda x, *args, **kwargs: x\n\nimport torch\n\nfrom .engine import Engine\n\nADAPTER_NAME = \"default\"\n\n\nclass ExtraStopSequence(StoppingCriteria):\n    \"\"\"\n    Adds in an extra stop sequence. Assuming 1-D generation, not batch.\n    \"\"\"\n\n    # TODO: there's something silly to debug here.\n    def __init__(self, stop_sequence: torch.Tensor, device: str):\n        self.stop_sequence = stop_sequence.to(device)\n\n    def __call__(\n        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs\n    ):\n        return torch.equal(\n            self.stop_sequence, input_ids[:, self.stop_sequence.shape[-1]]\n        )\n\n\nclass TransformersEngine(Engine):\n    \"\"\"\n    An inference engine that runs in vanilla transformers.\n    Vanilla is, at times, fantastic.\n    \"\"\"\n\n    def __init__(self, weights: Weights, tokenizer_func=None, device=\"cuda\"):\n        model_path = self.load_weights(weights)\n        self.model = AutoModelForCausalLM.from_pretrained(\n            model_path, torch_dtype=torch.bfloat16\n        ).to(device)\n        self.tokenizer = tokenizer_func()\n        self.device = device\n        print(\"Transformers engine initialized.\")\n\n    def load_lora(self, lora_weights: dict) -> Tuple[LoraConfig, Any]:\n        \"\"\"\n        Given a dict of {filename:bytes}, returns a tuple of (LoraConfig, Torch model)\n        This relies on external but poorly documented peft methods, when we upgrade peft past 0.4.0 we may need to (briefly) revisit\n        \"\"\"\n\n 
       # serializing the dictionary of files and such - hf doesn't have quick and easy ways to load loras from file references,\n        # and this implementation isn't built for speed anyway\n        model_dir = \"tmp/model\"\n        os.makedirs(model_dir)\n        for handle in lora_weights:\n            fpath = os.path.join(model_dir, handle)\n            with open(fpath, \"wb\") as f:\n                f.write(lora_weights[handle])\n\n        config = LoraConfig.from_pretrained(model_dir)\n        weights = torch.load(\n            os.path.join(model_dir, \"adapter_model.bin\"),\n            map_location=torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"),\n        )\n        shutil.rmtree(model_dir)\n        return (config, weights)\n\n    def is_lora_active(self) -> bool:\n        return isinstance(self.model, PeftModel)\n\n    def delete_lora(self):\n        if hasattr(self.model, \"disable_adapter_layers\") and callable(\n            self.model.disable_adapter_layers\n        ):\n            self.model.disable_adapter_layers()\n        else:\n            print(\"No loras were ever loaded, nothing to disable.\")\n            return\n\n    def set_lora(self, lora):\n        \"\"\"\n        Sets a new lora if needed.\n        \"\"\"\n        config, weights = lora\n\n        # Note that right now we're just overwriting the \"default\" adapter w/ADAPTER_NAME\n        # we can try managing multiple adapters w/lru eviction logic, didn't seem necessary\n        if not isinstance(self.model, PeftModel):\n            # is not a peft model\n            self.model = PeftModel(self.model, config, ADAPTER_NAME)\n            set_peft_model_state_dict(self.model, weights, ADAPTER_NAME)\n            self.model.eval()\n            print(\"added lora for the first time\")\n        else:\n            self.model.enable_adapter_layers()\n            self.model.add_adapter(ADAPTER_NAME, config)\n            set_peft_model_state_dict(self.model, weights, 
ADAPTER_NAME)\n            print(\"set new lora\")\n            print(self.model.peft_config)\n            self.model.eval()\n\n        return\n\n    def get_logits(self, prompt):\n        input_ids = self.tokenizer(prompt, return_tensors=\"pt\").input_ids.to(\n            self.device\n        )\n        inputs = self.model.prepare_inputs_for_generation(input_ids)\n        with torch.no_grad():\n            output = self.model(\n                **inputs,\n                return_dict=True,\n                output_attentions=False,\n                output_hidden_states=False,\n            )\n            logits = output.logits[:, -1, :]\n        return logits\n\n    def __call__(\n        self,\n        prompt,\n        max_new_tokens: int = 128,\n        min_new_tokens: int = -1,\n        temperature: float = 0.75,\n        top_p: float = 0.9,\n        top_k: int = 50,\n        stop_sequences: Optional[List[str]] = None,\n        **kwargs,\n    ):\n        tokens_in = self.tokenizer(prompt, return_tensors=\"pt\").input_ids.to(\n            self.device\n        )\n        streamer = TextIteratorStreamer(\n            self.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True\n        )\n\n        stopping_criteria_list = None\n        if stop_sequences is not None:\n            # stop sequences!\n            stopping_criteria_list = []\n            for seq in stop_sequences:\n                stop_ids = self.tokenizer(\n                    seq, return_tensors=\"pt\", add_special_tokens=False\n                ).input_ids[0]\n                stopping_criteria_list.append(ExtraStopSequence(stop_ids, self.device))\n\n        generate_kwargs = dict(\n            input_ids=tokens_in,\n            streamer=streamer,\n            do_sample=True,\n            max_new_tokens=max_new_tokens,\n            min_new_tokens=min_new_tokens,\n            temperature=temperature,\n            top_p=top_p,\n            top_k=top_k,\n            
stopping_criteria=stopping_criteria_list,\n        )\n\n        t = Thread(target=self.model.generate, kwargs=generate_kwargs)\n        t.start()\n\n        for out in streamer:\n            yield out\n"
  },
  {
    "path": "src/inference_engines/vllm_engine.py",
    "content": "import asyncio\nimport json\nimport os\nfrom io import BytesIO, IOBase\nfrom typing import AsyncIterator, BinaryIO, List, Optional, Union, get_args\n\nimport torch\nfrom vllm import AsyncLLMEngine\nfrom vllm.engine.arg_utils import AsyncEngineArgs\nfrom vllm.sampling_params import SamplingParams\n\nfrom src.config_utils import Weights\n\nfrom .engine import Engine\n\nFILE_LIKE = str | os.PathLike\nBYTES_LIKE = str | BinaryIO | IOBase | bytes\n\n\nclass LoRA:\n    def __init__(\n        self, adapter_config: Union[str, bytes, bytearray], adapter_model: FILE_LIKE\n    ) -> None:\n        self.adapter_config = json.loads(adapter_config)\n        self.adapter_model = torch.load(adapter_model, map_location=\"cuda\")\n\n    @classmethod\n    def load_from_path(\n        cls, adapter_config_path: os.PathLike, adapter_model_path: os.PathLike\n    ) -> \"LoRA\":\n        with open(adapter_config_path, \"r\") as f:\n            adapter_config = f.read()\n\n        with open(adapter_model_path, \"rb\") as f:\n            adapter_model = f.read()\n\n        return cls(adapter_config=adapter_config, adapter_model=adapter_model)\n\n    @classmethod\n    def load_from_bytes(\n        cls, adapter_config_bytes: BYTES_LIKE, adapter_model_bytes: BYTES_LIKE\n    ) -> \"LoRA\":\n        return cls(\n            adapter_config=adapter_config_bytes, adapter_model=adapter_model_bytes\n        )\n\n\nclass vLLMEngine(Engine):\n    \"\"\"\n    An inference engine that runs inference w/ vLLM\n    \"\"\"\n\n    def __init__(self, weights: Weights, dtype: str) -> None:\n        model_path = self.load_weights(weights)\n        args = AsyncEngineArgs(\n            model=model_path,\n            tokenizer=model_path,\n            dtype=dtype,\n        )\n        self.engine = AsyncLLMEngine.from_engine_args(args)\n        self.tokenizer = self.engine.engine.tokenizer\n\n    def load_lora(\n        self,\n        lora_state_dict: Optional[dict] = None,\n        adapter_model: 
Optional[FILE_LIKE | BYTES_LIKE] = None,\n        adapter_config: Optional[FILE_LIKE | BYTES_LIKE] = None,\n    ) -> LoRA:\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n\n        # TODO (Moin): I don't like this \"pass a dict or the explicit params\" -- but going to add it in and ship ASAP.\n        if lora_state_dict is None and adapter_model is None and adapter_config is None:\n            raise ValueError(\n                \"At least one of lora_state_dict, adapter_model, or adapter_config must be provided.\"\n            )\n\n        if lora_state_dict is not None and (\n            adapter_model is not None or adapter_config is not None\n        ):\n            raise ValueError(\n                \"lora_state_dict cannot be provided if adapter_model or adapter_config is provided.\"\n            )\n\n        if lora_state_dict is not None:\n            ADAPTER_CONFIG_KEY_NAME = \"adapter_config.json\"\n            ADAPTER_MODEL_KEY_NAME = \"adapter_model.bin\"\n            if (\n                ADAPTER_CONFIG_KEY_NAME not in lora_state_dict.keys()\n                or ADAPTER_MODEL_KEY_NAME not in lora_state_dict.keys()\n            ):\n                raise ValueError(\n                    f\"lora_state_dict must include at least: '{ADAPTER_MODEL_KEY_NAME}' and '{ADAPTER_CONFIG_KEY_NAME}'.\"\n                )\n\n            adapter_config, adapter_model = (\n                lora_state_dict[ADAPTER_CONFIG_KEY_NAME],\n                BytesIO(lora_state_dict[ADAPTER_MODEL_KEY_NAME]),\n            )\n\n        if isinstance(adapter_model, get_args(FILE_LIKE)) and isinstance(\n            adapter_config, get_args(FILE_LIKE)\n        ):\n            lora = LoRA.load_from_path(\n                adapter_config_path=adapter_config, adapter_model_path=adapter_model\n            
)\n        elif isinstance(adapter_model, get_args(BYTES_LIKE)) and isinstance(\n            adapter_config, get_args(BYTES_LIKE)\n        ):\n            lora = LoRA.load_from_bytes(\n                adapter_config_bytes=adapter_config, adapter_model_bytes=adapter_model\n            )\n        else:\n            raise TypeError(\n                \"Both the adapter model and the adapter config must be either both file-like or bytes-like objects/primitives.\"\n            )\n\n        return lora\n\n    def is_lora_active(self) -> bool:\n        \"\"\"\n        Returns True if the engine is currently configured to use a lora, False otherwise.\n        \"\"\"\n        return self.engine.engine.is_lora_active()\n\n    def set_lora(self, lora: LoRA) -> None:\n        \"\"\"\n        Given a loaded lora (created w/ load_lora), configures the engine to use that lora in combination with the loaded base weights.\n        \"\"\"\n        self.delete_lora()  # defensive check -- can move this out of the engine if everything works appropriately\n        self.delete_lora()  # defensive check -- can move this out of the engine if everything works appropriately\n        self.engine.engine.load_lora(\n            lora_config=lora.adapter_config, lora_state_dict=lora.adapter_model\n        )\n\n    def delete_lora(self) -> None:\n        self.engine.engine.delete_lora()\n\n    async def generate_stream(\n        self, prompt: str, sampling_params: SamplingParams\n    ) -> AsyncIterator[str]:\n        results_generator = self.engine.generate(prompt, sampling_params, 0)\n        async for generated_text in results_generator:\n            yield generated_text\n\n    def __call__(\n        self,\n        prompt: str,\n        max_new_tokens: int,\n        temperature: float,\n        top_p: float,\n        top_k: int,\n        stop_sequences: str | List[str] = None,\n        stop_token_ids: List[int] = None,\n        frequency_penalty: float = 1.0,\n        incremental_generation: 
bool = True,\n        *args,\n        **kwargs,\n    ) -> str:\n        \"\"\"\n        Given a prompt, runs generation on the language model with vLLM.\n\n        Args:\n        - prompt (str): the prompt to give the model.\n        - max_new_tokens (int): the maximum number of new tokens to generate.\n        - temperature (float): the parameter to anneal the sampling distribution with.\n        - top_p (float): the amount to truncate the sampling distribution by.\n        - top_k (int): the number of tokens to truncate the sampling distribution by.\n        - stop_sequences (str | List[str]): the string to stop generation at.\n        - stop_token_ids (List[str]): a list of token ids to stop generation at.\n        - frequency_penalty (float): the amount to penalize tokens that have already been generated, higher values penalize more.\n        - incremental_generation: whether to yield the entire generated sequence or the next generated token at each step.\n\n        Yields:\n        - generated_text (str): the generated text, or next token, depending on the value of `incremental_generation`.\n        \"\"\"\n        if top_k is None or top_k == 0:\n            top_k = -1\n\n        min_new_tokens = kwargs.pop(\"min_new_tokens\", None)\n        if min_new_tokens is not None and min_new_tokens > -1:\n            raise ValueError(\n                \"min_new_tokens is currently not supported by vLLM Engine.\"\n            )\n\n        stop_token_ids = stop_token_ids or []\n        stop_token_ids.append(self.tokenizer.eos_token_id)\n\n        if isinstance(stop_sequences, str) and stop_sequences != \"\":\n            stop = [stop_sequences]\n        elif isinstance(stop_sequences, list) and len(stop_sequences) > 0:\n            stop = stop_sequences\n        else:\n            stop = []\n\n        for tid in stop_token_ids:\n            stop.append(self.tokenizer.decode(tid))\n\n        sampling_params = SamplingParams(\n            n=1,\n            top_p=top_p,\n  
          top_k=top_k,\n            temperature=temperature,\n            use_beam_search=False,\n            stop=stop,\n            max_tokens=max_new_tokens,\n            frequency_penalty=frequency_penalty,\n        )\n\n        try:\n            loop = asyncio.get_event_loop()\n        except RuntimeError:\n            loop = asyncio.new_event_loop()\n            asyncio.set_event_loop(loop)\n\n        gen = self.generate_stream(\n            prompt,\n            sampling_params,\n        )\n\n        generation_length = 0\n        while True:\n            try:\n                request_output = loop.run_until_complete(gen.__anext__())\n                assert len(request_output.outputs) == 1\n                generated_text = request_output.outputs[0].text\n                if incremental_generation:\n                    # it takes multiple calls to gen.__anext__ to render one emoji. \n                    # this check keeps us from needlesly yielding empty strings\n                    if len(generated_text) > generation_length:\n                        yield generated_text[generation_length:]\n                else:\n                    yield generated_text\n                generation_length = len(generated_text)\n            except StopAsyncIteration:\n                break\n\n\ndef run_generation():\n    \"\"\"\n    Helper class to run the generation for tests.\n    \"\"\"\n    model_path = \"/home/moin/Llama-2-7b\"\n    tokenizer_path = \"/home/moin/Llama-2-7b\"\n    dtype = \"auto\"\n    engine = vLLMEngine(\n        model_path=model_path, tokenizer_path=tokenizer_path, dtype=dtype\n    )\n    prompt = \"Hello,\"\n    generated_text = engine(\n        prompt=prompt, max_new_tokens=128, temperature=1.0, top_p=0.9, top_k=50\n    )\n    for text in generated_text:\n        print(text, end=\"\")\n\n\nif __name__ == \"__main__\":\n    run_generation()\n"
  },
  {
    "path": "src/inference_engines/vllm_exllama_engine.py",
    "content": "import gc\nfrom typing import Any, Optional, List\n\nimport torch\nimport os\n\nfrom .engine import Engine\nfrom .vllm_engine import vLLMEngine\nfrom .exllama import ExllamaEngine\n\n\nclass ExllamaVllmEngine(Engine):\n    \"\"\"\n    It's exllama until fine-tuning hits, and then it's vllm.\n    \"\"\"\n\n    def __init__(self, vllm_args: dict, exllama_args: dict) -> None:\n        # for old-style loras, should they happen\n        if \"COG_WEIGHTS\" in os.environ or (\n            \"REPLICATE_HOTSWAP\" in os.environ and os.environ[\"REPLICATE_HOTSWAP\"] == \"1\"\n        ):\n            self.engine = vLLMEngine(**vllm_args)\n        else:\n            self.engine = ExllamaEngine(**exllama_args)\n            self.vllm_args = vllm_args\n\n    def load_lora(self, lora_data: dict) -> Any:\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n        if isinstance(self.engine, ExllamaEngine):\n            # Really we should never need to do this.\n            print(\"Transitioning from Exllama to vLLM\")\n            del self.engine.model\n            del self.engine.generator\n            del self.engine\n\n            gc.collect()\n            torch.cuda.empty_cache()\n            self.engine = vLLMEngine(**self.vllm_args)\n\n        return self.engine.load_lora(lora_data)\n\n    def is_lora_active(self) -> bool:\n        \"\"\"\n        Returns True if the engine is currently configured to use a lora, False otherwise.\n        \"\"\"\n        if isinstance(self.engine, vLLMEngine):\n            return self.engine.is_lora_active()\n        return False\n\n    def set_lora(self, lora: Any) -> None:\n        \"\"\"\n        Given a loaded lora (created w/ load_lora), configures the engine to use that lora in combination with the loaded base weights.\n        \"\"\"\n   
     if isinstance(self.engine, ExllamaEngine):\n            raise Exception(\n                \"Loras not supported with Exllama Engine! Invalid state reached.\"\n            )\n        self.engine.set_lora(lora)\n\n    def delete_lora(self) -> None:\n        self.engine.delete_lora()\n\n    def __call__(\n        self,\n        prompt,\n        max_new_tokens: int = 128,\n        min_new_tokens: int = -1,\n        temperature: float = 0.75,\n        top_p: float = 0.9,\n        top_k: int = 50,\n        stop_sequences: Optional[List[str]] = None,\n        **kwargs,\n    ):\n        if top_k <=0:\n            top_k = 50\n        print(f\"Exllama: {isinstance(self.engine, ExllamaEngine)}\")\n        gen = self.engine(\n            prompt,\n            max_new_tokens=max_new_tokens,\n            min_new_tokens=min_new_tokens,\n            temperature=temperature,\n            top_p=top_p,\n            top_k=top_k,\n            stop_sequences=stop_sequences,\n            **kwargs,\n        )\n        for val in gen:\n            yield val\n"
  },
  {
    "path": "src/inference_engines/vllm_transformers.py",
    "content": "import gc\nfrom typing import Any, Optional, List\n\nimport torch\n\nfrom .engine import Engine\nfrom .vllm_engine import vLLMEngine\nfrom .transformers_engine import TransformersEngine\n\n\nclass vLLMTransformersEngine(Engine):\n    \"\"\"\n    It's vLLM until fine-tuning hits, and then it's transformers.\n    \"\"\"\n\n    def __init__(\n        self, model_path: str, vllm_args: dict, transformers_args: dict\n    ) -> None:\n        self.engine = vLLMEngine(model_path, **vllm_args)\n        self.model_path = model_path\n        self.transformers_args = transformers_args\n\n    def load_lora(self, lora_data: dict) -> Any:\n        \"\"\"\n        loads a lora from files into the format that this particular engine expects. DOES NOT prepare the engine for inference.\n        lora_data is a dictionary of file names & references from the zip file\n        \"\"\"\n        if isinstance(self.engine, vLLMEngine):\n            print(\"Transitioning from vLLM to Transformers\")\n            for worker in self.engine.engine.engine.workers:  # needs more engine\n                del worker.cache_engine.gpu_cache\n                del worker.cache_engine.cpu_cache\n                del worker.gpu_cache\n                del worker.model\n\n            del self.engine\n            gc.collect()\n            torch.cuda.empty_cache()\n            self.engine = TransformersEngine(self.model_path, **self.transformers_args)\n\n        return self.engine.load_lora(lora_data)\n\n    def is_lora_active(self) -> bool:\n        \"\"\"\n        Returns True if the engine is currently configured to use a lora, False otherwise.\n        \"\"\"\n        if isinstance(self.engine, TransformersEngine):\n            return self.engine.is_lora_active()\n        return False\n\n    def set_lora(self, lora: Any) -> None:\n        \"\"\"\n        Given a loaded lora (created w/ load_lora), configures the engine to use that lora in combination with the loaded base weights.\n        
\"\"\"\n        if isinstance(self.engine, vLLMEngine):\n            raise Exception(\n                \"Loras not supported with vLLM Engine! Invalid state reached.\"\n            )\n        self.engine.set_lora(lora)\n\n    def delete_lora(self) -> None:\n        self.engine.delete_lora()\n\n    def __call__(\n        self,\n        prompt,\n        max_new_tokens: int = 128,\n        min_new_tokens: int = -1,\n        temperature: float = 0.75,\n        top_p: float = 0.9,\n        top_k: int = 50,\n        stop_sequences: Optional[List[str]] = None,\n        **kwargs,\n    ):\n        gen = self.engine(\n            prompt,\n            max_new_tokens=max_new_tokens,\n            min_new_tokens=min_new_tokens,\n            temperature=temperature,\n            top_p=top_p,\n            top_k=top_k,\n            stop_sequences=stop_sequences,\n            **kwargs,\n        )\n        for val in gen:\n            yield val\n"
  },
  {
    "path": "src/more_utils.py",
    "content": "import os\n\n\nDEFAULT_PAD_TOKEN = \"[PAD]\"\nDEFAULT_EOS_TOKEN = \"</s>\"\nDEFAULT_BOS_TOKEN = \"<s>\"\nDEFAULT_UNK_TOKEN = \"</s>\"\n\n\ndef log_memory_stuff(prompt=None):\n    \"\"\"One method to barf out everything we'd ever want to know about memory\"\"\"\n    import torch\n\n    if prompt is not None:\n        print(prompt)\n    os.system(\"nvidia-smi\")\n    print(torch.cuda.memory_summary())\n\n\ndef load_tokenizer(tokenizer_path):\n    \"\"\"Same tokenizer, agnostic from tensorized weights/etc\"\"\"\n    from transformers import LlamaTokenizer\n\n    tok = LlamaTokenizer.from_pretrained(\n        tokenizer_path, cache_dir=\"pretrained_weights\", legacy=False\n    )\n    tok.add_special_tokens(\n        {\n            \"eos_token\": DEFAULT_EOS_TOKEN,\n            \"bos_token\": DEFAULT_BOS_TOKEN,\n            \"unk_token\": DEFAULT_UNK_TOKEN,\n            \"pad_token\": DEFAULT_PAD_TOKEN,\n        }\n    )\n    return tok\n"
  },
  {
    "path": "src/utils.py",
    "content": "import asyncio\nimport builtins\nimport contextlib\nimport os\nimport random\nimport subprocess\nimport time\nimport typing as tp\n\n\ndef seed_all(seed: int):\n    import numpy\n    import torch\n\n    os.environ[\"PYTHONHASHSEED\"] = str(seed)\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    random.seed(seed)\n    numpy.random.seed(seed)\n\n\ndef get_env_var_or_default(var_name, default_value):\n    \"\"\"\n    Attempts to load a global variable from an environment variable.\n\n    Args:\n    - var_name (str): Name of the global variable.\n    - default_value: The default value to use if the environment variable doesn't exist or its length is 0.\n\n    Returns:\n    - value: The value from the environment variable or the default value.\n    \"\"\"\n    env_value = os.environ.get(var_name, \"\")\n\n    # Check if the environment variable exists and is not empty\n    if len(env_value) > 0:\n        return env_value\n    return default_value\n\n\nclass Logger:\n    def __init__(self, marker: str = \"predict-timings\"):\n        self.marker = marker + \"%s\" % random.randint(0, 1000000)\n        self.start = time.time()\n        self.last = self.start\n\n    def log(self, *args):\n        current_time = time.time()\n        elapsed_since_start = current_time - self.start\n        elapsed_since_last_log = current_time - self.last\n\n        message = \" \".join(str(arg) for arg in args)\n        timings = f\"{elapsed_since_start:.2f}s since start, {elapsed_since_last_log:.2f}s since last log\"\n\n        print(f\"{self.marker}: {message} - {timings}\")\n        self.last = current_time\n\n\ndef get_loop() -> asyncio.AbstractEventLoop:\n    try:\n        return asyncio.get_running_loop()\n    except RuntimeError:\n        return asyncio.new_event_loop()\n\n\ndef download_file(file, local_filename):\n    print(f\"Downloading {file} to {local_filename}\")\n    if os.path.exists(local_filename):\n    
    os.remove(local_filename)\n    if \"/\" in local_filename:\n        if not os.path.exists(os.path.dirname(local_filename)):\n            os.makedirs(os.path.dirname(local_filename), exist_ok=True)\n\n    command = [\"pget\", file, local_filename]\n    subprocess.check_call(command, close_fds=True)\n\n\ndef check_files_exist(remote_files: list[str], local_path: str) -> list[str]:\n    # Get the list of local file names\n    local_files = os.listdir(local_path)\n\n    # Check if each remote file exists in the local directory\n    missing_files = list(set(remote_files) - set(local_files))\n\n    return missing_files\n\n\nasync def download_file_with_pget(remote_path, dest_path, pget_concurrency=\"10\"):\n    # Create the subprocess\n    print(\"Downloading \", remote_path)\n    if remote_path.endswith(\"json\"):\n        info = (\n            \"%{filename_effective} took %{time_total}s (%{speed_download} bytes/sec)\\n\"\n        )\n        args = [\"curl\", \"-w\", info, \"-sLo\", dest_path, remote_path]\n    else:\n        args = [\"pget\", \"-c\", pget_concurrency, remote_path, dest_path]\n    process = await asyncio.create_subprocess_exec(\n        *args,\n        stdout=asyncio.subprocess.PIPE,\n        stderr=asyncio.subprocess.PIPE,\n        close_fds=True,\n    )\n\n    # Wait for the subprocess to finish\n    stdout, stderr = await process.communicate()\n\n    # Print what the subprocess output (if any)\n    if stdout:\n        print(f\"[stdout]\\n{stdout.decode()}\")\n    if stderr:\n        print(f\"[stderr]\\n{stderr.decode()}\")\n\n\nasync def download_files_with_pget(\n    remote_path: str, path: str, files: list[str]\n) -> None:\n    download_jobs = \"\\n\".join(f\"{remote_path}/{f} {path}/{f}\" for f in files)\n    args = [\"pget\", \"multifile\", \"-\", \"-f\", \"--max-conn-per-host\", \"100\"]\n    process = await asyncio.create_subprocess_exec(*args, stdin=-1, close_fds=True)\n    # Wait for the subprocess to finish\n    await 
process.communicate(download_jobs.encode())\n\n\ndef maybe_download_with_pget(\n    path: str,\n    remote_path: tp.Optional[str] = None,\n    remote_filenames: tp.Optional[list[str]] = None,\n    logger: tp.Optional[Logger] = None,\n):\n    \"\"\"\n    Downloads files from remote_path to path if they are not present in path. File paths are constructed\n    by concatenating remote_path and remote_filenames. If remote_path is None, files are not downloaded.\n\n    Args:\n        path (str): Path to the directory where files should be downloaded\n        remote_path (str): Path to the directory where files should be downloaded from\n        remote_filenames (List[str]): List of file names to download\n\n    Returns:\n        path (str): Path to the directory where files were downloaded\n\n    Example:\n\n        maybe_download_with_pget(\n            path=\"models/roberta-base\",\n            remote_path=\"gs://my-bucket/models/roberta-base\",\n            remote_filenames=[\"config.json\", \"pytorch_model.bin\", \"tokenizer.json\", \"vocab.json\"],\n        )\n    \"\"\"\n    if remote_path:\n        remote_path = remote_path.rstrip(\"/\")\n        if not os.path.exists(path):\n            os.makedirs(path, exist_ok=True)\n            missing_files = remote_filenames or []\n        else:\n            missing_files = check_files_exist(remote_filenames or [], path)\n        get_loop().run_until_complete(\n            download_files_with_pget(remote_path, path, missing_files)\n        )\n\n    return path\n\n\nclass StreamingTextStopSequenceHandler:\n    def __init__(self, stop_sequences: tp.List[str] = None, eos_token: str = None):\n        self.stop_sequences = stop_sequences\n        self.eos_token = eos_token\n        self.cache = []\n\n        if stop_sequences:\n            self.stop_sequence_tracker = [0] * len(self.stop_sequences)\n            self.stop_sequence_lens = [len(seq) for seq in self.stop_sequences]\n\n    def get_match_length(self, text: str, 
stop_sequence: str):\n        \"\"\"\n        Checks if the end of the provided text matches the beginning of the stop sequence.\n        Returns the length of the matched stop sequence if it exists, otherwise returns 0.\n        \"\"\"\n        matched_len = 0\n        for i in range(1, len(stop_sequence) + 1):\n            # Check if the end of the text matches the start of the stop_sequence\n            if stop_sequence[:i] in text:\n                matched_len = i\n\n        if matched_len:\n            return matched_len\n        return 0\n\n    def process(self, token):\n        partial_match = False\n        stop_sequence_tracker = self.stop_sequence_tracker.copy()\n\n        # Iterate through each stop sequence\n        text = \"\".join(self.cache) + token\n        for idx, stop_sequence in enumerate(self.stop_sequences):\n            # If token matches the next token in the stop sequence\n            match_length = self.get_match_length(text, stop_sequence)\n            if match_length:\n                # If we've completed the stop sequence\n                if match_length == self.stop_sequence_lens[idx]:\n                    self.cache.append(token)\n                    text_before_stop_sequence = \"\".join(self.cache).split(\n                        stop_sequence, maxsplit=1\n                    )[0]\n                    if text_before_stop_sequence:\n                        self.cache = [text_before_stop_sequence]\n                    else:\n                        self.cache.clear()\n\n                    # self.cache.clear()\n                    stop_sequence_tracker = [0] * len(self.stop_sequences)\n                    yield self.eos_token\n                else:\n                    partial_match = True\n                    # If we've matched more characters than before, update the tracker\n                    if match_length > stop_sequence_tracker[idx]:\n                        stop_sequence_tracker[idx] = match_length\n                    else:\n 
                       # Reset the tracker for that sequence\n                        stop_sequence_tracker[idx] = 0\n\n            # If token doesn't match the next token in the stop sequence\n            else:\n                # Reset the tracker for that stop token sequence\n                stop_sequence_tracker[idx] = 0\n\n        if not partial_match:\n            # If token doesn't match a stop sequence, yield all cached tokens and the current token\n            self.cache.clear()\n            yield text\n\n        else:\n            # If we've reset a stop token counter, we need to yield cached tokens and then clear the cache\n            for i, j in zip(stop_sequence_tracker, self.stop_sequence_tracker):\n                if i < j:\n                    yield \"\".join(self.cache)\n                    self.cache.clear()\n\n            # Then we need to update the tracker and cache the current token\n            self.stop_sequence_tracker = stop_sequence_tracker\n            self.cache.append(token)\n\n    def __call__(self, token):\n        if self.stop_sequences:\n            yield from self.process(token)\n\n        else:\n            yield token\n\n    def finalize(self):\n        if self.cache:\n            yield from self.cache\n            self.cache.clear()\n\n\n@contextlib.contextmanager\ndef delay_prints(REALLY_EAT_MY_PRINT_STATEMENTS: bool = False) -> tp.Iterator[tp.Callable]:\n    lines = []\n\n    def delayed_print(*args: tp.Any, **kwargs: tp.Any) -> None:\n        lines.append((args, kwargs))\n\n    if REALLY_EAT_MY_PRINT_STATEMENTS:\n        builtins.print, _print = delayed_print, builtins.print\n    try:\n        yield delayed_print\n    finally:\n        if REALLY_EAT_MY_PRINT_STATEMENTS:\n            builtins.print = _print\n        for args, kwargs in lines:\n            print(*args, **kwargs)\n\n    return delay_prints\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/assets/llama_tokenizer/special_tokens_map.json",
    "content": "{}"
  },
  {
    "path": "tests/assets/llama_tokenizer/tokenizer_checklist.chk",
    "content": "eeec4125e9c7560836b4873b6f8e3025  tokenizer.model\n"
  },
  {
    "path": "tests/assets/llama_tokenizer/tokenizer_config.json",
    "content": "{\"bos_token\": \"\", \"eos_token\": \"\", \"model_max_length\": 4096, \"tokenizer_class\": \"LlamaTokenizer\", \"unk_token\": \"\"}"
  },
  {
    "path": "tests/conftest.py",
    "content": "def pytest_addoption(parser):\n    parser.addoption(\"--model\", action=\"store\", default=None, help=\"Model name to test\")\n"
  },
  {
    "path": "tests/data/200_samples.jsonl",
    "content": "{\"text\": \"Write a response to the following message:\\nI like to read popular science book. Does that mean that I have to give up reading popular science and read dreary academic papers?\\n###Response: No! Not by any means. But don't take them at their word. As with everything you read (or hear, or see . . .) you should question it. It should encourage you to investigate more of the subject at hand, to look up the references (no references is a bad sign). If nothing else, it can provide a valuable lesson in skepticism and the depth required to truly understand these fields; you didn't really think that such complex topics could be properly addressed in a twenty minute TED talk or a single non-reference book, did you? These things are teasers, meant to whet your appetite or let you know that there is a broader world out there.\"}\n{\"text\": \"Write a response to the following message:\\nQuite optimistic views there. Most content doesn't have a real monetary value beyond generating views. There won't be an acceptable ads consortium, advertisers will only get more ruthless or die.\\n###Response: It really does seem like a race to the bottom at this point, but it's a releif to see that we're starting to move past calling people theives for ad blocking. Except for the scolding, this situation reminds of the days shortly before everyone had popup blocking built in to browsers; for normal people the internet is broken, and for us, it's hard to realize how awful it is until we use grandma's computer, and get a horrifying reminder of how the other half lives.\\nIf someone's business model relies on exploiting security flaws on the client, they shouldn't expect to trade on that forever.\"}\n{\"text\": \"Write a response to the following message:\\nThis is actually the most scary comment I've seen here in a while.  
VC is not income.\\n###Response: well, it had a smiley at the end...\"}\n{\"text\": \"Write a response to the following message:\\nI'm all for innovation, but I become wary when a company that controls a sizeable proportion of the browser market-share and also serves a not insignificant proportion of all web traffic makes extensions like this.\\nSPDY, Dart and now this.\\n###Response: Your kind of skepticism is ultimately one of the few things that holds corporations in check, so I am not discouraging you from persistently aiming it at Google, Facebook, et al.\\nBut that being said, I was a full-time dev there for 5 years, and I can say with a (however misguided) degree of certainty: all of those projects have one goal in mind - to make the experience of using the web better. Whatever else is true about the dangers of monoculture, Google's intentions are benign.\"}\n{\"text\": \"Write a response to the following message:\\nWe feel that NOLOH is significantly advanced to warrant a small fee for commercial purposes. As you can see from my previous post we offer several free license options, including free licenses for open-source projects.\\nFortunately/Unfortunately (depending on your viewpoint), we live in a capitalistic society and need to recoup some of the costs associated with our team's development over the past six years.\\nFurthermore, we open-source every new module and component on our github either through an MIT or BSD license. We've even taken the unprecedented step of integrating github into our site as can be read here:\\nhttp://dev.noloh.com/#/blog/2010/06/30/demos-demos-everywher....\\nLike Google, we open source anything non-related to the NOLOH core. Similarly , they don't release the details or the code to their search engine. 
Also, we're working on legalize that would open source the core if anything were to happen to us.\\n###Response: Your comparison with Google is flawed, google does not ship the core software to third parties for integration in to their product, it's purely a service. So google not shipping their core as open source has a different impact on their users than NOLOH releasing the source of their core would have on theirs.\"}\n{\"text\": \"Write a response to the following message:\\nWhat shouldn't be heard?  For example, should reincarnation be heard?  What about telekenesis or other psychic abilities?  What about more off the wall things like murder is justifiable if you don't get caught?  Or that cheating is actually a good thing if you can pin the fallout on someone unsuspecting?  Or that most Blacks are werewolves who need to be staked in the heart for the sake of the world?  Or that WWII never happened, but was invented by Marvel comics to sell Captain America?  Or that Santa Claus is not only real, but its he who delivers the babies, since he killed all the storks?\\nAt what point do we say, that point of view has so little evidence that hearing it is a waste of time, given time is a limited resource?\\n###Response: What about telekenesis or other psychic abilities?\\nActually, I think studies purporting to prove psychic abilities are real is something that students should spend time on. Remember this article?\\nhttp://news.ycombinator.com/item?id=2068105\\nI skimmed the study in question. There are no obvious methodological flaws, and by the standards of psychological publishing, it was worthy of publication. It's my opinion that ALL students of psychology should read it critically, and understand why it doesn't prove that psychic abilities are real. (Then they should apply that same skepticism to the rest of psychology research.) 
It's a great exercise, just like doing easy integrals, or spotting flaws in proofs that 1 == 2.\"}\n{\"text\": \"Write a response to the following message:\\nNot living in Sochi, the comparison is less convincing for me:\\nhttp://www.openstreetmap.org/#map=17/30.33393/-95.47847\\nhttps://www.google.com/maps/preview/@30.3339269,-95.4784644,...\\nNot to mention that searching for \\\"Conroe, TX\\\" and taking the first, correct-sounding result got me ~10 miles south of city limits.\\nNow, Google has a couple of oddities, here (those two businesses apparently on the median between the freeway and the access road!), but the road data is more accurate and detailed (including street names).\\nI've done some editing on wikis, but I'm not going to try to jump in and correct that gap.\\n###Response: Not to mention that searching for \\\"Conroe, TX\\\" and taking the first, correct-sounding result got me ~10 miles south of city limits.\\nThe search box on the OSM front page uses Nominatim, which also has it's own website. Here's the search results for that: http://nominatim.openstreetmap.org/search.php?q=Conroe%2C+TX... You can see the 2 results there http://nominatim.openstreetmap.org/details.php?place_id=4274... and http://nominatim.openstreetmap.org/details.php?place_id=9815... There's a \\\"Report Problem\\\" button to help debug issues like  this. Feel free to submit a bug!\\nI think Nominatim tries to rank things based on how long their Wikipedia article, the the item to the south (that was at the top), is the one that links to Wikipedia. TBH it looks like bad mapping whoever put that there\"}\n{\"text\": \"Write a response to the following message:\\nHowever, here's the thing: Windows are useful. 
An UI which pretends you shouldn't use them isn't good for anything complicated, when you use more than one application at a time.\\nHierarchical start menus are also useful, for that matter.\\n###Response: > However, here's the thing: Windows are useful.\\nI'm increasingly convinced that overlapping Windows are not particularly useful (they are better than one-thing-maximized-is-the-only-option), but tiled windows are very useful.\"}\n{\"text\": \"Write a response to the following message:\\n\\\"Typically people who live in mud huts with thatched roofs \\u2013 and uses a system called M-Pesa, run by Vodafone , to transfer money to their cell phones.\\\"\\nPeople in mud huts have cell phones?\\n###Response: The phrase \\\"mud huts\\\" evokes a primitive pastoral lifestyle, but there are a lot of places where mud brick is simply a sensible building material. I have stayed in a \\\"mud hut\\\" in Africa which had running water and electricity; this was common to all the houses in the neighborhood. They were nice big places, plenty of air and light, with ordinary doors and furniture and whatnot, that just happened to be built out of great big slabs of mud. They build this way not because they lack more sophisticated technology, but because mud bricks are an effective, economical technology well suited to their environment.\"}\n{\"text\": \"Write a response to the following message:\\nWhile there are no kids that ask their mom \\\"where's daddy?\\\" - meaning you - there is little to worry about.\\nI totally wasted several years of my youth and that was the best time ever. And this guy was creating something, so +1 for that.\\n###Response: \\\"where's daddy\\\" problem is rarely an issue for entrepreneurs.\\nkids are very good at adapting to their environment as long as their environment is stable and predictable. 
for example if you spend with your kids whole their life only 5 minutes a day, they perceive it as normal and wouldn't expect it any other way because \\\"that's how daddy always used to be\\\"\\nthe key for entrepreneur with kids is to deliver consistency no matter what that consistency is. follow my advice and you will be happy entrepreneur.\"}\n{\"text\": \"Write a response to the following message:\\n> My big wish for the next major iOS SDK is to get bindings.\\nI wouldn't expect this. Honestly I'd expect things to go the other way: 10.7 maybe deprecating bindings on the Desktop for things like table and outline views. Particularly since they added NSTableViewDataSource/Delegate and NSOutlineViewDataSource/Delgate in 10.6.\\n###Response: Really? With bindings you get to write so much less glue code for labels, button states and other UI widgets. It's a dramatic time saver for UI minutiae.\"}\n{\"text\": \"Write a response to the following message:\\nI wanted to try out ClojureScript a while ago, but found the tooling around it very complex since I also didn't have any experience with Clojure and Leiningen.\\nIs there a straightforward starter guide around?\\n###Response: It used to be a PITA, it's still not perfect but much better. take a look at lein-cljsbuild (https://github.com/emezeske/lein-cljsbuild) to create you project, there you can just create an html and add the resulting js in the html, so you can test in the browser. No need for web servers or anything.\"}\n{\"text\": \"Write a response to the following message:\\nIt's important for the Keystone XL alarmists, though.\\nThey want to make sure that Canadian oil is not piped through the USA via this pipeline.\\n###Response: What are they alarmed about?\"}\n{\"text\": \"Write a response to the following message:\\nAlso like the fact that I can use the GSM iPhone internationally.\\nThe AT&T iPhone has been a disappointment for international travel. 
The international data plan is a terrible deal, and it seems to be impossible to carrier unlock without also jailbreaking. Jailbreak is not a great option if you're an app developer as the jailbreak ties the phone to a specific OS version, and you generally want the latest, or even a beta OS installed. If you want to use local SIMs, your only real option is to buy a phone factory unlocked from somewhere like Singapore. If you know of a way to carrier unlock without jailbreaking, let me know!\\n###Response: Agreed. You can buy an unlocked iPhone in Canada, which may be more convenient for you.\"}\n{\"text\": \"Write a response to the following message:\\n... an intelligence service ...\\nThat was the exact phrase that popped into my head after reading the subhead.\\nNow for one of those nutty, wild-assed guesses:  what do you think about Anathem?\\n###Response: Sorry, haven't read it!  Or any Stephensen, actually.\\nforfeits in geek card\"}\n{\"text\": \"Write a response to the following message:\\nGoogle have reached the point where they know that every website is fundamentally dependent on them, and now they're taking advantage of it that to make their lives easier by forcing the entire internet to change every time some commonplace practice makes their algorithm's life harder. They've been doing it on a technical level for a while, in terms of stuff like how ads may link to the destination. Now they've simply moved onto dictating the culture and social norms of the Internet. If your way of sharing good writing with your actual readers makes life harder for Google, well you'd better change your practices or they'll punish you, even if you're not doing it for them.\\n###Response: Our startup actually receives very few organic inbound traffic from Google.\\nIt is secondary to our strategy.  Facebook is the dominant platform in our target demographic.\\nI would not consider us, even remotely dependent on Google.  
If Google disappeared tomorrow the effect on our net sales and exposure would probably be less than 1%.\\nGoogle is, for many casual users, a lazy way of getting a full URL.  Hit Google.  Type Hubspot.  Click.  Hubspot.\"}\n{\"text\": \"Write a response to the following message:\\nMany samples of (working and effective )online partnerships are in world of open source where many hackers around the world are working on much more complex problems than those backed by YC.\\n###Response: The main difficulty in a startup is not the complexity of the technical problem you're solving.\"}\n{\"text\": \"Write a response to the following message:\\nI don't believe the Linux vs windows server market share numbers.\\nMy current company has about 40 linux servers and 3 windows. My previous company had 1 windows server to 5 linux. The one before that was a similar ratio.\\nI'm looking around online to see how IDC collected/calculated this data but\\ncan't see anything that actually says. The only thing I can think of is if\\nthey are taking commercial unixes like redhat and suse and comparing them\\nagainst windows. If that is the case these numbers are beyond worthless due to\\nthe fact that most linuxs end up being centos, debian, ubuntu or other free\\ndistros.\\n###Response: I was thinking the same thing; Apache has double the marketshare on the web, and all the computationally intensive plays that I know of use Linux or BSD clusters on the backend.  Google alone has somewhere over a million boxes, Amazon isn't exactly tiny, neither is Facebook, and most of the banks that I know of are still Big Iron on the backend.\"}\n{\"text\": \"Write a response to the following message:\\nThere is absolutely no motivation for pushing the price point lower. PCs are devices we expect to use for hours and hours and hours. 
A difference of $300 will make this into a real computer, most parents value their kids beyond such a petty price point.\\n###Response: $300 is still a lot of money to a lot of people.  $99 is doable, sometimes $399 isn't.\"}\n{\"text\": \"Write a response to the following message:\\nI don't keep plates on my car because of the abundance of plate tracking applications in use by the government and private companies.\\n###Response: Time for a kickstarter for an e-ink license plate? Tie it to a GPS with known locations of cameras, and change your tag for each picture?\"}\n{\"text\": \"Write a response to the following message:\\nActually, he's asking you to put your money where his mouth is - and without any credit card protection.\\nA cynic would also point out that that by not accepting credit cards he also avoids losing the cutr of the transaction that they charge and so makes more money.\\nI'm not a cynic of course. Personally, my credit card is paid of automatically each month in full.\\n###Response: \\\"Actually, he's asking you to put your money where his mouth is...\\\"\\nUh, yes.  That is his entire business proposition, telling people how to manage their finances.\"}\n{\"text\": \"Write a response to the following message:\\nBack in the 90s there was an episode of the Outer Limits that touched on this subject. Effectively there was a nexus of information that everyone connected to through their minds. Anytime they wanted to know something, they just looked it up by thought and presto, instant expert. One guy couldn't connect, so he had to learn things the old fashioned way by research and practice. Naturally, the nexus goes down and people can barely function. 
He goes from being the village idiot to the village savior.\\n###Response: There's an Asimov story similar to this: http://en.wikipedia.org/wiki/Profession_%28short_story%29\"}\n{\"text\": \"Write a response to the following message:\\nimprove gravity and friction, a bit too floaty and the lack of friction means i can pull out entire mid sections in one fell swoop without loosing the tower.\\n###Response: I just did this, it was thrilling, like ripping a table cloth out from under a table setting. I wish I could do that in regular jenga.\"}\n{\"text\": \"Write a response to the following message:\\nNot quite the same thing, although they do have some similarities.\\nThe Selective Availability \\\"feature\\\" of GPS worked by fuzzing the time readings output by the GPS satellites. The fuzz was time-varying but only changed every few seconds. Because it affected nearby receivers with similar amounts of error, if you had access to two receivers in the same area you could quite accurately measure their positions relative to each other.\\nThe Chinese map obfuscation scheme is basically a secret map projection (it's not a constant offset, it varies over locations). GPS coordinates go into a black box and out come obfuscated coordinates, which you then use to plot on the map. Because there wouldn't be much point to GPS receivers that constantly show your location on the wrong point on the map, receivers sold in China actually incorporate the algorithm, and it probably wouldn't be too difficult to reverse engineer.\\n###Response: I'm trying to figure out if you are serious about \\\"Chinese Map Obfuscation Schemes\\\".  Your description of SA and DGPS is spot on, but I can't think of any reason that China would want to obfuscate their maps now that satellites have mapped their country.\"}\n{\"text\": \"Write a response to the following message:\\nNote to self: Never make a joke about apple fanboys. 
They might have a better sense of design but poor comedic sense and would slam you down with downvotes.\\n###Response: I downvoted you because you used the word \\\"fanboy\\\".  It's my policy.\"}\n{\"text\": \"Write a response to the following message:\\nI think we should note that logistic regression has been around forever, and should probably be considered property of \\\"statistics\\\", not \\\"machine learning\\\".\\n###Response: this is literally the first thing taught in both of the machine learning classes I've taken from Prof. Ng at Stanford, so maybe it's the application of logistic regression more than the estimation technique itself that makes machine learning?\"}\n{\"text\": \"Write a response to the following message:\\nTraditionally, when folks from the country moved to the city, they would bring \\u793c\\u91d1 (money in return for a kindness - n.b. It is what you call paying someone to tutor you, too) to show thanks for their landlord taking them under e landlord's wing socially.  In modern Japan this doesn't really happen, but traditions are durable.  Prevailing norms differ by area, but it can be anywhere from nothing to, most commonly, 1 to 3 month's rent, and it is compulsory.  You never receive \\u793c\\u91d1 back.  Most apartments also charge a security deposit, generally one month's rent.  Custom in Japan favors the landlord heavily with regards to security deposits, for example in accepting debits against it to return the apartment to pristine condition (such as buying new tatami), so most people wouldn't expect to receive it back either.  e.g. I got charged a $350 \\\"cleaning fee\\\" after paying for my own cleaners to make it absolutely sparkle.  Their employee apologized but said it was standard practice.\\n###Response: keep in mind key money its also an incentive to not bail after 3months of living there. 
As unlike the US you can terminate your rental agreement at any time with a 30-60day notice and dont have to pay rent until  end of contract or someone else moves in.\"}\n{\"text\": \"Write a response to the following message:\\n> I keep getting annoyed when sites demand I enter \\\"state/province\\\" for international shipping when my country doesn't have that concept.\\nAnd uses a select box of US states, so even if your country did have such a concept (and used it for mail, which may not be the case) you couldn't give it.\\n###Response: If you have a select box of US states and you ship internationally, clearly no one put any thought into the form.\"}\n{\"text\": \"Write a response to the following message:\\nThat makes me happy that people are still fighting authoritarian governments with their time instead of watching football.\\nPanem et Circenses!\\n###Response: I mostly just lurk on HN... but occasionally I wish I had the time to post more regularly. Just so I could build up enough karma points to have downvote privileges for stuff like this.\"}\n{\"text\": \"Write a response to the following message:\\nI have to say, I support this type of thing.  It is social commentary in a way.  Every patch that gets merged (as this one did) that is absolutely ridiculous shines a bright light on the fact that it isn't the content of the patches that matter.\\nIf you can't tell the difference between an obvious troll with an email address \\\"feministsoftwarefoundation@loves.dicksinmyan.us\\\" and a ridiculous patch from a real one, I think the community needs to take a beat and think about why.  Why could it be taken seriously?  Why wasn't it challenged?  Are we so terrified of being called a sexist that we will do insane things?\\n###Response: I would argue that changing language to be gender neutral hardly counts as insane.\"}\n{\"text\": \"Write a response to the following message:\\nA pithy homily does sell business books. 
I can't disagree.\\nBut often the answers to these questions are found (or changed) by experimentation. The point of a scrappy startup tends to be that experimentation, not a sophisticated execution of a great idea.\\nI've read your comments, and while I appreciate your economics perspective, and I do agree that's immature to pretend to be a sophisticated business when really what's being built is a sloppy experiment, it's not going to be well received to attack the focus of this community: experimentation.\\n###Response: I don't see how most of your post has anything to do with what this post is about.\\nThis post is not about a \\\"pithy homily\\\".  It's not intended to \\\"inspire\\\" anybody with platitudes.  This is about understanding the underlying reality of what makes people care enough about your \\\"experiment\\\" to pay you money for it.\\nI was under the impression that people here were starting startups in the hope they they may one day become successful businesses.  To make that transition all the questions above need to be answered.  I thought that might be relevant to some people as being able to answer all those questions is something lots of startups, Y-related or not, have trouble answering.\\n> The point of a scrappy startup tends to be that of \\n> experimentation, not a sophistacated execution of a great idea.\\nWell, that may be the point of your startup, but this post was for people who want their startups to be about good enough execution of an idea that paying customers think is great.\\nThat translates into financial transaction which translates into a functional business.\\nExperimentation should be the points of students still in school, major corporations with money to support it, and people who are tinkering while having another source of income.  If you quit all that and \\\"dropped out\\\" of the system to start a business, well, the only point is to figure out how to start making money.  
Especially over the next 4-5 years.\\nTo the extent the tinkering strays away from that and becomes absorbed in itself for its own ends, you'll wind up with an interesting experiment, not a startup that becomes a business.\\nThat's not me \\\"attacking\\\" the delicate egos of any programmers, that's cold, hard economic reality.  Whine if you wish but people still won't hand over their increasingly scarce money to you as long as you pout instead of figuring out how to answer all the above questions.\"}\n{\"text\": \"Write a response to the following message:\\nHaving spent 6 years as a fully licensed Financial/Investment advisor as firms both large and small (independent), I can say that while I think this is a cool project, marketing it towards investment advisors likely isn't going to get much traction. This is because 99% of advisors (in the US) already have access to tools like this, except much more in depth and integrated with their trading systems. It's in every broker-dealer's (the companies whose investment products the advisors are selling) best interest to provide as much sales support and tooling as possible to their registered reps, so many of them provide things like this free of charge.\\nAside from that, unless this has been vetted and audited by FINRA or a registered CRCP, the chances that any compliance officer (which every RIA is required to have), are virtually non-existent. I haven't dug into the platform to see what type of guidance you're offering/presenting, but you need to be really careful about what advice you give or state that you give. 
As in the US, anything even remotely related to any investment vehicle is highly regulated and providing guidance and advice without the required registrations, licenses and oversight can and will land you in very hot water, very fast.\\nAll that said, I think with the right approach, something like this could be great for personal use as long as it integrated with multiple investment companies and could track and recommend based on all of them.\\n###Response: Are you saying all software used by RIAs has do be vetted and audited by FINRA or a CRCP? I don't believe this to be true.\\nWe don't offer advice, only code.\\nCould you point me to a few custodians or broker/dealers that provide portfolio management & rebalancing software for free? We haven't found any, but def don't have the same experience as you, so maybe we're just missing something.\"}\n{\"text\": \"Write a response to the following message:\\nEric Raymond:\\nit was reported to me at the time that English has a richer technical vocabulary than any other language and is therefore simply a better tool for the job\\nOr probably English is simply the most easy to learn.\\n###Response: I don't think english is easier to learn, just more fault tolerant. We are used to translating bad english and have no trouble with statements like this:\\nIn the early part of the 1950's in the USA, a game called \\\"Violence Fight\\\" was in vogue among mafia, reckless drivers and general businessmen. The \\\"Violence Fight\\\" was the game to struggle for \\\"No. 1 Quarreler\\\" with fighters who were gathered from all parts of the USA speaking boastingly of their strength. And of course a lot of winning money as well as the honor were given to the \\\"winner\\\".\\n(Example taken from here http://www.youtube.com/watch?v=MqnBWgrSt30). Other languages (e.g. french) just don't tolerate stuff like this.\\nFor an example of a really easy to learn language, see modern Turkish. 
For various reasons (vacations and chicks) I've tried and failed to learn many languages. I came closest with turkish, vocabulary was my only sticking point. Ataturk simplified the language only 70 years ago, and it worked fantastically well.\"}\n{\"text\": \"Write a response to the following message:\\nThe issue I have with LastPass is that they claim to never see your master password. This is not true in any sense. Open their website, log in using your master pass. You just submitted it to them. As a secondary thing, pick a random password from the list and say \\\"Show me the password\\\"; it will ask you for your master password. The extension you install has nothing to do with this: you are entering the password directly into their web page and interacting with their JavaScript and their server-side code. At this point they have your master password.\\nI understand why they do this: it's convenient and lets you share/give passwords to others. But this feature is 100% incompatible with the claim that they never see your master password.\\n###Response: My understanding is that even with their web login process your password isn't sent to their servers in plaintext. From the comments on their heartbleed blog post: \\\"We only use one-way salted hashes (after going through PBKDF2 rounds) to send to the server for authentication.\\\"\\nSo their servers get a hashed version of your password, but not the password itself. Their servers likely also store a hashed version of your password so that they can authenticate you. This style of auth is also used when you use the \\\"show me the password\\\" feature.\"}\n{\"text\": \"Write a response to the following message:\\nTo consider yourself in the top 1%, you just have to be richer than 99 other people you know. Humans are not good at avoiding sampling bias.\\n###Response: That's not true. Most of my friends and aquaintences are software engineers or masters+ in some other STEM field. 
(Or are students of a similar caliber). The people you know aren't a good random sample. The top 1% tend to have more friends who are also top 1%.\"}\n{\"text\": \"Write a response to the following message:\\nI am not fully convinced that everybody could be a Gandhi. Wasn't he from a wealthy family and got sent to England to study?\\nProbably everybody could do fasting (although even that might require a certain amount of dediction). But to come of with the psychological strength and dedication would be another matter.\\nMaybe for everybody there is a path of possible actions that would lead him to become like Ghandhi. But that is what I was saying: it certainly isn't obvious what actions that would be. Therefore I think a certain amount of luck is involved.\\nIf you don't have an answer to my question, then why do you disagree with me? How is the poverty in India your fault? I suppose you could at least decide to study and try to find an answer, but there is no guarantee that you would find one.\\n###Response: Not everybody can be a Gandhi, but here is a suggestion. You can light a room with a candle but not a City, you can help by helping in your 'little corner' and I don't mean charity. India has come a long way. I remember when every other year there was a 'Faminine' in India no more. Just by working hard and improving yourself and your family you are helping pull India out of poverty. Don't pay a bribe to no-one. Don't support the system this way. I have a couple of Indian acquaintances that I know made a lot of money out of bribes. Good luck to them. Give your children a good education and good moral support. Help others in indirect ways (ie, give a chance of a job to a kid from a poor family - if he deserves it). I believe education lifts people. If you can find a way to support education do it. 
Support the small trader rather than the big store.\"}\n{\"text\": \"Write a response to the following message:\\nI really want to use Python in a distributed computation project of mine, but I've been running into serious performance bottlenecks with regards to object serialization.  Like many concurrent Python frameworks, Pulsar uses `pickle` to serialize data across processes, and for all but the smallest data structures, workers end up spending 95+% of their time (de)serializing data.\\nI work with video data, so I just ran these rough benchmarks on my MacBook Pro:\\n    import cPickle as pickle  # yes, I'm on Python 2.7.  Sue me.\\n    import numpy as np\\n    \\n    a = np.random.rand(800, 600, 3)  # 800x600 px, RGB channel\\n\\n    %timeit pickle.dumps(a)  // %timeit magic from IPython\\n    1 loops, best of 3: 855 ms per loop\\n\\n\\nIt takes nearly a second to serialize a not-so-huge `numpy` array, which makes it very difficult to do any sort of soft real-time analysis.\\nThis is a huge pain, and (very sadly for this Python aficionado) suggests that Python might be the wrong language for this kind of work.\\nAny suggestions?\\n###Response: Pass the data by compressed, binary file using something like bcolz or h5py?\"}\n{\"text\": \"Write a response to the following message:\\nPeople are horrible at taking variables like growth into account.\\nIt isn't 66$ per user.  It is 33billion/expected number of total users over the lifetime of facebook.\\nSo if you throw in rough estimates of 2 billion facebook users and give them a decade of profiting from an average user...3$ per year per user.\\n###Response: 2 billion active users is almost 1/3 of the worlds population. That seems like a strech.\\nAlso you can't link the 33billion to expected.  33billion is their \\\"valuation\\\" now with 500M active users. There's a certain expectation that your valuation should go up as users increase.\\nPersonally I think the per-user deal is kind of a red-herring.  
Revenues and Profits are the only things that really matter. Users are just a one (good) dataset for estimating growth - but hardly the only one.\"}\n{\"text\": \"Write a response to the following message:\\nGenuinely non-snarky question.\\nIt looks like a standard calculator and conversion app, and it looks like most of the implementation I could imagine being in a calculator app is already done.\\nWhy would you even think of raising funds? You thought it was a good idea to write the blog post, so it obviously crossed your mind.\\n###Response: Because he went through an accelerator. He'll be told for months that the demo day and the pitch will be super-duper important, because investors.\\nI attended a different accelerator than him (but also corporate and in Berlin), and the idea that bootstrapping may be viable, or even that waiting for another 6 to 12 months when one has real feedback from the market, was never brought up. This is especially sad, because the situation in Berlin is basically inverted to the one in SV: Living costs are cheap, raising (seed) money is hard.\\nOur Demo Day... afaik no startup got an investment because they met an investor there :-) Afaik, some proceeded to later (>12 months) raise money, one did have success with crowd-funding. But we all wasted a serious amount of time training our pitch...\"}\n{\"text\": \"Write a response to the following message:\\n> Flexbox. The authors missed the opportunity of offering a saner API and favored sticking to the official spec instead.\\nThis is located in \\u00ab The Meh \\u00bb section. IMHO, it should be in the \\u00ab The Good \\u00bb. Using the official spec instead of reinventing the wheel is always a good move.\\n###Response: Id really like to know what a better flexbox api would be like.\"}\n{\"text\": \"Write a response to the following message:\\nAlso worth nothing that what GHC does in 8 seconds, Ruby does in 48 minutes.  
This is why I always chuckle when people get super-excited about multi-core concurrency in langauges like Perl, Python, and Ruby -- at best, you will make your app 4-8x faster.  But by moving to a different language, you can make your app 360x faster.\\nSomething to keep in mind.\\n###Response: I see your point but unfortunately using Haskell doesn't magically also make your database (/etc other bottlenecks) 360x faster, so... Ruby it is :)\"}\n{\"text\": \"Write a response to the following message:\\n> prematurity with crack seems like the logical thing to study\\nIt is a logical thing to study, and most likely is being studied, by someone else.  After all, this study wasn't looking at premature babies.  That doesn't prematurity with crack isn't a problem, only a different problem.\\n###Response: My issue is less with what the experiment studied and more with how it was summarized (particularly by the headline).\"}\n{\"text\": \"Write a response to the following message:\\nI'm a Ruby developer, and I know and use RSpec, and I still couldn't tell what it did. I know it's an RSpec formatter, but nothing on the page tells me how it changes the format, or why I care, other than that it invokes one of the fastest birth-to-death memes in recent history. I'm not going to install $RANDOM_GEM without actually understanding the value proposition.\\nTake a look at the announcement post for rspec-fuuu (http://sjackson.net/2011/08/30/rspec-fuuu.html). It says \\\"Hey, it turns 'FFFF' into what you're really thinking: 'FUUU'\\\". I understand exactly what the gem gets me, and why I should be amused. Your gem provides none of that. No examples, not even a sentence explaining how the output format changes. Even your lengthy blog post (which does a good job of explaining how to set up a custom formatter) tells me nothing at all about what your gem actually does. 
It's a bad sell.\\n###Response: After your advise, we've added a video showing it in action (http://vimeo.com/32852547) and we linked to it from the project page (http://chucktestar.com/).\"}\n{\"text\": \"Write a response to the following message:\\nhttp://psx-scene.com/forums/f177/sony-has-been-bad-boy-ridic...\\n\\\"A well known hacker i don\\u2019t want to reveal here had all the Sony PlayStation Network functions 100% decrypted as well as providing some nice info about how Sony dealing with PSN members privacy in their online servers.\\nApparently, Sony server gathered everything they can from the PSN connected PS3 console. When i said everything, i meant it. Here, i make all the list of what they squeezed from the IRC chat logs conversation between the hackers.\\nSony monitors all messages over PSN. \\nAll connected devices return values sent to Sony server returns TV, Firmware version, Firmware type, Console model \\nThey also collects data in your USB attached device. \\nCredit card sent as plain text, example: creditCard.paymentMethodId=VISA&creditCard.holderN ame=Max&creditCard.cardNumber=4558254723658741&cre ditCard.expireYear=2012&creditCard.expireMonth=2&c reditCard.securityCode=214&creditCard.address.addr ess1=example street%2024%20&creditCard.address.city=city1%20&cr editCard.address.province=abc%20&creditCard.addres s.postalCode=12345%20 \\n*The best part of all, the list is stored online and updated when u login PSN and random.\\nBut, that\\u2019s not all, with the PSN functions fully decrypted, this hacker can use the function to get all games, DLC, you name it, from PSN store without paying anything.\\\"\\n###Response: > Credit card sent as plain text\\nThis was debunked. 
It's encrypted on the wire.\"}\n{\"text\": \"Write a response to the following message:\\nThe letters are hanging in the air, i think a good demonstration would show the baseline of the letters to coincide with the line pattern!\\nhttp://upload.wikimedia.org/wikipedia/commons/3/39/Typograph...\\n###Response: Hi, creator here. Unfortunately that's not how CSS works. See here for more information: http://joshnh.com/2012/10/12/how-does-line-height-actually-w...\"}\n{\"text\": \"Write a response to the following message:\\nI have no great answer to this question (I'm only peripherally aware of how Samsung works and the entire Android ecosystem is a bit of a mystery to me), but if you do, you could get a lot of bookings on the Japanese equivalents of MSNBC.\\n###Response: Here's one piece of the puzzle: http://www.vanityfair.com/business/2014/06/apple-samsung-sma...\\n> It was the same old pattern: when caught red-handed [for patent infringement], countersue, claiming Samsung actually owned the patent or another one that the plaintiff company had used. Then, as the litigation dragged on, snap up a greater share of the market and settle when Samsung imports were about to be barred. Sharp had filed its lawsuit in 2007; as the lawsuit played out, Samsung built up its flat-screen business until, by the end of 2009, it held 23.6 percent of the global market in TV sets, while Sharp had only 5.4 percent. All in all, not a bad outcome for Samsung.\\nThe Koreans are much more ruthless than the Japanese, who have gone soft.\"}\n{\"text\": \"Write a response to the following message:\\nThe SENS Research Foundation has a network of supporters among portions of the VC crowd in the Bay Area. 
Some of the halo of attached or associated young researchers there should explore putting one of their lines of research more likely to get somewhere interesting soon into YC, see what happens.\\nA completely baked form of allotopic expression or toolkits for glucosepane breakdown in tissues could both produce viable and highly useful technology platforms prior to being far enough along to be applied to the reversal of aspects of aging. Neither of those seems too far away to be non-viable for this sort of timescale, and meaningful biotech projects are very cheap these days if you have access to a provisioned lab.\\n###Response: When are you submitting a proposal ? Ping me if you want to talk more about what a project in this space would look like.\"}\n{\"text\": \"Write a response to the following message:\\nActually, yes, that's exactly what I was getting at. You'd have to make the square cover sqrt(2) times wider than the hole, but you could still prevent it from ever falling in. (Granted, it's not a very efficient solution, though.)\\n###Response: Ok, agreed :-)\"}\n{\"text\": \"Write a response to the following message:\\nThe conditional threading macros are nice. I've been using them from clojure.core.incubator for a while as -?> and -?>>. Anyone know why the name change?\\n###Response: Looks like the equivalents of `-?>` and `-?>>` are now `some->` and `some->>`, is that right? Perhaps the name change is meant to avoid confusion now that there are multiple variants, i.e. also `cond->` and `cond->>`, which are different.\\nIt also occurs to me that threading through `some->` and `some->>` is kind of like doing computations in the Maybe monad in Haskell.\"}\n{\"text\": \"Write a response to the following message:\\nGot that part.\\nI was just challenging your statement that they \\\"would never actually become humans\\\" with the fact that anything of the homo sapien species is human, which of course would include the embryos.  
Just like dog embryos are very young dogs, and cat embryos are very young cats.  'Embryo' is just one stage of a single line of development.\\n###Response: The trouble is that \\\"human\\\" is a very ambiguous word. If a \\\"human\\\" is a living organism made out of cells with DNA typical of humans, then certainly embryos are humans. If a \\\"human\\\" is a thinking, language-using, tool-using bipedal mammal, then certainly embryos are not humans (but neither are 3-month-old babies).\\nThere's nothing wrong with that ... except that most of us have strongly held beliefs about \\\"humans\\\", which don't necessarily come with precise definitions of \\\"human\\\" attached to them.\"}\n{\"text\": \"Write a response to the following message:\\nThese kinds of advancements in the language are why I'm glad I've switched to Python 3, Python 2 needs to die already.\\n###Response: Python 2 will never die. Ask Kenneth Reitz for example. He says he will use Python 2 forever. There are a lot of companies with huge codebases who never will port to Python 3.\"}\n{\"text\": \"Write a response to the following message:\\nShameless plug:\\nAirTran and Virgin have wifi available on every flight, but Delta does not.  So, find out which flights to book on Delta @ http://www.haswifi.com\\n###Response: Kayak also has information on this if you're looking up a flight from there.  
You can also use \\\"has wi-fi\\\" as filter.\"}\n{\"text\": \"Write a response to the following message:\\nIs this a front end on top of the current government provided resources [1] [2] [3], or does this include other sources?\\n[1] http://www.recalls.gov/\\n[2] http://www.cpsc.gov/en/Recalls/\\n[3] http://www.saferproducts.gov/\\n###Response: I'm using those sources, but I'm also monitoring a few dozen parents and educators forums where safety concerns are sometimes raised, even if they don't convert into actual recalls, and treat those reports as minor alerts.\"}\n{\"text\": \"Write a response to the following message:\\nExcept for Facebook and LinkedIn, I haven't used my real name and any personally identifiable information on the Internet in 20 years, since I realized that everything I wrote on Usenet would be there forever. There is no value with having the things I say potentially used against me for the rest of my life.\\n###Response: I am tempted to upvote this with the HN accounts of my desktop, my laptop, my work laptop, my phone, my work phone, my tablet and my TV. ;)\\nSeparation is good opsec.\"}\n{\"text\": \"Write a response to the following message:\\nThis assertion is repeated often by defenders of file sharing, but it's been uniformly rejected by the legal systems of --- from what I can tell --- most of western civilization.\\nThe RIAA makes over-the-top claims about the evils of file sharing, but district and appellate court judges in the US are not uniformly shills of the RIAA, and none of them adhere to your logic.\\n###Response: I wasn't suggesting it was legal, or non-infringing, only that referring to it as doing damage is incorrect.  Whether something is actually damaging doesn't have perfect correlation with whether you can get legal damages in court.\"}\n{\"text\": \"Write a response to the following message:\\nYou're putting a lot of faith into IBM as far as user experience goes. 
You, my friend, have apparently never used Lotus Notes.\\n###Response: Or ClearCase.\"}\n{\"text\": \"Write a response to the following message:\\nThis article borders on ridiculous.\\nI unfollowed the so-called 'power users' a long time ago. People like Chris Brogan, Scoble, Guy Kawasaki, etc. There was too much noise and not enough signal.\\nFor my own uses, I couldn't possibly follow 1000 people. I would much prefer to follow a smaller number of people and be able to read all their tweets. I currently follow around 300, most of whom tweet a handful of times each day. Following 1000 people who update dozens of times each day doesn't seem like it's scalable. You would either have to ignore 90% of what you saw, or spend the majority of your day watching Twitter.\\nThe fact that Twitter employees use Twitter in the same way that most of their users use Twitter is not worth complaining about. It's exactly what you would expect. The fact that a handful of people (ab)use Twitter and take it to extremes doesn't mean that anyone else needs to.\\nThis sort of article is exactly why tech reporting gets a bad rap.\\n###Response: Couldn't agree more.\\nThe author appears unfamiliar with how Twitter actually works pragmatically.\\nAnyone following more than 100-200 folks on Twitter is likely in the category of \\\"Filthy Marketer.\\\"\\nAnyone routinely posting more than 3-4 times a day is likely also in the FM category, or is a babbler begging to be unfollowed or banished to Facebook.\\nThe Twitter employees are using the tool in its most elegant fashion.\\nArticle = F.\"}\n{\"text\": \"Write a response to the following message:\\nhttp://what-if.xkcd.com/8/\\n###Response: Why didn't I know about this.  
My evening has been forfeited for my Randallicious overlord.\"}\n{\"text\": \"Write a response to the following message:\\ngmail aol hotmail/live\\nA few alternatives i've used, Gmail being my favorite although they mess around with the UX from time to time.\\n###Response: I have a gmail account for email too. I am talking more specifically about a news portal. Any thoughts there?\"}\n{\"text\": \"Write a response to the following message:\\nA couple thoughts here:\\n1) Great to see Mr. Munoz getting some press for himself since it's been Chris Anderson getting all the attention until now with his involvement in 3d Robotics. A bit sad that it's in the context of Munoz being \\\"a mexican immigrant\\\" rather that merely being \\\"an awesome human\\\" but I guess that's the angle here\\n2) There is a big rift between the UAV community and the general public regarding terminology. No one - not a single person as far as I can tell - in the community calls them \\\"drones\\\" because it is an ugly word and it's associated with machines that fly to pakistan and murder people. Everyone - every single person - outside the UAV community does call them drones. I see no solution to this except for the fact that if you're a part of that community and you meet someone else who is, and you call it a UAV or multirotor, they know you're \\\"in\\\" which is sort of cool I guess ;-)\\n###Response: I have found that to the general public, drone now means either of two distinct things:\\n1. Any type of multirotor, regardless of how autonomous it is or whether it has a camera transmitting video\\n2. 
An autonomous fixed wing military aircraft used to take grainy green videos and drop bombs\\nAlthough, actually, the second option would more likely not be the default, and called a \\\"military drone\\\" to clarify.\"}\n{\"text\": \"Write a response to the following message:\\nMaybe I misunderstand the OP's point but I specifically took issue to this very topic, at the expense of my karma apparently, because religion is differente.\\nReligion is 'solved' in the sense that they are man made belief systems, all which proclaim to be true, and most holding mutually exclusive claims between each other.\\nIt is not debatable in a way left-right politics and their backers' stances and reasonings are. I'm not hating on them because it is not their fault. I feel for those trapped, I really do.\\nGiven time societies turn (and have already turned in large parts) away from it through education and the undrstanding of the universe and the human condition via scientific method. There is no middle ground in this specific instance.\\nYou would not entertain an alchemist or an astrologist either. Religion will soon join them in that regard in the collective consciousness.\\n###Response: keep in mind that the article was about people who are not domain experts, seeing things they don't think make sense within a domain, and deciding those things are stupid prior to gaining proper understanding.\\nI feel fairly confident not entertaining someone who claims to be able to turn a lead brick into gold in their living room, because I know enough about physics and chemistry to understand the energy difference between lead and gold.  But I would entertain someone who had nuclear-lab-grade equipment who claimed to be able to convert a small number of atoms.  They might actually be full of crap, but my domain knowledge is not specific enough to be able to say.\\nWhen it comes to religion, I think the same thing with non-experts happens, and you're doing it.  
You claim to know all religious are \\\"man made belief systems\\\", \\\"unjustified\\\", with \\\"no logical backing\\\" -- claims that a domain expert can make regarding perhaps a few religions they're intimately familiar with, but not that anyone can actually make for all religions.  How many religions have you studied deeply enough to really be able to make that claim?  If Christianity is on your list, have you ever read the Didache, or De Principiis?  If Mormonism is on your list, do you know about the Seer Stone or Elijah Abel?  If Judaism is on your list, are you familiar with the Midrash or Siddur?  These aren't particularly obscure references; they're things anyone well-enough versed in any of these religions to say \\\"it's bogus because...\\\" should know about.\\nIf you don't know about those things, but you're sure each of those religions are \\\"man-made\\\", \\\"unjustified\\\", with \\\"no logical backing\\\", you should consider where your certainty comes from and how certain it really is.\"}\n{\"text\": \"Write a response to the following message:\\nThis is a really amazing post.\\nTwo things that startled me:\\nFirst, there is apparently a market for vulnerabilities that bypass the Flash access controls for cameras and audio recording. There can be no benign purpose for those exploits. Nobody penetration tests a Fortune 500 company looking to see if they can light up the cameras on worker desktops.\\nSecond, there's an eighty thousand dollar price tag for a Netgear vulnerability. That shocked me: serverside, highly targeted. Only, it turns out, there probably isn't any such market. Apparently, some of these bugs are listed for sale at exorbitant price with no anticipation of ever selling them. 
They're not listed at close to a clearing price, but rather just aspirationally, with the idea being that anyone who will someday, maybe, engage a serious zero-day broker for a Netgear vuln is probably going to derive six figures of income from that bug.\\nThat's the theory, at least.\\nFor future HN bug bounty/black market threads: note the absence of Facebook XSS vulns on these price lists. Nobody is paying tens of thousands of dollars for web vulns. Except the vendors. :)\\n###Response: As someone currently sat next to a netgear router it did make me look at the thing and wonder.\\nNow I'm considering a small fanless PC running something like pfsense for home... what a world eh.\"}\n{\"text\": \"Write a response to the following message:\\nWhat's essential?\\n###Response: Sorry, I probably meant novel. What's novel vs. what is actually useful.\\nSelf expression is useful, and it comes in all forms.\\nCool quotes and funny photos are the social-media version of PowerPoint -- a limiting medium that squelches higher bandwidth idea transfer.\"}\n{\"text\": \"Write a response to the following message:\\n>Abbott and Donald smoked a cigarette in the bathroom and sorted through the grim math\\nSo they can afford cigarettes?  I have a hard time feeling bad for people with low income and expensive vices.\\n###Response: Smoking is an addiction, and it is highly correlated with poverty for a variety of reasons, some of which I'm sure you can find if you step off the personal responsibility high-horse for a moment.\"}\n{\"text\": \"Write a response to the following message:\\nThis Firefox plugin lets you see the IP you connect to which is useful to see if you get IPv6 https://addons.mozilla.org/en-US/firefox/addon/showip/?id=59...\\nThe bbc is odd, as www.bbc.co.uk has ipv6 address 2001:4b10:bbc::2 but Firefox only connects to ipv4.. 
[edit] finally connected on ipv6 after a restart...\\n###Response: from what i remember, that add-on just tries to resolve hostnames that it sees, it doesn't actually show you whether you connected to that specific ipv4 or ipv6 address.\"}\n{\"text\": \"Write a response to the following message:\\nI would think this leads quickly to another question people might not want to answer... Does empathy increase the probability that the actions you decide to take will reduce or increase suffering?\\nI am a fan of an old German saying:  \\\"Good intentions are the opposite of good actions.\\\"\\nEvery tragedy in human history that I know of was born from good intentions.  Make society stronger, make the majority happier, make people safer, make them healthier, etc.  The bottom line is that good intentions have no influence on the consequences of actions.  To determine what the consequences will be and whether a course of action is likely to help or hurt, we might have to abandon empathy during the discussion.  Of course, we shouldn't ignore empathy when considering whether we want the consequences or not.\\n###Response: >Does empathy increase the probability that the actions you decide to take will reduce or increase suffering?\\nInteresting question. You are speculating about human psychology based on your understanding of history. Though you haven't cited any literature to back up your statements, and you haven't claimed that you have knowledge of any psychological literature, I still find your opinion valuable.\\nI've read some of the literature related to this question. Here is one thing it says: psychopaths are people who almost never feel empathy. They decide to take many actions and many of those actions have disastrous and extremely harmful consequences for those around them. The harm they do is grossly out of proportion compared to the harm committed by non-psychopaths. 
They are extremely exploitative and cruel and disproportionately commit a large number of crimes.\\nPsychopaths are now roughly defined, by researchers, as people who don't experience empathy. If you want to see how people without empathy behave, look no further than the psychopath.\\nIt's not pretty, and it contradicts your idea here.\\nA psychologist who specializes in psychopathy is Dr. Stout, who wrote several famous books about this topic.\\nThe research shows that lack of empathy absolutely and unequivocally leads to hurting others. The actions of those who lack empathy hurt other people in far greater proportion. Operating empathetically involves feeling rather than logical thinking and it requires being in touch with your emotions.\\n\\\"The road to hell\\\" phrase refers to INTENTIONS, not EMPATHY. Intentions are cognitive, they are analytical. So good intentions may be meaningless (if this proverb has any truth) because good intentions are NOT empathy. What is more important are the FEELINGS of empathy.\"}\n{\"text\": \"Write a response to the following message:\\nIt really is oversimplified.\\n\\\"Creating dozens of light sources simultaneously on screen at once is basically not doable unless you have Mantle or DirectX 12.  Guess how many light sources most engines support right now? 20? 10? Try 4. Four. Which is fine for a relatively static scene. \\\"\\nFor my Masters degree project at uni I had a demo written in OpenGL with over 500 dynamic lights, running at 60fps on a GTX580. Without Mantle, or DX12. How? Deffered rendering, that's how. You could probably add a couple thousand and it would be fine too.\\n\\\"Every time I hear someone say \\u201cbut X allows you to get close to the hardware\\u201d I want to shake them.  None of this has to do with getting close to the hardware. It\\u2019s all about the cores\\\"\\nAlso not true. 
I work with console devkits every single day and the reason why we can squeeze so much performance out of relatively low-end hardware is that we get to make calls which you can't make on PC. A DirectX call to switch a texture takes a few thousand clock cycles. A low-level hardware call available on Playstation Platform will do the same texture switch in few dozen instruction calls. The numbers are against DirectX, and that's why Microsoft is slowly letting devs access the GPU on the Xbox One without the DirectX overhead.\\n###Response: Is this a direct effect of the design of PC hardware, or could we theoretically build a PC OS that would let you do a texture switch as efficiently as on PS/XBOX/etc?\"}\n{\"text\": \"Write a response to the following message:\\nTests show standard UI inputs are understood the best. Just use the checkbox for your website.\\n###Response: Additionally, the iPhone checkboxes were created because clicking checkboxes is hard to do with the iPhone interface, not because they're better than plain old checkboxes.\\nJust because Apple does it, doesn't mean it's the best way to do things all the time.\"}\n{\"text\": \"Write a response to the following message:\\nI'm just confused. Why on earth would you go to .blockbuster? What domain would they use? \\\"movies.blockbuster\\\" ? It's so strange - some of these are extremely long.\\n###Response: I think it's more that they felt obligated to register it to protect their trademark than them actually wanting to use it.\"}\n{\"text\": \"Write a response to the following message:\\nIt's uses hover and zoom to navigate.  not clicking.  Did you watch one of the videos?\\n###Response: It's also incredibly slow, though. I think that is what axod was referring to. The UI gets in the way of what actions you want to perform.\"}\n{\"text\": \"Write a response to the following message:\\nIndeed it is. 
I'm afraid Google only have their own bug bounty program and I think PayPal no longer have one.\\nIn terms of where to go from here I've no idea.\\n###Response: If you asked the google bug bounty program about it, I'm sure they'd either answer for youtube, or send you in the right direction. While unlikely, I wonder if it was some kind of hash table collision. If you change your password again that might solve the issue if that was the root cause (a guess).\"}\n{\"text\": \"Write a response to the following message:\\nMatching service style? haven't heard of anything like but then I'm white and speak English. Sounds like a neat service. Good luck.\\n###Response: Yes. Maybe for someone who has a professional teaching background too (he could find new students). Should also be a great alternative for people who cant achieve/spend a few months in a foreign country.\"}\n{\"text\": \"Write a response to the following message:\\n> Kindle is practically the reason I was going to buy it.\\nThen why not just buy a Kindle?\\n###Response: Because iPad's also perfect for reading the web. I've tried it and it feels so much better than a computer. You can't effectively browse the web with Kindle. Now I'm going to consider buying some Android tablet.\"}\n{\"text\": \"Write a response to the following message:\\nI'm having a hard time imagining how this whole thing works with a multi-user Mac. Right now, every user account has one iTunes account associated with it, such that different users own different iOS App Store apps. Does that mean that the Mac App Store will only install apps per-user? 
It didn't seem to suggest that (they said that the app licenses are \\\"per computer\\\", though they also called them \\\"personal.\\\")\\n###Response: I would bet it still installs stuff in /Applications and just has a per user preference for showing the arrangement of the apps in launch pad.\"}\n{\"text\": \"Write a response to the following message:\\nHow do you deal with bad coding practices?  Such as over complicated & verbose code and copy paste everywhere leading to an unmaintainable mess?\\n###Response: Short answer: testing.\"}\n{\"text\": \"Write a response to the following message:\\nI think being an onocologist is the most depressing profession one could go into. I mean, you have this disease that is hugely fatal, many of your patients die, you have lots of noise on survivability, sometimes you win, often you lose. You can cause lots of anxiety with a misdiagnosis or a prediction that doesn't pan out.\\n###Response: Not to mention if you did your job well, the patient thanks you by saying \\\"fuck you\\\".\"}\n{\"text\": \"Write a response to the following message:\\nWe implemented a \\\"mega box\\\" drop down at the University of Nebraska (http://www.unl.edu/) in 2009. We didn't do a production a/b test but we did do in-person a/b testing using a custom web app that tested user's ability to find things in the navigation in the mega box compared to a traditional flag model. Mega box won.\\nAlso note that this nav and template isn't unique to the homepage like most higher Ed institutions but is used site wide by all units. That was an epic battle story for another time.\\n###Response: your font could be a bit bigger and you could provide a larger click area - but I like the design.\"}\n{\"text\": \"Write a response to the following message:\\nOr maybe our local gamers have been voting with the paychecks they earn. Patio, I generally think your commentary is valuable and insightful. 
In this case I think you've reduced a complicated situation to a nifty sounding catch phrase that appeals to our libertarian leanings (even mine).\\nI used to manage a high-volume retail game store (~3m in yearly sales, $30 at a time). Many gamers literally do spend all of their disposable income on the games, and pirate what they cannot afford (and surprise, huge swaths of the gamer population are poor as dirt, and with the amount of time and money they spend on games this isn't changing). This kind of DRM would exist if 1 person or 1million people were pirating games, because its the only secure solution.\\nI don't think this kind of DRM will be successful in improving PC sales in general. The only reason Blizzard gets away with it is because their games will always be at the top of gamer's purchase lists. They are high quality products with a shitload of content and beautiful user interfaces. If every game company followed suite, I don't think the result would be successful.\\nedit, and context: I don't really care what Blizzard does to protect their games. People were concerned about Starcraft 2, the required online connectivity hasn't affected my game experience at all in the last year. I think this will probably be true for D3, as well.\\n###Response: Here's a visualization where your understanding of the world and my understanding of the world converge:\\nLet's say the statistical composite gamer buys 10 games a year and consumes 100.  Game companies don't get rewarded for being in the top 100 and they don't get rewarded for having perfect anti-piracy: they get rewarded for being 1 step ahead of #11.\\nYou're probably insanely familiar with the importance of the new release window, where the supermajority of game sales (and more importantly to publishers, original sales) are made.  
All you have to do is be desirable enough and annoying enough to be a must have in those first ten days, and you secure your coveted position among games that our poor subject actually purchases.\\nWith regards to poverty and video game players: personally, I don't really see a justice concern in \\\"I am wealthy enough to consume video games, but not wealthy enough to consume video games up to my desired level of consumption.\\\"  If they were stealing food to feed their children, I'd be darn careful with how I talked about them, but unless Diablo III tastes great with ketchup...\"}\n{\"text\": \"Write a response to the following message:\\nWhat is meant by intuitive thinking?\\n###Response: I don't think the author talks about intuitive thinking in positive vein. Whatever is not analytical is named intuitive. \\nHowever, the real definition of intuitive is probably that kind of thinking that comes out of subconsciously learned patterns and not from proactive conscious thought - often cited in the usual example of Friedrich August Kekul\\u00e9 discovering the benzene structure from a dream of a snake biting its own tale.\"}\n{\"text\": \"Write a response to the following message:\\nI've read parts of her blog, but I recall it seeming like it required a lot of regular effort to maintain that schedule; almost as though she was still fighting polyphasic sleep every day.\\nDo you feel that it requires regular effort on your part to maintain that schedule?\\nDo you feel any differences mentally?\\n###Response: My experience was that is wasn't a physical struggle to stay on the schedule - it was a social one.  The world does not do polyphasic and you have to defend your time.  I eventually crashed out when I was supposed to come home ad sleep after work but my wife took me to dinner instead.  
Being up for 7 hours straight wiped me out for a day and it was too socially challenging to keep it up.\"}\n{\"text\": \"Write a response to the following message:\\nAfter the last time Apple cock blocked a Google App and nearly ended up in court, they'll play nice I imagine.\\n###Response: Are you sure? Putting Maps aside, Apple is currently dragging their feet (or worse) over Google Voice Search:\\nhttp://searchengineland.com/ios-users-to-get-enhanced-google...\\nNote the current Google Search app on iTunes (last updated on June 21, 2012 as of this writing, despite Google's announcement):\\nhttp://itunes.apple.com/us/app/google-search/id284815942?mt=...\"}\n{\"text\": \"Write a response to the following message:\\nIs the argument that they have enough money and thus shouldn't make more?\\n###Response: The argument that it is difficult to hold Apple as a victim when they're rolling in enormous success.\\nBut yes, as a society and culture there is a natural disdain for excessive success ($100 billion in cash reserves? That is deeply unsettling), and as a natural course both the government and the courts are going to be less favourable to Apple.\"}\n{\"text\": \"Write a response to the following message:\\nAha\\u2026 an answer to my previous question!\\nhttp://news.ycombinator.com/item?id=573730\\n###Response: Here's a calender you might be interested in: http://www.google.com/calendar/embed?src=nycusergroups%40bra...\"}\n{\"text\": \"Write a response to the following message:\\n> using UTF-8 to examine the informational properties of Chinese text is absurd.\\nBy the same reasoning, using 8-bit ASCII to examine the informational properties of English text is also absurd. English can be comfortably expressed with only 6 bits per character, after all.\\nAnyway, I did include a GB-encoded version in my calculations. And once you compress the text, the charset-related difference becomes much smaller anyway.\\n###Response: Agreed. 
In fact, I'll go farther and say I misspoke, raising the objection you rightly criticize when it would make more sense to object to using UTF-8 to compare english text to chinese text. I have no issues with your comment, though, and I agree that under compression is the correct place to make the comparison. It's faintly aggravating to me that the same text compresses differently according to the intermediate encoding, but that's between me and the universe.\\nEnglish can be comfortably expressed in 5 bits per character, using 26 letters and the five symbols [. ',\\\"] . Capitalization and numeric digits are nice-to-haves. ;)\"}\n{\"text\": \"Write a response to the following message:\\nThe privacy angle looks like a non-issue to me.\\nYour public items get added to their news feed. They could view that information even if they didn't issue a friend request.\\n###Response: Agreed. I suspect people have their newsfeeds set to \\\"friends only\\\", in which case the \\\"de-facto follow feature\\\" shouldn't work unless the friend request is accepted.\"}\n{\"text\": \"Write a response to the following message:\\nI think, in the US at least, part of the problem would have been Verizon/Sprint. Now with everyone going to LTE and using SIM cards for that it seems more viable (fewer network restrictions).\\n###Response: It's a shame Apple are a US company, since the US is nearly the only country with this problem.\"}\n{\"text\": \"Write a response to the following message:\\n> 5) There would still be much internet hate.\\ni honestly don't think there would have been much hate. Allot of the anger was as how drastically she reacted by posting their photos online.\\n###Response: The hate stems from the loss of the job at Playhaven. 
Without that happening, this event becomes pretty mundane - it's not like social activists don't regularly name and shame with photos.\"}\n{\"text\": \"Write a response to the following message:\\nNone of the revelations of US spying of the past year have particularly shocked or surprised me, and probably not very many here.\\nThe level of sheer insanity displayed in the latest revelations however, with yesterday's \\\"horde of Orcs\\\" and this sickening slogan \\\"nothing is beyond our reach\\\" is worse than I could have imagined.\\nUS intelligence has not just gone beyond what is acceptable, they've completely gone off the reservation and have lost all perspective. Historically, this doesn't end well.\\n###Response: >Historically, this doesn't end well.\\nDoesn't end well for us the little people. Those who operate in the shadows, those have the power to play these games probably have it a lot better since they undoubtedly operate outside the rules they set for the rest of us (witness the lack of accountability shown by anyone who has ever been involved in any of these shenanigans).\"}\n{\"text\": \"Write a response to the following message:\\nWe are staying near the Presidio on Lombard street... I get the impression there isn't a whole lot to do around there, so we'll probably be elsewhere in the city though I have nowhere in particular I need to be on Sunday evening. We have no transportation other than public transportation and cabs (not 25 yet to rent a car) which hopefully works out. \\nThanks for the food recommendations, I love Asian food and forgot that San Francisco is one of the best places for it. Will probably hit up Ebisu and the Dim Sum place. 
Ebisu actually seems extremely reasonably priced based on how much good sushi goes for in Pittsburgh.\\n###Response: Steve's Ebisu, make sure you order the dragon and 49er roll, to live for.\"}\n{\"text\": \"Write a response to the following message:\\nI still don't understand why Firefox OS will go anywhere when webOS did not.  Aren't we talking about basically the same thing? HTML5/JS/CSS?  Is it timing, the changed environment that now focuses attention on web apps?  Is it the reputation of Mozilla vs that of Palm/HP?  I'm not saying web apps won't have a place, I'm just extremely confused about the seeming contradiction or about-face here.\\n###Response: Palm/HP didn't exactly make it a huge deal to go to standards bodies, did they? That was my impression. Could be wrong.\"}\n{\"text\": \"Write a response to the following message:\\nI'm really reluctant to bash someone else's project, and I think Breakup Notifier and its developer are AWESOME, seriously.\\nBut I'm a little surprised that you pivoted from an original and successful idea to an unoriginal and derivative idea.\\nI must have seen half a dozen Facebook-powered crush sites like this, several of them on HN as a \\\"review my startup\\\". None have taken off. What's to stop me from crushing on everyone just to see who likes me?\\nIsn't the anonymous crush site sort of like the to do list app that every developer thinks they can improve on?\\nPerhaps a pivot to a straight up dating site/monetizing with dating ads might have made more sense.\\n###Response: What's the problem? Most of the difference between success and failure is marketing. This is a great way to take advantage of the momentum of his previous application.\"}\n{\"text\": \"Write a response to the following message:\\nThey are paying for national defense and the social spending that allows the state to exist in a stable fashion.  The wealth of a society can only exist with the aid of rule of law and relatively social stability.  
Someone has to pay for that stability.  Why not tax people based on how much they benefit from it?\\n###Response: > The wealth of a society can only exist with the aid of rule of law and relatively social stability.\\nDo you really think that rich people benefit more than the poor from rule of law and social stability?  If so, you're wrong.\"}\n{\"text\": \"Write a response to the following message:\\nThere are plenty of other categories of people who don't apply as well.  What percentage of female applicants implies no prejudice?  Where is this even coming from?\\nI question the assumption that the perceived low female application numbers are due to prejudice. It could be due to differences in the predispositions to excel at different types of work between the sexes. Men and women are different.\\nWhy are so many professional basket ball players black? Racism?\\nThere is an implicit assumption in this and its validity is assumed. It is not being questioned.\\n###Response: Yes, we've heard you say this before: https://news.ycombinator.com/item?id=7327294\\nThe correct default assumption is that any two individuals are similarly capable.  The burden of proof is on the person asserting that they're not.  The presence of physical differences between men and women is no evidence that they're differently capable in terms of founding a startup.  You would need specific data to that effect that accounted for the confounding factor of relatively low female participation in the startup scene.\\nAnd look, in the very article you're commenting on, the people best able to to analyze that have decided that women founders are worth pursuing.  I'm guessing they know a bit more about it than you do.\"}\n{\"text\": \"Write a response to the following message:\\nBeing able to identify the \\\"official\\\" project on GitHub is often tough, especially for projects with hundreds of active forks, none of them with any discernible merge-back pattern. 
Not sure if it's a great solution to allow a single use the ability to \\\"own\\\" a project name though. You can imagine it resulting in some of the problems associated with domain naming: name squatting and legal fights over trademarks.\\n###Response: Not sure if it's a great solution to allow a single use the ability to \\\"own\\\" a project name though. You can imagine it resulting in some of the problems associated with domain naming: name squatting and legal fights over trademarks.\\nArguably the name of a project is a trademark, and no one has permission to use it unless you grant them that permission.  Just because you license the copyright to your code under an OSS or Free Software license, does not automatically entail granting a license to the trademark on the name.\\nIMO, this is one thing that makes Github a bit of a legal tar-pit... it's trivially easy to fork a repo, but nothing makes (or even encourages) you to change the name, or make it clear that your repo isn't the \\\"official\\\" Project Foobar.  So far it seems that most people just take a \\\"don't ask, don't tell, don't care\\\" sort of approach to that, but I have a hunch it's going to get somebody burned eventually.\"}\n{\"text\": \"Write a response to the following message:\\nI am amazed at the vitriol in the comments following the article. A sign of serious issues a-brew.\\n###Response: I dunno, xenophobic hate-mongers have been present on the internet for a long time. This seems pretty typical to me. Probably some white supremacist website linked to this article.\"}\n{\"text\": \"Write a response to the following message:\\nWhat a ridiculous comment. Take your trolling comments somewhere else. Nearly every company is guilty of reusing collected data and almost always without telling customers.\\n###Response: It's not a ridiculous comment. Do you work at Apple? What's wrong with him expressing his opinion. 
I feel the same way and belive we can have a discussion about what's right and what's wrong. I agree that most companies just automatically subscribe you in when they create new feature and they make it users responsibility to opt out but I hate it as much as most of us. At least they (including Apple) should be responsible of informing users if they decide to keep us opted in automatically. Tracking is not a joke, people are very concerned about it.\"}\n{\"text\": \"Write a response to the following message:\\nPutting on my client/product manager hat for a moment, this is incredibly annoying. You are putting me in the position of either (a) fixing the obvious ducks and not looking closely at the real problems, or (b) having to micromanage you and scheduling another review to make further changes. Maybe you get your desired result (a), but my opinion of your work is going to be much lower since you are making obvious duck-sized mistakes.\\nIf you are a developer who takes this approach, you may want to consider whether the short-term benefit of avoiding criticism is worth the long-term downside of having me think you are incompetent.\\n###Response: If you interpret this to be about \\\"avoiding criticism\\\" instead of as a counter-reaction to meddling, clueless middle management eager to justify their job position at a cost on employee time and product quality, then you're likely to be part of the very problem ducks fix.\"}\n{\"text\": \"Write a response to the following message:\\n> \\\"Good heavens! If there's anything else you can imagine doing, don't stay in math\\\"\\nI heard similar advice from people in many different lines of work including laws, finance, medicine, academia... usually they add it's not as interesting as it used to be.\\nIt's a matter of compromise. 
I wish I could have the salary of banker, the freedom of a tenured professor and the prestige of a doctor.\\n###Response: > the freedom of a tenured professor\\nWell, one disadvantage of academia is that you don't really get to choose where to live, even if your CV is quite good. Indeed, there are a number of academic couples that make crazy long-distance commuter marriages work because they weren't able to get jobs at the same institution (or in the same city).\"}\n{\"text\": \"Write a response to the following message:\\nIn my direct experience as an employer most people who apply for a programming job can actually program. I think it comes down to a question of degrees. People who spend their time on heady research projects might well proclaim me as someone who has \\\"clearly never written a line of code in his life\\\" (it's happened). And yet I've had competent programmers that I ran ask me how I got to be so good. Truth is I'm probably middle of the road - I know plenty I look up to, and plenty I could teach. Hell, my mom can program (she writes her own shell scripts at times).\\nI think the practice of \\\"write this program before we'll talk to you\\\" is largely driven by technical staff who don't understand how to run an interview where a discussion of theory and practice can suss out someone who doesn't actually have the experience. Hell, even if they don't have a language under their belt if they can pass the theory & practice Q&A I have every confidence that they can pick it up and fake it enough in the meantime.\\nI've had bad hires before - lazy, conceptually challenged, addicted to crystal meth - but all of them could have passed a functional programming test. 
Sure, you'd like to be hiring the \\\"excellent\\\" programmers - but a basic functional test isn't going to highlight them anyway - and may very well drive them away from you.\\nI should note that I don't have a problem with asking for code samples - that's a good practice I think, I do it myself. Just the complete this arbitrary test style.\\n###Response: I take that back, I did hire someone about 11 years ago that just didn't belong in the position at all. It was clear within the first week and we let him go. I consider that my failure though, not the process.\"}\n{\"text\": \"Write a response to the following message:\\nIncredible for a freelancer like me!\\nI just hope all this positive feedback doesn't result in crazy-ass pricing :)\\n###Response: Thanks, very excited that you find it useful! :)\"}\n{\"text\": \"Write a response to the following message:\\nKickstarter just updated their terms so it is possible this triggered that: https://www.kickstarter.com/terms-of-use#section4\\n\\\"The creator is solely responsible for fulfilling the promises made in their project. If they\\u2019re unable to satisfy the terms of this agreement, they may be subject to legal action by backers.\\\"\\nHowever the CLANG project did make a prototype but it would be nice if projects fail they have to be open sourced or able to be observed or carried forward if possible.\\n###Response: They gave their game as part of the rewards. I wouldn't say that a prototype fulfills this promise.\\nPerhaps Kickstarter shouldn't allow to give the product people back with money as reward because if the project fails this product reward goes down the drain too. I know like 90% of all projects do that and its kind of what makes Kickstarter but still its not really logical. Or another option would be a rule that backers have no rights at all. 
This would force the project makers to publish budgets and more details about the project otherwise nobody would back anything.\"}\n{\"text\": \"Write a response to the following message:\\nI know very few scientists who are terribly pleased with their coding skills, and many who actively admit they're not great coders.\\nThe problem is you can't rely on a \\\"code for science\\\" campaign. A research study isn't something done in a weekend Hackathon, though they are occasionally helpful. What happens if, 2 years into your 5 year study, your helpful volunteers, leaving you with an incomplete code base beyond your skills to maintain or extend?\\n###Response: I know of two ways currently being tried to address this.\\nThe first is to have scientists break their problems down into chunks that can be performed by volunteers but which aren't completely beyond the ability of the scientist to manage the resultant code. This is doable for much of physics and computational biology, less so where a scientist isn't a programmer themselves. We're taking this approach with http://solvers.io.\\nThe second is to have the scientists mentored by programmers to help them become better at it. This is the approach being taken by http://interdisciplinaryprogramming.com.\\nIn both cases, any particular volunteer dropping out is probably not a massive blow. If a project is going to rely on a particular programmer long-term, they probably need to find the funding to pay them.\"}\n{\"text\": \"Write a response to the following message:\\nThere's the type where you disobey unjust laws, and then actually go to prison when you get caught in order to demonstrate the injustice.\\n###Response: What is interesting is that if we all do what Aaron did and refuse to take the deal, the courts would eventually be unable to handle all the cases.  The USA simply arrests too many people each year for everyone to exercise their right to a trial.  
I suspect that if hundreds of thousands of people flagrantly committed felonies and demanded a jury trial, it would be one of the most effective forms of civil disobedience in the history of our country.\\nOf course, that means organizing hundreds of thousands of people and convincing them to put their lives and livelihoods on the line.\"}\n{\"text\": \"Write a response to the following message:\\nAbout a year ago I left a cable modem and internet service (Time Warner) at an apartment I was moving out of while my friend continued to stay there.  I had configured the thing in a manner I thought to be fairly secure -- strong password, no broadcast, etc.. One day the internet goes down and my friend doesn't know what to do.  She calls the ISP and asks them what's wrong.  They say they can't release any information about the service to her without my permission, so I suddenly get a three-way call explaining that my friend and the ISP representative are on the line and I need to give my authorization to access the account information.  Being the person I am, I attempt to troubleshoot things over the phone before giving out any sort of account credentials.  Eventually, I ask her to log into the router configuration page.  She doesn't know the password and the first one I gave her doesn't work.  The representative chimes in \\\"That's fine -- I can just change it from here.\\\"\\n\\\"...What?\\\"\\nI was furious.  Time Warner had left a backdoor in all their modems that gives them administrative access to my private connection.  And yes -- she did alter the password remotely.  She didn't seem to think there was anything wrong with this.  I tried googling for relevant information, but wasn't able to find anything more than speculation at the time.\\n###Response: Proprietary software, firmware and hardware ultimately means you don't own the stuff you use. 
Goes on at all levels and people interiorise it to the point that they will take it as a  given and justify uses that are not truly justifiable. ISPs do not need this kind of policy to operate, regardless what you'll read in this thread.\"}\n{\"text\": \"Write a response to the following message:\\n\\\"At-will\\\"?! I had no idea that in California you could just terminate employment for no reason.\\nComing from Australia where you can't fire anybody without good reason (even demoting people can be considered unfair dismissal) this just sounds insane.\\n###Response: In fact the majority of states in the US are at-will employment states.\\nhttp://en.m.wikipedia.org/wiki/At-will_employment\\nPretty much the only significant reason you can't get terminated in the US is as a direct result of being in a protected class (so you can't fire someone for being black but you can fire a black person for any other reason including no reason at all)\"}\n{\"text\": \"Write a response to the following message:\\nI'm not from the US, and I've never heard of anyone paid biweekly, that's why I found it weird.\\nAccording to the US Bureau of Labour Statistics, it's actually the most common schedule over there: http://www.bls.gov/opub/btn/volume-3/how-frequently-do-priva...\\n###Response: Bi-weekly means you would get 2 paychecks a week. Did you mean bi-monthly? Also to chime in a bit, I get a paycheck every week.\"}\n{\"text\": \"Write a response to the following message:\\nWow, what do you do with your browser that you absolutely need to have a JS performance which is an order of magnitude better than FF 3.6? Just curious...\\n###Response: \\n  1. Facebook\\n  2. Google Wave\\n  3. Google Docs\\n  4. And a bunch of other webapps I'm forgetting ...\\n\\nTry out Wave on an older version of Firefox (say, 3.0 or even something from the 2.x branch if you're adventurous). 
It totally destroys the browser.\"}\n{\"text\": \"Write a response to the following message:\\nMe1000, I love you, but...\\n> The OS won't even give you access to the phone book without prompting the user. So somewhere along the way, the user knowingly gave Path access to their contacts.\\nThe introduction of address book privacy in iOS was in large part prompted by the publication of Path's behavior. Up until the Path and eventually iOS update after the controversy first arose, Path didn't explicitly ask the user for access to their address book.\\nhttp://www.engadget.com/2012/02/15/iphone-address-book-issue...\\n> Path was fined, not for anything involving address books, but for allowing 12 year olds to sign up for the service.\\nPath was fined for the 12 year old signup thing specifically, but they were still charged with privacy violations regarding the address book kerfuffle.\\nhttp://www.ftc.gov/opa/2013/02/path.shtm\\n###Response: OP is using an android phone.\"}\n{\"text\": \"Write a response to the following message:\\n\\\"C and C++ is dumb and everyone who continues to use it is also dumb and/or ignorant\\\" certainly isn't the position of the Rust team nor the general Rust community at large.\\n###Response: It's not even the position expressed in the parent comment.\"}\n{\"text\": \"Write a response to the following message:\\nSony makes really great hardware, and (most of the time) utterly wretched software.  Sony could be a much better company with Apple's help.  Not sure how such a merger helps Apple, though.\\n###Response: Apple also makes really great hardware, and (most of the time) utterly wretched software- see itunes, iphoto, etc....\\nNot sure how this would help either company. Apple doesn't need any help getting really great hardware made. 
If anything, Sony would bring the quality down considerably.\\n(written from my macbookpro in chrome)\"}\n{\"text\": \"Write a response to the following message:\\n> The so-called \\u201cow zone\\u201d is a zone that is hard to reach with your thumb, like corners of the screen.\\nThe image that goes along with this point demonstrates the areas that are hard to reach for right-handed users, ignoring that ~10% of people are left-handed and have trouble reaching the opposite corners. Your design shouldn't assume that two particular corners are bad and the other two are fine, all four corners should be used for uncommon options only.\\n###Response: I\\u2019m right handed, but I use my phone with my left hand. Most of the time because I need my dominant hand to do something more complex. I don\\u2019t know if \\u201chandiness\\u201d really determines the hand in which you hold your phone.\"}\n{\"text\": \"Write a response to the following message:\\nWhat are other companies going to think of my situation?\\nThere's a specific language to hiring and firing. Make sure you get it right. They'll think it's quite bad if you say you were fired. Fired very specifically means you were told to leave for some sort of misconduct. You weren't fired - you were either layed-off or you were made redundant.\\n###Response: Perhaps a different business culture:\\nStaff are given a record of their employment history, and companies have to follow strict rules when dismissing employees:\\n* Fired is a big negative.  It goes in the government sealed employment record.  But you do get 3 months salary on the spot.\\n* Chose to resign gets a month's salary and (usually) no need to come to work and get on the job hunt.\\n* Released (made redundant), a pain for employers but great for employees, as they get the 3 months' benefit plus it is not their fault.\\nFocus on the 3rd option, especially when getting a contract signed.  
Powers out of your control are out of your control, don't get blamed for them.\"}\n{\"text\": \"Write a response to the following message:\\nI like the Ubuntu Linux distributions not just because of the \\\"lock in\\\" issues,  but a lot of things \\\"just work\\\" with Ubuntu that are difficult with Red Hat.\\nFor instance there is a Sun Java installer that \\\"just works\\\" with Ubuntu and will keep your Sun Java up to date when you do your system updates.  If you need a Bit Torrent client you can just\\napt-get install transmission\\nbut it turns out to be quite an exercise to compile and install any Bit Torrent client for Red Hat Linux.\\nI remember how Solaris was the center of the open source world in 1991,  but by 2001 it was starting to get hard to compile things on Solaris.  Red Hat is also diverging from the open source mainstream and it is \\\"just hard\\\" to do things that \\\"just work\\\" with Ubuntu.\\n###Response: Red Hat is made to use Red Hat repos. You may have to enable epel or epel-testing for packages that are newer or have software that Red Hat isn't interested in maintaining.\\nRed Hat also isn't meant to be bleeding edge, it's a very safe, stable server distribution. If you want a desktop use Fedora.\"}\n{\"text\": \"Write a response to the following message:\\nThis is certainly good for games and similar multimedia heavy type apps.\\nHowever, if you're using C++ for portability for any sort of non-realtime needs you've lost your marbles, and would be better off with JavaScript (no, really) and/or platform specific code to leverage the class libraries of the respective platforms. C++ heavy apps on Android are larger, use more memory, and load less quickly. This isn't obvious if you're used to other worlds, but it is a side effect of how pervasive Dalvik is on the system. 
Even if most of your app code is in C++ it's still easy to get screwed by a GC pause caused by where you have to interface with the framework.\\nMuch of the underlying problem here is that speed-wise Dalvik just isn't very good. It wasn't even competitive with J2ME VM performance when it first appeared, and took a long time to get close.\\nWith all that said, native debugging has been a sore point forever, and any improvement in that area is welcome.\\n###Response: C++ heavy apps on Android are larger, use more memory, and load less quickly. This isn't obvious if you're used to other worlds, but it is a side effect of how pervasive Dalvik is on the system.\\nCitation required, as this is entirely the opposite of every experience I've had actually building apps that use significant native code. What are you basing this claim on?\\nThere really isn't anything magical about Dalvik (or ART) -- it is a per instance runtime. C++ code doesn't suddenly become heavier or slower to load because of Dalvik.\\nAnd of course the overwhelming majority of games are mostly built using the NDK.\"}\n{\"text\": \"Write a response to the following message:\\nAddressing your points in the opposite order:\\n#3 may be true for C# development; I read a lot of C#, but don't write it. But I did spend 4 years writing a significant amount of systems C++ code in VC++, and --- at least 8 years ago --- VC++ was significantly better than any comparable C environment.\\nWe can get into a tit-for-tat on it versus, e.g. GCC, but I feel like one way to lose a programming environment shoot-out is to imply that Microsoft half-asses that part of the business.\\nRegarding #2, you seem to be sticking up for the author's implication that Ruby has \\\"complex\\\" syntax compared to Lisp. Clearly, most languages have more \\\"idiosyncracies\\\" (here defined as: \\\"notation of any sort\\\") than Lisp. 
That doesn't make Lisp's austerity an interesting lesson to take to other languages; it is, in fact, the major failure of Lisp.\\nThere is a notion, unaddressed in the original post but far more important than any of that post's insights, that obsessively regular syntax is crucial to metaprogramming. Even defined down to \\\"true macros\\\", this is simply folk wisdom: you can get \\\"Lisp-style macros\\\" out of any language that gives you access to an abstract syntax tree.\\nFinally, call the two \\\"mainstream\\\" high level languages Ruby  and Python (relegating Perl and PHP to uh, different, uh, categories). Both share the type system the author appreciates in Lisp.\\n###Response: Re #3: you're right to distinguish between the MS environments for C++ and C#. Your comment on VC++ matches my experience from about the same time. But when I say that VS.NET is backward and provincial, believe me I mean it as an understatement. There are very clear points of comparison (Eclipse and IntelliJ) and compared to either of them VS is a bad joke. It only survives because the MS development world is largely a parochial backwater in which most programmers eat the porridge they're given and don't care to look at anything else, like medieval peasants who never leave their village.\\nApologies for ranting. I've never been a big MS-hater (though I seem to be getting there), and I hardly think the Java IDE is the last word in programming environments (having an editor generate my duplicate code for me is not my idea of a good time; as you may have noticed, I like Lisp!) But like most hackers here, I care deeply about my tools, and it pains me to work in ways that feel suboptimal or ugly.\\nYour other points were, as usual, pretty interesting, but I burned myself out on this one. :)\"}\n{\"text\": \"Write a response to the following message:\\n> 20% time can easily destroy your career. 
Most managers view it either as a waste of time or a flight risk.\\nSo at Google, you're job isn't to make Google more successful, but to make your manager look good.\\n###Response: Yes, which doesn't make Google any worse than most companies, but does mean it has regressed into the meaningless, incoherent, parochial grey glop that is standard corporate working life.\"}\n{\"text\": \"Write a response to the following message:\\nMy all time favourite place in North America is Europe.\\nI am a European. I lived in US for two years, I lived in Canada for 15. I also lived in Sweden and Japan. States are good because it's just one big Walmart - everything is ridiculously cheap, but what it gets in quantity it tends to lose in quality. Canada in this respect is somewhat better, but it has the same problem - it feels ... I don't know ... diluted? Too much stuff, too little substance. It is nice, it is comfortable and affordable, but - and I am dumbing it down considerably - you can't get a decent croissant, because nobody simply gives a shit about getting them right.\\nOn the other hand, the grass is always greener on the other side. If you move, you will have at least a couple of years of excitement and novelty.\\nPS. And generally working too much and not enjoying life is in fact the biggest lifestyle issue in States and Canada. No better manifested by the fact that most of the restaurants are there for eating and not for socializing.\\n###Response: Where in the US did you live, and which places in Europe do you prefer? Both are huge, with significant regional differences.\"}\n{\"text\": \"Write a response to the following message:\\nI like that ... \\\"winging it\\\" after 50. :-)  Made me chuckle.  I agree with it.\\nI have a (an emotional) problem with that number though, being acutely aware and sensitive to ageism or age-related relegation of human functions and roles in society.  I have a problem with any number for that matter.  
50 is too low, and any number will always be too low, the way I see it if you incorporate Dawkins' idea of the Selfish Gene into the mix.  I would argue that sticking around \\\"to ensure the survival of our offspring\\\" (as you put it) for the human species is significantly more important than any other species on the planet.\\nTurtles don't need this survival strategy.  Lay eggs (procreate) and chuck - that form of hormesis aggressively weeds out the weak members of the species from the ones laying eggs for the next generation.  While chimps, elephants and whales may also need to \\\"stick around longer\\\" for their young - relatively, the human is by far an incredible outlier in this regard.  Not only do adults have to stick around for their kids well into the young's teen years, but elders too have to stick around for the group or for the clan.  This last point is important to understand the relationship between the survival success of the species and an imagined age cutoff for \\\"usefulness\\\" (that contributes to the success of the species) of any one member of the species.  The individual has extended roles beyond just his/her offspring.  His/her role in the clan's survival success mattered too.\\nRecent discoveries of hominid fossils suggest that as early as Erectus, \\\"humans\\\" have been keeping their elders alive! :-)  Evidence of a toothless skulls suggest that able members of the group may have been keeping the elder alive by chewing for her.  But for what??  Could it be to access knowledge of poisoned berries to avoid, or hunting methods to teach or ... stay at home creche for the kids while able-bodied adults went out for hunting??\\nAgain, I don't think I needed to respond to your point necessarily ... you're right for the most part.\\n###Response: > 50 is too low, and any number will always be too low, the way I see it if you incorporate Dawkins' idea of the Selfish Gene into the mix. 
I would argue that sticking around \\\"to ensure the survival of our offspring\\\" (as you put it) for the human species is significantly more important than any other species on the planet.\\nI see what you mean, but with every generation of your offspring, you're less and less vital to their survival - your children and grandchildren can care for your great-grandchildren as well; there's more redundancy there, so no individual is as crucial.\"}\n{\"text\": \"Write a response to the following message:\\nWhich way did Obama himself vote?\\n###Response: He didn't have to, because it didn't pass the House.\\nHowever, he's been vocally supporting spying, so it's assumed he would have voted No on this bill.\"}\n{\"text\": \"Write a response to the following message:\\nDoes google require you to use their service the same way apple does?  Or is their service just there for developers that want to use it(at a 10% cost?)\\n###Response: I believe the latter.  Android has always allowed app installs not-from-the-market.  Some CARRIERS (notably AT&T) have removed that ability for some handsets, but it's not universal and as far as I know, not common.\"}\n{\"text\": \"Write a response to the following message:\\nNot considering the specifics of DDG billboard and the Referer angle, does it not bother you to the slightes that Google does in fact track you all over the Internet?\\nOne is basically checking in with Google everytime he visits any site that uses Analytics, embeds a YouTube video or a font from a Font Directory, or a Google-hosted Javascript snippet. Not to mention GMail. Don't know about other people but this bothers the hell out of me. So while I don't think DDG should've focused on the Referer issue, they got the core issue absoluely right. Google does in fact track everyone.\\n###Response: So is Facebook with their like button, AddThis with their widgets and probably dozens or even hundreds of other services people use. 
At this point no, I'm not worried about it.\"}\n{\"text\": \"Write a response to the following message:\\nNothing you said negates my comment.\\n###Response: It all suggests derpmeister should be taken as a less reliable source than Blizzard, which does count against your comment.\\nSure, it's not a deductive proof that Blizzard is right and derpmeister is wrong, but it would be unreasonable to require such a thing.\"}\n{\"text\": \"Write a response to the following message:\\nSomething that the demo video has made me think of, is how their new UI style seem like it would be easy to use on a tablet. The large whitespace, oversized icons and 'clickable' areas.\\n###Response: Yes. Instead of responsive design, Google is throwing PC users under the bus to push for and invest in Android and tablets.\"}\n{\"text\": \"Write a response to the following message:\\nThere is a subreddit devoted to beating women.  What is your definition of PC?\\n###Response: The purge of Reddit will continue. Don't worry, /r/beatingwomen will be gone soon.\"}\n{\"text\": \"Write a response to the following message:\\nMan, I wish I understood ICs. I should've taken a few EE courses in college.\\n###Response: You could try grabbing a digital electronics textbook such as this one (link below). I haven't read this one specifically.\\nhttp://www.amazon.com/Digital-Electronics-Practical-Approach...\\nI took some EE courses as part of a CS degree. 
Years later, I found that reading a textbook was a good way to re-learn it.\"}\n{\"text\": \"Write a response to the following message:\\nI wouldn't call it loathing a company when pointing out the obvious flaws in a rigged contest.\\n###Response: I hope in future versions they rig WindowsPhones to do more things correctly.\"}\n{\"text\": \"Write a response to the following message:\\nThe biggest threat, by far, to WebOS is some Microsoft exec letting an HP exec know, casually, while they are, say, playing golf, that HP's Windows license could end up costing a bit more than Dell's because they are not really helping Windows Phone 7 become the success it deserves and helping WP7 would entitle a couple large discounts on other licenses.\\nMicrosoft has enormous power on OEMs.\\n###Response: I really think those days are over for MS.  They can't bully anyone around anymore.\"}\n{\"text\": \"Write a response to the following message:\\nWhat about the water in the earth's core?\\n>>A reservoir of water three times the volume of all the oceans has been discovered deep beneath the Earth's surface. The finding could help explain where Earth's seas came from.\\nhttp://www.newscientist.com/article/dn25723-massive-ocean-di...\\n###Response: My wife looks at carbon in the deep earth, but water is a very related chemical in these cases. She says that the amount of water in the core is indeterminable at this time, but it is likely a lot more than a mere 3x the ocean volume (also, volume depends on pressure and temp, which are extreme in the core)\\nNote: at extreme temps and pressures in the core, water as we know it is not a good guidepost. It may be crystallized or associating strongly with other exotic crystals and 'plastic'-y compounds at the Moho layer and other transition zones deep down. Also, just counting O and H elements is likely to not get you far there either. The story of how we got our oceans is FAR from complete. 
I'd wager we are in the 2nd page of the prologue though, which, for 60 years of real work, is pretty good compared to most fields.\"}\n{\"text\": \"Write a response to the following message:\\nMySQL was bought in cash by Sun for $1 billion. Of course, Sun collapsed after that, so perhaps you are right about the madness.\\n###Response: It wasn't for 1B cash, it was 800M cash, 200M Sun stock.\"}\n{\"text\": \"Write a response to the following message:\\nThe HN crowd seems to dislike (or despise perhaps?) PHP, but it's really not that bad. Yes it has a lot of warts, but it has a lot of things that make it nice for web development.\\na) try your new code by saving in your editor, and hitting reload in your web browser.\\nb) it's very approachable.  People who only know HTML and CSS can be expected to do a little bit of PHP work to integrate their changes.  If you setup the right network mounts, they just need to edit files and reload (see a)\\nc) it's not super high overhead at runtime.  If you're not using a framework, and you don't build up a crazy object hierarchy, it's not too hard to get your page out with about 10 ms of overhead beyond data fetching.  For very simple webservices (fetch data, possibly from multiple sources, and do a little formatting for the consumer), I was able to get the overhead down to 2ms.  You can certainly do better with other languages, but you can usually get better throughput improvement by working on getting data quickly. Btw, all the frameworks are terrible; many of them add 100 ms to the page just for the privilege of loading the includes; PHP is a framework for web programming thank you very much.\\nd) cleanup; you don't have to worry about it. 
If you don't do anything weird (c extensions, with non-preferred malloc), at the end of the request, everything is thrown away.\\nThat said, there are plenty of things PHP isn't good at: I wouldn't run a long running process in PHP; and multithreaded PHP sounds like a bad idea.\\n###Response: Yes, PHP is its own web framework, but it's not a very good one. And, as you say, implementing a better one on top of it adds a lot of overhead due to the execution model. With other languages, where you don't throw everything away at the end of the request, you are free to implement a good web framework without suffering additional overhead.\\nAs for a), many (most?) frameworks are able to monitor the source files and reload the application when they change, in order to enable that workflow. For example:\\nhttp://cherrypy.readthedocs.org/en/latest/refman/process/plu...\"}\n{\"text\": \"Write a response to the following message:\\nThat's because you have no real friends, only facebook \\\"keeping up appearances\\\"-tier acquintances.\\n###Response: And you clearly have no idea what you are talking about.\"}\n{\"text\": \"Write a response to the following message:\\nFor Steve, it became personal, whereas it was simply business for Gawker.\\nIn general, this is how it works for public figures and the press.\\n###Response: Well, and there you have Gawker's great miscalculation in the whole ordeal - they expected Apple to react as a business entity rather than as a proxy for Steve Jobs himself.\"}\n{\"text\": \"Write a response to the following message:\\nI wasn't trying to find a massive list, droithomme asked \\\"where are the technical contributions\\\" by women in computing, not \\\"who has the biggest list\\\". Really, all I needed to write down was Grace Hopper. Compilers being fairly useful in the field.\\n###Response: Did everyone miss the point of that comment? 
Most of the names on my list are not women.\"}\n{\"text\": \"Write a response to the following message:\\nHere here to this.  For some reason when I'm taking a shower, I usually come up with solutions to problems that I've never been able to do elsewhere.  Probably because when I'm not in the shower, I'm \\\"keeping busy\\\" and that restricts my creative abilities.\\n###Response: But would you say that time in the shower is \\\"wasted\\\" if your mind is actively on and turning over a problem?\"}\n{\"text\": \"Write a response to the following message:\\nThis is great.\\nI am always surprised when I meet mobile developers who have never created a mobile app for themselves.\\n###Response: The game is called 'aqua balls' and is on google play. I haven't updated it in almost 3 years, though ... right after I finished it I started having trouble with my wrists and finger joints, which meant no more coding for fun (I took a team lead job and cut out a lot of typing at work too).  My brother wrote and maintains the other app published by 'woggle' (he also did all of the art for aqua balls), so it is still updated regularly. He's actually a D.O. IRL, so this is 100% hobby for him.\"}\n{\"text\": \"Write a response to the following message:\\nFrom what I've read a lot of them can be spoofed with either a printed picture, or a video playing back.\\n###Response: It's probably enough for stopping the casual interference the OP wants to prevent.\"}\n{\"text\": \"Write a response to the following message:\\nHow can they support 2-factor auth without also having the ability to spend the bitcoin in the wallets?  What's needed is a way to keep bitcoin on remote servers without them having the ability to spend it, but to grant access with 2FA.  This prevents password phishing attacks and keylogger attacks, without the user having to worry about their harddrive crashing and losing their wallet.  
But I'm not sure whether that's possible short of making them keep an encrypted wallet, which negates all of the user convenience of using an exchange.\\n###Response: If I understand you right, that's already what blockchain.info does\"}\n{\"text\": \"Write a response to the following message:\\nIt seems the holy grail is almost always linear.  Especially in interviews.  Main thing to look for is if you find yourself doing something you have already done before.\\nAbove that, just know the general idea behind different data structures.  HashTables and binary trees will probably have you covered.  More data structures can't hurt, though.  Tries, BTrees, etc.\\nThough, I can't think of a single time I have used some of the more advanced items.  Linear searches with sentinal values being my personal favorite optimization that I will likely never directly code.\\n###Response: That's a really good point, thanks taeric.\\nAnything less than O(n) obviously means not needing to look at every element of the input, i.e. it's already sorted or similar. For most other interview problems that seems like a reasonable lower bound in the absence of more detailed analysis. 
I guess the recruiter's advice to go practice on TopCoder wasn't just copy-paste.\"}\n{\"text\": \"Write a response to the following message:\\nI actually might be wrong about this being the original.\\nWaldemar Horwat worked with Brenden Eich at Netscape and hacked on the JS impl in Netscape 2.0, but the dates are too late.\\nIf its not the original, what is it?\\n###Response: You linked obviously the much later code than what can be anything relevant for the first Javascript implementation in Netscape in nineties.\\nIf I understand it correctly, this is the experimental code that was supposed to be used in the research for a \\\"big leap\\\" in Javascript syntax considered by Mozilla around the middle of the previous decade.\\nThe keyword to search for can be Epimetheus, it appears in the sources you linked:\\nhttp://www-archive.mozilla.org/js/language/Epimetheus.html\\nhttp://discerning.com/burstproject.org/build/doc/constructs....\\n'Mozilla's implementation (\\\"JavaScript 2.0\\\", or \\\"Epimetheus\\\"): http://www.mozilla.org/js/language/Epimetheus.html http://www.mozilla.org/js/language/js20/ It (of course) has all features of Draft ECMAScript 4, and a few additions. It is not (to our knowledge) shipping in any product.'\"}\n{\"text\": \"Write a response to the following message:\\nFor concave shapes like letters the best choice would probably be a scanline-based rasterization algorithm.\\nSort all polygon edges from top to bottom and walk through that list line-by-line, keeping track of all active edges for each scanline. For those you can then find all intersections and fill the ranges in-between. By using an even-odd rule you can make sure that concave polygons and polygons with holes are filled properly.\\n###Response: I wrote one of those once! It was a nightmare morass of one-pixel errors. Maybe there was some clever simplification I missed that would have made it all easy, but these days I use the libraries. 
:P\\nI know that's how stb_truetype's renderer works, incidentally. Don't know if Freetype is the same, but I'd be mildly surprised to find it wasn't.\"}\n{\"text\": \"Write a response to the following message:\\nI've been reading a bit about APL, and I can't find anywhere to buy one of the old APL keyboards. However, you can remap your existing keyboard to output APL symbols for certain key combinations, which is how I think most people are using it these days.\\nEDIT: Picture of the keyboard layout:\\nhttp://www.users.on.net/~farnik/upload/APL2union.gif\\n###Response: Most people are using virtual keyboards, but Dyalog (in the warning on their website) mention that they sell APL keyboards.\"}\n{\"text\": \"Write a response to the following message:\\nBut if you can't flip a switch to deploy an identical server and/or restore from a backup image, aren't you in deep shit anyway? Your failure plan is to rebuild the box from scratch?\\n###Response: For a one-off budget server in 2002-2003? Yes, yes it is.\\nFor a highly-available mission-critical infrastructure in 2013, the failure plan is also to rebuild from scratch, because \\\"failure\\\" means redundant and backup systems have exploded. This is most likely to occur when you automatically roll out untested changes to your infrastructure.\\nIn either case, you won't be sleeping tonight. Or possibly tomorrow night.\"}\n{\"text\": \"Write a response to the following message:\\nIt also allows you to share rig setup(I will add to the page), so you could share your pedal board setup. Everything that goes into creating your sound with a guitar or bass you can share. As room consideration I'm thinking this mostly for recording applications and think most people don't record too loud. But that's an interesting thought.\\n###Response: Even without recording too loud, room can have a HUGE impact on recording. 
As can mic, pre-amp, mic placement, speaker cabinet, pickup selection, pick choice, string choice, playing style.\\nI'd try to minimize or at least make transparent those choices. Either:\\n* Suggest an ideal circumstance (SM57 close mic'd as a reference or something), and note how this recording differs from the ideal\\n* Make people list all of the components that went into the recording\\nIt's still going to be hard, because individual playing style can have a really big effect on sound (ie, Jaco is always going to sound like Jaco, even if you give him someone else's bass).\\nThat being said, I think it's really useful to hear lots of examples. Just be careful about misleading people with regards to why things sound the way they do.\"}\n{\"text\": \"Write a response to the following message:\\nI personally do not really care. Faith is a personal matter. As long as we are not discussing it directly, why should it factor into a political conversation? Einstein was deeply religious, down to refusing to accept quantum theory because \\\"god does not play  with dice\\\", but he also had things to say. Would it have made his discoveries more wonderful if he was an atheist? I believe that question is devoid of meaning.\\n###Response: Well many atheists claim that being an atheist is a qualification for being a serious scientist. Sam Harris wrote a pretty poorly thought out piece in the NYT about how a person who is otherwise qualified was not a good candidate because of their faith.\"}\n{\"text\": \"Write a response to the following message:\\nThe difference is that you could update those apps if you want to. Your laziness affects your app; Miguel's hypothetical future laziness affects thousands of apps.\\n###Response: Metrowerks PowerPlant wasn't open source. MonoTouch is. If Miguel gets lazy, you can still update your apps as long as you aren't lazy.\"}\n{\"text\": \"Write a response to the following message:\\nSo do you actually use it? 
What is the rate of adoption among your friends and colleagues? How pleasant do you find introducing and explaining the whole process to a nontechnical person?\\n###Response: I do and it's an awful experience. The rate of adoption is 0% for the most part, there's no easy way to make it usable for non-technical people.\"}\n{\"text\": \"Write a response to the following message:\\nI think it's popular because it's: simple, small, efficient and documented.\\nFor hugely complex web applications you would probably desire more, but the fact of the matter is that the browser world is not ready for huge stuff, we still have IE, we recently got iOS, Android and many others. Progressive enhancement doesn't really allow you do those complex client side apps.\\n###Response: Look behind you, we're already building them. Not everything needs to work in every old browser.\"}\n{\"text\": \"Write a response to the following message:\\n3) I think facebook is here to stay.  Whether it lasts through time to be google status is yet to be seen, but I really think it could be.  It's interesting hearing paul's predictions as he saw google become GOOGLE.\\n###Response: because the people working there are smart and ambitious?  come now, it takes a lot more than that to make a google.\"}\n{\"text\": \"Write a response to the following message:\\nI studied both (\\u03bb-calculus and Set Theory) academically and the description IS disjointed and difficult to read. I get what OP is trying to say because I already understand these concepts.\\nBut even though the isomorphism between the Y combinator and Russell's paradox is elegant, the paradox is actually very deep (much deeper than the idea of a fixed point) -- that's why it took someone until the early 20th century to formalize it. 
For an awesome (and mind-blowing) explanation of the paradox, see Halmos' Naive Set Theory (botom of page 6): http://sistemas.fciencias.unam.mx/~lokylog/images/stories/Al...\\n###Response: It also took someone until the mid-20th century to formalize the Y combinator, though... Earliness of discovery is no grounds on which to consider the Y combinator less \\\"deep\\\" than Russell's paradox (if anything, the reverse!).\"}\n{\"text\": \"Write a response to the following message:\\nI'm still not sure why that was deprecated. It's much cleaner. I still use it\\u2026 :/\\n###Response: Ideology.\"}\n{\"text\": \"Write a response to the following message:\\nThe first two points seem like non-constructive scab picking that could apply to almost any project launched around here.\\n1. Glif, Wii, Flickr or Forrst are hardly any better. Unless they're generic words, names become valuable. \\\"Y Combinator\\\" itself is a pretty weird name for a business if you ignore the context of everything it has gone on to do and represent. \\\"Fowndr\\\" only sounds cheap now because it's new and doesn't have any history.\\n2. Why does it matter if the barrier is low? Any barrier that cuts down the useless 90% is fine. The term \\\"founder\\\" is self selecting anyway. I know at least 90% of my Twitter followers wouldn't self identify as \\\"founders\\\" and the 10% who do (whether they are or not) would still likely find such a community useful (assuming it takes off).\\nYour third point is more serious and this is an area where he'll need to do a lot of work. I'll certainly be trying to help Stu a little with his efforts because I see some value in what he's trying to do (from the \\\"Forrst for founders\\\" POV).\\n###Response: Wii works very well -- it's unique and can be thought of as \\\"we play together\\\" and \\\"whee! 
this is fun!\\\"\"}\n{\"text\": \"Write a response to the following message:\\nCan we take a peak at the campaign?\\n###Response: http://kck.st/VxaN0O\"}\n{\"text\": \"Write a response to the following message:\\nYou are referring to people being slavish and excessively subservient to authority figures? I wasn't looking at it that way. I was merely thinking she would like to work somewhere else again. The way she publicly played this makes her look unprofessional. If you have a feeling you're being fucked over, make a case, collect the evidence and by all means, break the news. But don't go all emotional and start posting on Quora and then keep quit about the whole case. The way she handled this does not make her case stronger, only weaker.\\n###Response: Who is right or wrong (she or the firm), it is hard for outsiders to decide without knowing the facts.  But what is very very sad is the fact that much of the \\\"civilized\\\" society accepts that:\\n1. Employers can treat their employees like crap, not many people seem to be bothered by it.  But when a employee says anything even remotely bad, suddenly he/she is unprofessional, immature etc\\n2. In the case of whistle blowers, we spend more time arguing the behavior of the whistle blower (aka the \\\"correct\\\" way of whistle blowing) than actually looking into the allegations.\\nSo in short, even if I (the whistle blower) have a legitimate case (racism, ageism, sexism...whatever), then I am expected to act totally \\\"professional\\\" if I ever expect to be taken seriously.\"}\n{\"text\": \"Write a response to the following message:\\nCan I use a browser other than IE on it?\\n###Response: I'm not sure you can already but in the near future, yes: Firefox, Opera and Chrome.\"}\n{\"text\": \"Write a response to the following message:\\nMost domainers I know also plan on creating services on their domains.  Why does anyone else have a more legitimate right to want to build something on a domain name?  
What if I own cats.com and just want to put a picture of my cat fuzzy wuzzy?  Why shouldnt I be allowed if I registered it first or paid for it?  What if your startup isnt making any/much money and mine is and we're doing the same thing.  Do I have more rights to that name than you?\\n###Response: What if I own cats.com and just want to put a picture of my cat fuzzy wuzzy?\\nI'm sure a .meow domain would suit your personal purposes just fine. A .com, however, would be better suited for commercial purposes if you intend to profit off of your fuzzy wuzzy photos.\"}\n{\"text\": \"Write a response to the following message:\\nThis should be stressed - sites like Facebook do exactly this.  Constant changes mean constantly updating your scraper. When it comes to A/B testing? Your scraper needs to intelligent find the data, which might not always be in the same place.\\nSidenote: I wonder if any webapps use randomly generated IDs and class names (linked in the CSS) to prevent scraping. I guess this would be a caching nightmare, though.\\n###Response: I bet you would only need to randomly shuffle between a few alternatives for all of them. You'd need a dedicated effort to work that one out and the cache implications could be managed. No getting around the trade-off of possible page alternatives vs cache nightmare-ness though, and doing that to json apis would get ugly fast.\\nAt least it's easier to code these tricks than to patch a scraper to get around them.\"}\n{\"text\": \"Write a response to the following message:\\nI think that it'll speak volumes, not just to the bereaved parents, but also to the employees of Office Max, if the CEO were to demonstrate a true, deep apology.  Corporate culture is an important thing.\\nThis is the sort of thing that's serious enough for employees to work on Sunday to fix.  My impression, from reading the article, is that they'll start to take it seriously when they get to work on Monday.  
That's already two news cycles too late for them to undo the PR damage they've done to themselves.\\n###Response: If it wasn't made immediately, which could have ended the story, an appology is never happening because it's tantamount to an admission of legal culpability.  Also few CEOs of large orgs have the spine to take any risk for the greater good because they're beholden to their board and their shareholders.  That's just the way these folks are, by and large.\"}\n{\"text\": \"Write a response to the following message:\\nThanks for the links. I haven't used these tools - are they useful? Which features are most helpful? Certainly we'll want to convert to or from some Linux trace format to combine traces that involve Linux and Mirage machines.\\n###Response: I've tried LTTng once, but it was quite complicated to set up, and required patching the kernel. It seems they improved that, it only needs a kernel module so I might give it another try.\\nI also used 'perf timechart'[0] in the past, or rather I tried to, but the traces on any realistic workload generated SVG files so large that it was too slow to open in firefox/inkscape etc.\\nYour approach seems to be more scalable as it doesn't generate one huge image/SVG but rather generates that zoomable view via Javascript on-the-fly, right?\\n[0] http://web.archive.org/web/20130729151516/http://blog.fenrus...\"}\n{\"text\": \"Write a response to the following message:\\nFollowing this logic, every language is Perl with a bit of this or that. Perl was one of the first of the kind, but people saw obvious problems with it and so they innovated with 'little nicer syntax' or 'little better object system'. You might not want any of this, but you can't deny most people are happier with Python or Ruby.\\n###Response: I'd say it's more like Perl is every language with a bit of this or that. 
A sufficiently \\\"skilled\\\" Perl hacker can write everything from bad C, to bad Lisp, to bad Fortran, in Perl -- and most certainly can write plenty of bad Perl in Perl, too. It's writing good Perl that's hard.\"}\n{\"text\": \"Write a response to the following message:\\nTaking a quick look through this list:\\nhttp://www.nobelprize.org/nobel_prizes/lists/universities.ht...\\nBell Lab's had the most Nobel prizes of any for profit institution at 7.  There's 13 non-for profit organization that have eight or more.  Even if you drop out the political Nobels (Peace and Economics), there's still 11 entities out there besting the best that for-profit institutions have to offer.  Also, most of these Nobels have been in the past hundred years, so no need to normalize out the advent of democracy.\\nIt's true that for profit enterprises take these discoveries (e.g. Giant Magnetoresistence, MRI, Fiber Optics, CCDs) and ramp them up for mass production.  However, they're still tweaking the major break-through made by people focused on things besides the personal accumulation of wealth.\\n###Response: Though not necessarily their goal, a side of this story you don't mention is income via royalties. I have worked with multiple Bell labs alumnus who are financially very secure in part because of their work there.\\nAs much as software patents irk common sense, the notion of IP protection really does seem reasonable here. There are people who do research for reasons beyond personal profit. They actually do deserve licensing fees.\"}\n{\"text\": \"Write a response to the following message:\\nAbsolutely. Like I said, I have no problem with other people choosing other licenses which meet their requirements. My requirements are different. The GPL is most appropriate for myself.\\n###Response: I don't choose a license to meet my requirements.  I choose a license to meet everybody's requirements.  
So when you don't choose a license that meets everybody's requirements, it makes me question the value of the freedoms it provides, and I end up wishing you'd chosen something more open.\"}\n{\"text\": \"Write a response to the following message:\\nJust wanted to say nice work. I was browsing GPP the other day and I found it to be a valuable reference, and not just for game programmers.\\nOn a different note, welcome to the Pacific Northwest!\\n###Response: Thanks! Moving to Seattle was weird: I felt like it was the place I'd always wanted to live even though I barely even knew it existed until a few years ago.\"}\n{\"text\": \"Write a response to the following message:\\nOCaml also internally uses an intermediate code representation called C-- (or Cmm).  I do not know if the two have any relationship.\\n###Response: It looks like OCaml's C-- predates this C--, but has had an influence on it.\\nFrom Xavier Leroy, one of the lead Ocaml developers [1]:\\n    I think I'm the one who coined the name \\\"C--\\\" to refer to a low-level,\\n    weakly-typed intermediate code with operations corresponding roughly\\n    to machine instructions, and minimal support for exact garbage\\n    collection and exceptions.  See my POPL 1992 paper describing the\\n    experimental Gallium compiler.  Such an intermediate code is still in\\n    use in the ocamlopt compiler.\\n\\n    I had many interesting discussions with Simon PJ and Norman Ramsey\\n    when they started to design their intermediate language.  Simon liked\\n    the name \\\"C--\\\" and kindly asked permission to re-use the name.\\n\\n    However, C-- is more general than the intermediate code used by\\n    ocamlopt, since it is designed to accommodate the needs of many source\\n    languages, and present a clean, abstract interface to the GC and\\n    run-time system.  
The ocamlopt intermediate code is somewhat\\n    specialized for the needs of Caml and for the particular GC we use.\\n\\n[1] http://article.gmane.org/gmane.comp.lang.caml.inria/9436/\"}\n{\"text\": \"Write a response to the following message:\\nIn which case you'll probably inevitably end up building something with a suboptimal stack -- and limiting novelty also means you won't learn anything _new_, which sounds more terrible to me.\\n###Response: I am not arguing that you should elimate novelty completely.  Just that the amount of time you have to learn new things is limited and needs to deliver returns.\\nTake the iOS app development.  Consider someone who has never used Objective-C but has lots of experience doing client side web development.  It would probably be faster for them to develop using a tool like Phonegap and use existing skills.  It is theporetically sub-optimal to native code but surely the time spent learning phone gap will produce faster results than trying to learn an entirely new stack.\"}\n{\"text\": \"Write a response to the following message:\\nOne time they bought a plane ticket online for $350.\\nI assume this one get caught?\\n###Response: Depends on where you are, but in a city cops aren't going to bother if the amount's not enough to get felony charges.\"}\n{\"text\": \"Write a response to the following message:\\n\\\"128 KiloBytes\\\" per second is a 1Mbit connection... That's more than enough for most people unless you're a heavy video user. I think mine is about 8Mbit, but I doubt I'd notice if it dropped to 1Mbit.\\nI'm not arguing for the cap. I'm just confused that you think a 1Mbit connection is somehow comically slow?\\n###Response: Sorry. I don't know how I made that mistake... I meant 128 Kilobits per second (16 KiloBytes per second). The best you can get is 1 Megabits per second (128 KiloBytes per second).\\nI have a 256 Kbps connection. It's really, really slow. 
It took me 2 days to download OS X Lion from Mac App Store (about 3.5 GB).\"}\n{\"text\": \"Write a response to the following message:\\nWould you find it magical if it were truly immersive 3d where eye movements would scroll and an Omni-like 3D treadmill actually worked?\\n###Response: No it's even worse. Now I have to work even harder to have the marginal experience I can have from my desk.\"}\n{\"text\": \"Write a response to the following message:\\nThe conversion from the given metric to the 'taxicab' metric needs some justification!\\nThe n^2 algorithm is simple enough to be solid.  If your code does not give their answers, then their answers might be wrong!\\nMy work with convexity is an effort at faster code, but actually programming all that would be a bit much.  I've done such things, but I got the linear programming from the old IBM Fortran Optimization Subroutine Library (OSL) and, then, wrote the code in Watcom Fortran so that I could use the Fortran OSL OBJ files.  Using the OSL for the problem in this thread would be a bit much.\\n###Response: You're right! They seem to have fixed a bug in their test program, and I'm now credited with solving that problem.\\nAs for the justification, it shouldn't be too hard to show with a little algebra that\\nTaxicabDistance(x1+y1, y1-x1, x2+y2, y2-x2)/2 = ChebyshevDistance(x1, y1, x2, y2)\\nwhere\\nChebyshevDistance(x1, y1, x2, y2) = max(|x1-x2|,|y1-y2|) and\\nTaxicabDistance(x1, y1, x2, y2) = |x1-x2|+|y1-y2|.\"}\n{\"text\": \"Write a response to the following message:\\nCan you extend this with filtering all AngularJS articles as well? :)\\n###Response: http://hn-filter.appspot.com/#preset=AngularJS  didnt spend much time but appears to be cookie based as this didnt work just by changing the preset in the url but opening in a different browser seemed to have taken the filter\"}\n{\"text\": \"Write a response to the following message:\\nAnother Thomas Street dev here. 
I'm already using famous-angular on some client projects that are just that \\u2014 animation-heavy desktop sites.\\nIt's awesome. The API for working with 3D and animations/transitions is so much nicer than straight HTML+CSS. And I love having a render loop that's about 100x better tuned than I could come up with in the time available.\\n###Response: Hi dgraunke.  I am working on some projects and really anxious to try out famous-angular integration.  Do yall have any alphas or betas of your angular integration js available yet?  I'd be more than happy to be a tester for it.\"}\n{\"text\": \"Write a response to the following message:\\nI think you gotta pay the original artist.  As an old-school analog musician, I had to sweat for literally decades to be able to drop that perfect ten second fill effortlessly.  The engineer had to invest thousands of dollars and a similar level of time to record it and make it sound amazing.  And if some kid wants to use them, great, that should be allowed, but they gotta pay, because that music didn't just drop out of the sky.  And if you want to sample a gigantic hit that everybody knows (which is going to make your derivative work much more marketable), then you're gonna have to pay a lot more, no?  If I wanted, let's say, Jay-Z to come in and sing 99 problems on my song, what would that cost me?  Probably a lot, and for good reason.\\nIf it's really worth nothing, then all these DJ's could either produce it or record it themselves.  But it's not, and they can't.  This is a classic economic externality.  Writing, performing, and recording really good music costs a lot of money, and sampling is virtually free.  Pay obscure artists a reasonable mechanical residual and negotiate with samples of huge hits for huge money.\\n###Response: How much should they pay?\"}\n{\"text\": \"Write a response to the following message:\\nTry Bahamas or Hawaii, at least the weather will be good.\\n###Response: ... 
Except for the fact that Hawaii is in the US.\"}\n{\"text\": \"Write a response to the following message:\\nI actually think this looks really good, but 3 things:\\n1. Why differentiate it from CoffeeScript so much? Why not call it DOMCoffeeScript or something? Are there any core language changes from CoffeeScript other than the tag features?\\n2. I'm not sure how I feel about the mixture of XML tag characters with HAML/Jade-like indentation. My gut instinct is to always look for a closing tag with XML/HTML. Why not use some kind of sigil like % or @ or ! to represent a tag, since clearly the requirement for both a left and right caret is now obviated?\\n3. Why require the `var` keyword instead of making it the default? That's one of my biggest pet peeves with languages like Javascript and Lua. Local-by-default always makes the most sense.\\n###Response: 1. Because the semantics are quite different. See my other post.\\n2. It's still nice to separate the attributes from the content:\\n    <h1 title=\\\"hello\\\"> \\\"Foo\\\"\\n\\nWhy use a new syntax when everyone knows HTML/XML?\\n3. The lack of `var` in CoffeeScript is its worst feature ever IMO! Every time I write `someVariable = \\u2026` I'm terrified that I will accidentally overwrite a previous variable. Imba improves on JavaScript here and will correctly shadow multiple `var` in the same function.\"}\n{\"text\": \"Write a response to the following message:\\nWorked for Thoreau.\\n###Response: Thoreau abandoned his cabin after 2 years.\"}\n{\"text\": \"Write a response to the following message:\\nI was interested in computer hacking when I was child(!). Then one day I have read, if I want to be professional I have to learn programming. So I have decided to learn programming and begun with BASIC. Creation of a program was really impressive for myself. 
It was my beginning of learning to code.\\n###Response: Everyone wanted to be a hacker in my childhood days :-) Though nobody actually had a clue what is it all about :-)\"}\n{\"text\": \"Write a response to the following message:\\nThere are two perspectives when looking at any job: project objectives and social utility.\\nThe first sees a job as an effort focused on some project goals, e.g. designing a car, building a house, curing a disease, finding Higgs, running a restaurant, landing on Mars. The second sees a job as something simply done for money: those who do it need the proceeds.\\nPoliticians have a tendency to put the social utility above project objectives. This explains situations where some jobs are equivalent to digging a whole in the ground only to bury it back again.\\nIt's sad to see NASA come to this, too.\\n###Response: There is no social utility in digging a hole to bury it back. Targeting social utility would mean to run a business at break-even or even at a loss to provide value as a side-effect to the community.\\nThe cost of restarting tower building from a hibernate state would be enormous. I think they did the right thing in continuing building even if the immediate utility was lost. It's not like they will never need such a tower. They can't use it _now_ and that's a different thing.\\nIn perspective, the $300M do not compare to the $8B mentioned in the article. Hey, they don't compare even to the budget of a box-office Hollywood movie today. That should mean something.\"}\n{\"text\": \"Write a response to the following message:\\nlet me point to a comment made by PG yesterday when asked by thaumaturgy what it's like to have your every written (or spoken!) word analyzed by a bunch of people?\\nIt's pretty grim. I think that's one of the reasons I write fewer essays now. After I wrote this one, I had to go back and armor it by pre-empting anything I could imagine anyone willfully misunderstanding to use as a weapon in comment threads. 
The whole of footnote 1 is such armor for example. I essentially anticipated all the \\\"No, what I said was\\\" type comments I'd have had to make on HN and just included them in the essay.\\nIt's a uniquely bad combination to both write essays and run a forum. It's like having comments enabled on your blog whether you want them or not.\\nhttp://news.ycombinator.com/item?id=4497691\\nMaybe we should be discussing the future of finance, crowdsourced funding, new programming paradigms or some other interesting forward looking stuff instead of spending our time looking backwards trying to poke holes in an essay written more than ten years ago.\\n###Response: There are two discussions waiting to happen here:\\n1. \\\"Haha, what an idiot. He said Java will die and now it is still there, hahahaha.\\\" (obviously, an contrived example). This would be an useless \\\"discussion\\\". We wouldn't learn anything from it.\\n2. \\\"Well, as he writes in the essay: predictions are hard. Let us check which predictions did hold and which didn't.\\\" - This discussion could provide useful lessons for the future.\\nI hope for the second, but fear the first. In a few hours we will know the answer.\"}\n{\"text\": \"Write a response to the following message:\\nI'm sorry but it's hard to really take someone seriously when they start out by comparing a language (PHP) against a framework (Rails), and then make this claim:\\n> Metaprogramming and reflection have to be implemented by the framework as they\\u2019re not part of the language.\\nEvery example there seems to relate to a framework called \\\"Flow\\\", and the author sums this up quite succinctly:\\n> It took them five years to experiment and find something that works for them\\nThe key words are the last three. WORKS. FOR. THEM. 
I've never heard of Flow before, and from what I've seen in this blog alone, it looks like more PHP developers who want to be Java developers.\\nI agree completely that the examples shown are fucking horrific, but thats not a fault of the language.\\nLet's take the \\\"Fetch a record\\\" example. It's perfectly possible to have a php model that works like so:\\n  BlogPost::findByPK($foo);\\n\\n\\nAs for the templates, yes the Flow templates are fucking stupid. That's still not a fault against PHP - I've given up trying to understand why people feel the need in PHP - which is itself an excellent templating language - to create new, more complex and yet limited template languages.\\n###Response: OK so weirdly, he claims PHP is dead but actually specifically says he's comparing two frameworks. So, does that mean because Rails recently had a heap of vulnerabilities, Ruby is automatically insecure?\\nNo.\\nThere is a difference between a language and a framework. That Flow is yet another attempt at an \\\"Enterprisey\\\" mvc framework for PHP is a fault of the developers of Flow, not the language it's written in.\"}\n{\"text\": \"Write a response to the following message:\\nThese app stores are stifling.  They're injecting arbitrary non-functional nonsense and technical hurdles that just hold developers back.\\nI'm not saying HTML/CSS/JS development is a panacea, it's all painful awkwardness with occasional sublime moments.  But at least I am my own master when I deploy my apps to the Web.\\n###Response: >They're injecting arbitrary non-functional nonsense and technical hurdles that just hold developers back.\\nArbitrary nonsense like a sandbox? Didn't we learn anything from when viruses were rampant in earlier OSs?\\n>I'm not saying HTML/CSS/JS development is a panacea, it's all painful awkwardness with occasional sublime moments. 
But at least I am my own master when I deploy my apps to the Web.\\nSo the solution to the \\\"arbitrary non-functional nonsense and technical hurdles\\\" of the native App Stores is web development? As if that doesn't have tons of arbitrary non-functional nonsense and technical hurdles itself compared to native development...\"}\n{\"text\": \"Write a response to the following message:\\nI'm curious, how did you get in and through college (tuition, enrollment, etc.) without an ID or a bank account?\\nI lived in NYC myself. Besides not having to drive, is it really a better place to be for undocumented immigrants?\\n###Response: There are state level DREAM Act legislations that offer a path to college for undocumented students, but no path to legalization. If I recall correctly there are currently 10 states that have some form of such legislation: California, Illinois, Kansas, Nebraska, New Mexico, New York, Texas, Utah, Washington, and since this past Spring, Wisconsin. These states have urged action on the federal level, as this is the only thing that would really solve the problem.\\nThat, working my ass off, working my ass off some more, and a whole lot of help from people who care about me, to whom I am in debt for the rest of my life. I know too many who are not as lucky as me and it tears me apart to see their dreams shattered for no sensible reason and no wrongdoing of their own.\\nNYC is a better place for undocumented immigrants because you can rely on the police to help you, they are legally prevented from inquiring about your immigration status. 
On the other hand, we have stories of valedictorians being thrown into jail and put into deportation proceedings for rolling a stop sign in places like Texas (some people were able to get driver licenses a while back, in some states, no longer possible).\"}\n{\"text\": \"Write a response to the following message:\\nQuestions 2 and 3 both say:\\nWhat single attribute is most attractive about your current place of employment?\\n###Response: yeah I fixed that, it was supposed to be most and least :\\\\\"}\n{\"text\": \"Write a response to the following message:\\nI can't believe this comment is being upvoted. Do you somehow feel it's wrong that Wikipedia asks for donations?\\n###Response: I also can't believe it was upvoted, mainly because it was a silly bit of sarcasm.\"}\n{\"text\": \"Write a response to the following message:\\nMany mobile devices ignore these units - to my annoyance - in favour of the \\\"reference pixel\\\" as defined in CSS. http://www.w3.org/TR/CSS2/syndata.html#length-units\\nCrazy-speak IMO, and I suspect I'm not the only one... perhaps someone in the standards community could explain the rationale.\\n###Response: Yes it's sad how in software development, the most straightforward ways to do something (just specify cm) are usually broken. It's always exception built on exception, different per platform, different per browser.\\nI guess to do it correctly you'd have to write javascript to reliably detect the DPI on all platforms/browsers (as far as possible, if not make some educated guess) then adapt your text sizes to that :(\\nSo I can certainly understand 'just choose 16px and be done with it'.\"}\n{\"text\": \"Write a response to the following message:\\nAmerica\\u2019s typical tech entrepreneurs are in their 20s.\\nIt seems to me that media is absolutely in love with this notion.  The scrappy, brilliant kid who's going to change your world (e.g. Diaspora).  
Why is that?\\n###Response: The media in general loves to fit stories into archetypal stories that are easily digested by audiences. The 20 year old scrappy kid fits right into the \\\"David vs. Goliath\\\"  archetype, people love to hear stories about someone winning against all odds.\"}\n{\"text\": \"Write a response to the following message:\\n> Low Bandwith Version of this site due to large amount of requests:\\n> Aufgrund unzumutbarer Kosten und Bezugsbedingungen hat das Direktorium des Zentrums Mathematik beschlossen, alle abonnierten Elsevier-Zeitschriften ab 2013 abzubestellen.\\n> Because of unsustainable subscription prices and conditions, the board of directors of the mathematics department has voted to cancel all of its subscriptions to Elsevier journals by 2013.\\n> @http://news.ycombinator.com/ visitors:\\nYou just DDOS-ed our web server ;-)\\n###Response: Somewhat odd that \\\"Germany's leading Technical University\\\" isn't able to handle a few Hackernews visitors showing up\"}\n{\"text\": \"Write a response to the following message:\\nLol. I just realized why people aren't understanding my other comments about why this debate matters.  Here's an explanation:\\nCapital gains on the sale of capital assets (i.e., stock) by foreign persons are not taxable by the U.S.  Only certain types of capital investment (i.e., real property or income that is connected to a U.S. business activity) is taxable by the US.  See generally Section 871 and its accompanying regulations.\\nCapital gains by foreign persons are generally taxed by the foreign person's country of residence unless the foreign person has a tax nexus to the U.S. Generally, the tax nexus is provided by citizenship, actual residence, or deemed residence.  Id.\\nEduardo is not an actual or deemed resident of the U.S., thus his only tax nexus was via his citizenship.  
By renouncing his citizenship, he severs that last bit of tax nexus.\\nConsequently, the sale of the Facebook stock is now taxable only by the foreign jursidiction in which he is a citizen/resident.  In this case, Singapore does not impose a capital gains tax, so he would effectively not be subject to any tax at all on the sale of his Facebook stock.\\nThis matters because Singapore is one of the few non-tax haven jurisdictions that provides for no capital gain taxation.  It's also unusual that he did so around the same time that Facebook began publicly indicating it would move toward an IPO, despite Eduardo having been eligible for Singaporean citizenship for some time before he actually renounced his U.S. citzenship.  Note also that Eduardo \\\"resides\\\" in Singapore but spends significant amounts of time outside of the country.\\nThis is why so many people (including myself) think Eduardo renounced his citizenship to avoid paying taxes.  It's the circumstances surrounding his renunciation which are suspicious.  If he'd chosen to move to Europe, or China, or Hong Kong, or some other place, it wouldn't be an issue.  But based on the totality of the circumstances, it looks like tax avoidance was his primary motive.\\nHope this helps.\\n###Response: > In this case, Singapore does not impose a capital gains tax, so he would effectively not be subject to any tax at all on the sale of his Facebook stock.\\nThis is blatantly wrong.\\nThe US, since 2008, has an exit tax. For the purposes of US taxation, what happens is that on the day that he loses his US citizenship, US-Eduardo sells all his things to Singapore-Eduardo at fair market price. 
Thus, he pays capital gain taxes up until that day.\\nAfter that day, it is up to the new jurisdiction: Singapore won't tax him for any further gains, most other countries would.\\nEither way, if facebook goes down between the day he loses citizenship and the day he actually sells, he has lost money.\\nHe's saved some taxes already, but that's a result of a bet. Another way to make the same bet would have been to buy a call option (or sell a put option, or go long a forward or future) on Facebook stock.\\nSchumer is just looking for votes and publicity. And the media are happy to distort the story to make villains and heros, because they sell more ads that way.\"}\n{\"text\": \"Write a response to the following message:\\nWhat to you mean by \\\"cryptic\\\"?  Something you struggle with?  An abstraction?\\nYou're not used to maps, folds, and filters (the bulk of sequence comprehension), so they don't free up your brain power.  But seriously, they're not complicated, and using them instead of regular loops often reduce the amount of code by a factor of 3 to 5.  Simple code, where the same idioms come back over and over \\u2014just like in regular loops, only shorter.\\nDon't tell me that doesn't free brainpower, eventually.\\n###Response: Functional orgami is hella hard to debug. Their is a reason functional programming relies so heavily on equational reason and pure FP relies heavily on static typing: the alternative, actually debugging programmers, is so painful that you just want to avoid that at all costs.\\nA loop is way easier to debug than a filter or a map, you just step through it. Who cares about code length? Haskell doesn't become a usable language until someone comes up with a decent dataflow debugger, if that's even possible.\"}\n{\"text\": \"Write a response to the following message:\\nMost programming puzzles aren't real work-sample tests. 
A work-sample test has to be representative of the actual stuff you'd do on the job.\\nThe trick is, to work in a recruiting context, you also want those tests to be standardized and repeatable. A lot of companies fall down on this. They have candidates do \\\"real work\\\", often in a pair-programming context. There a bunch of problems with this:\\n(1) \\\"Real work\\\" usually isn't standardizable, so you can't compare candidates\\n(2) Signal quality from the test is intensely dependent on who is doing the work with the candidate\\n(3) Two different candidates might end up getting \\\"tests\\\" that are wildly different in terms of predictive power\\nI have a bunch of ideas for pure software development work-sample tests, but I'm not ready to share them. The idea is simple, though:\\n* It's a realistic exercise that approximates actual day-to-day work as much as possible\\n* Every candidate gets the same exercises\\n* The exercises have objective (preferably gradable) outcomes\\n###Response: So accurate. I'm a grad student who studied security deeply during my time in school.\\nI want to keep my hands on a keyboard and I don't like airplanes (which rules out consulting) so I've applied at a lot of companies to be a ``security engineer''.\\nI can't tell you how many well-respected companies ask me to write min-heaps, depth-first searches, etc. I don't understand what they are asking me this for...It isn't even close to a realistic representation of what my day-to-day responsibilities would be. It is also an immediate turn-off....\"}\n{\"text\": \"Write a response to the following message:\\nGoes to show how many (greatly needed) programmatic features CSS is adding. Also shows that some people love a creative way to attack a problem.\\n###Response: Goes to show what a hack-riddled mess Web presentation still is.\"}\n{\"text\": \"Write a response to the following message:\\nPretty shocking, especially considering the equity he's presumably walking away from. 
More power to him.\\n###Response: I doubt he's walking away from much equity at all. He's been with Twitter a long time. If he isn't 100% vested, he's got to be pretty close.\"}\n{\"text\": \"Write a response to the following message:\\n\\\"It may not be PC to say this, but it's important to distinguish unpleasant, but fundamentally innocuos flirting from awkward people, from bona fide sexual harassment like groping or inappropriate touching.\\\"\\nSexual harassment includes the comments like the \\\"how much for a sexual favor?\\\" example.  That constitutes bona fide harassment.\\nHere's my example from the last 2 years.  I'm blanking on the conference now, but there was a live IRC session running on the overhead, and each time a question was asked by a woman, IRC would be filled with lude and aggressively sexual remarks.  It was totally uncomfortable!  I think this situation qualifies as bona fide sexual harassment.\\nTouching or groping someone in a sexual manner is assault (at least it can be in the US.)  It's so far beyond mere  harassment that it's not the point of the article...\\nEDIT: for clarity...\\n###Response: The same even happened in the chat during the start-up school webcasts. Very disturbing.\"}\n{\"text\": \"Write a response to the following message:\\nfor i in range(1,101): print \\\"FizzBuzz\\\" if i %3 == 0 and i % 5 == 0 else \\\"Fizz\\\" if i % 3 == 0 else \\\"Buzz\\\" if i % 5 == 0 else str(i)\\nScores 69 in python but the login/signup is broken :)\\n###Response: for i in range(1,101): print \\\"FizzBuzz\\\" if i % 15 == 0 else \\\"Fizz\\\" if i % 3 == 0 else \\\"Buzz\\\" if i % 5 == 0 else str(i)\\nderp, works better of course\\nThough I can't find out the score the page to test the code seems to be inaccessible if you are signed in. It just redirects you to the signed in page and the signout link is broken. 
Still it's a fun and cool idea\"}\n{\"text\": \"Write a response to the following message:\\nHow to multiply isn't necessarily self evident, especially if you don't have a positional notation system. Even after the introduction of arabic numerals into Europe there were many different algorithms for performing multiplication, some of which are only moderately recognizable as our long multiplication system:\\nhttp://www.pballew.net/old_mult.htm\\nThis method in practice would have used objects that represent larger numbers(1,5,10) and with those a practitioner only needs to know how to add, subtract and multiply or divide by 2. All of which are operations which can be performed quickly. It certainly isn't true that you would need as many stones as the final result.\\n###Response: If you used objects to represent 5, 10, 100 stones, etc., and were capable of e.g. saying that 7 and 7 stones equal one 10-stone and 4 stones, you would have a compact representation of your number, and that would be the proper use of this algorithm.\\nBut note that this is different from what John H. Lienhard explicitly says: \\\"the hole contains 224 stones\\\". His version is completely pointless. None of us are objecting to the algorithm itself - yes, it is equivalent to long multiplication in binary, it has been known since ancient times, and there is nothing wrong with it. However, Lienhart has a) apparently misunderstood the point of the algorithm, and b) ascribed it to Ethiopian village \\\"shamans\\\", which gives one the mental image of Ethiopians standing around and watching a shaman manipulate hundreds of stones, instead of simply counting out the price - 7 for the first goat, 7 for the second, etc. This probably never happened, and would be stupid if it did; and yet people are coming here to defend this as an example of \\\"Ethiopian ingenuity\\\".\\nWhat kind of \\\"shamans\\\" is Ethiopia supposed to have, anyway? Aren't they a Siberian thing? 
Ethiopia has one of the most ancient Christian traditions in the world, you know. I agree with some other posters that this whole thing is some kind of a politically correct cringe which somehow tries to compliment Ethiopians for something they have nothing to do with, and only ends up insulting them. And every reader's intelligence, to boot.\"}\n{\"text\": \"Write a response to the following message:\\nThis whole premise is stupid.  We don't seem to have a problem with the lack of female linesmen, plumbers, construction workers, electricians, HVAC specialists, or garbage men.  There isn't a pest control or sewage treatment Barbie.  There also isn't a problem when 57% of college graduates are female.\\nIf women want to work in tech, nothing is stopping them.  They can learn to program, and write code.  This is something that you can teach yourself, you don't need to deal with or rely upon anyone else.  It seems that the problem is, at least as described by the New York Times, that women don't want to log the hours, and they want to do something that's more social.  That's fine, there is nothing wrong with that.  But they may be better off doing something other than writing code if that's the case.\\nWomen log fewer hours in other areas as well.  Female doctors see fewer patients than male doctors, and female lawyers are much less likely to make partner because they log far fewer hours than male lawyers.  There is nothing wrong with this.  Men and women are different.  This isn't inherently bad or wrong.\\nDisclaimer: My staff is 80% female.  We received more qualified female applicants than male applicants, so that's who we hired.\\n###Response: > This whole premise is stupid. We don't seem to have a problem with the lack of female linesmen, plumbers, construction workers, electricians, HVAC specialists, or garbage men.\\nSo why do you think that is?\\nIt sounds to me like evidence that women don't want those jobs! 
Which would imply that the jobs that people make a fuss about - e.g. programming and other tech jobs - are ones that a significant portion of women do want, and feel discouraged from.\"}\n{\"text\": \"Write a response to the following message:\\nI have never heard of the book. Is this a serious suggestion?\\n###Response: I'm not the poster you replied to, but it's a very good book on both how to be happy (somewhat) and when you're most likely to be happy (most of the book, and most of the examples).\\nIt's short on hard research, but provides a lot of good generalities, and some excellent starting points for finding the hard research.\\nOverall, it's a quick, light read and I'd recommend it as such.  It's not life-changing, but it's a great little overview of modern research on happiness, and on many kinds of common, widespread irrational behavior, along with an occasional bit of how to avoid succumbing to it.\"}\n{\"text\": \"Write a response to the following message:\\nI used to think this too, but the more I use Go the more I enjoy not needing to maintain a silly manifest\\n###Response: I keep meaning to spend some time playing with Go, but I haven't yet so I don't know how that feels. However, I wonder what it says that several dependency management tools have emerged for Go.\\nhttps://github.com/mattn/gom\\nhttps://github.com/nitrous-io/goop\\nhttps://github.com/tools/godep\"}\n{\"text\": \"Write a response to the following message:\\nI'd dig deeper on this one. When a company says \\\"we will donate $X to schools\\\", they often mean the notional retail value of their products. My high school computer class was drowning in donated AOL floppies, MS Office licenses, and copies of the soundtrack to Gigli.\\nIt's carpet bombing, basically. 
Maybe the kids benefit, maybe not, but they are being used in a distasteful way.\\n###Response: Good point, but judging from this I think they are talking about cash:\\n\\\"$1,000,000 from Desire2Learn can make a significant difference for thousands of students through the purchase of laptops, technology, supplies and other much needed educational support.\\\"\"}\n{\"text\": \"Write a response to the following message:\\nMicrosoft pretty much let their c compiler rot. It doesn't support dynamic allocation, ie, int arr[numElts];, it doesn't even support var declaration anywhere but at the beginning of a function. Making code I've been writing compatible with msvc has been a pure headache.\\n###Response: It is a C++ compiler, just use C++ and you will be fine.\"}\n{\"text\": \"Write a response to the following message:\\nI'm surprised there's no calculus. Not even a basic derivative. The rest of the exam is certainly difficult to warrant a calculus question.\\n###Response: I think that at that point calculus was a grad-school-level thing.  At the very least, it wasn't being taught in high schools.  In fact, I'm pretty sure it's only taught in high schools today under the aegis of Advanced Placement classes, which give you college credit.\"}\n{\"text\": \"Write a response to the following message:\\nThe last two sentences are fairly profound.  (I didn't quite understand what she was saying until I had read it a few times.)\\nWe think of ourselves as controlling our behavior, willing our actions into being, but it's not that simple.\\nIt's as if over time, we leave parts of ourselves all around us, which in turn, come to shape who we are.\\n###Response: Profound yet simple. Essentially this article is saying:\\nBy changing your environment you can dramatically change your behavior to the point where it can literally kick a heroin habit.\"}"
  },
  {
    "path": "tests/run_local_tests.sh",
    "content": "#!/bin/bash\n\n# TODO - rework this to spin up cog servers locally for prediction & training\n# this gives us the ability to test out post-training results (w/docker \"env\" vars)\n# I think that'll actually do it. \n\ncog predict -i prompt=\"Hey! How are you doing?\"\ncog train -i train_data=\"https://storage.googleapis.com/dan-scratch-public/fine-tuning/1k_samples_prompt.jsonl\" -i max_steps=10\n"
  },
  {
    "path": "tests/test_e2e.py",
    "content": "import pytest\nimport requests\nimport subprocess\nimport time\n\n# Constants\nSERVER_URL = \"http://localhost:5000/predictions\"\nHEALTH_CHECK_URL = \"http://localhost:5000/health-check\"\n\nIMAGE_NAME = \"your_image_name\"  # replace with your image name\nHOST_NAME = \"your_host_name\"  # replace with your host name\n\n\ndef wait_for_server_to_be_ready(url, timeout=300):\n    \"\"\"\n    Waits for the server to be ready.\n\n    Args:\n    - url: The health check URL to poll.\n    - timeout: Maximum time (in seconds) to wait for the server to be ready.\n    \"\"\"\n    start_time = time.time()\n    while True:\n        try:\n            response = requests.get(url)\n            data = response.json()\n\n            if data[\"status\"] == \"READY\":\n                return\n            elif data[\"status\"] == \"SETUP_FAILED\":\n                raise RuntimeError(\n                    \"Server initialization failed with status: SETUP_FAILED\"\n                )\n\n        except requests.RequestException:\n            pass\n\n        if time.time() - start_time > timeout:\n            raise TimeoutError(\"Server did not become ready in the expected time.\")\n\n        time.sleep(5)  # Poll every 5 seconds\n\n\n# Starting and stopping the server as part of the setup and teardown\n@pytest.fixture(scope=\"session\")\ndef server():\n    # Start the server\n    command = [\n        \"docker\",\n        \"run\",\n        \"-ti\",\n        \"-p\",\n        \"5000:5000\",\n        \"--gpus=all\",\n        \"-e\",\n        f\"COG_WEIGHTS=http://{HOST_NAME}:8000/training_output.zip\",\n        \"-v\",\n        \"`pwd`/training_output.zip:/src/local_weights.zip\",\n        IMAGE_NAME,\n    ]\n    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n\n    # Giving some time for the server to properly start\n    time.sleep(10)\n\n    yield process  # This is where the test will execute\n\n    # Stop the server\n    
process.terminate()\n    process.wait()\n\n\ndef test_health_check():\n    response = requests.get(HEALTH_CHECK_URL)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n\n\ndef test_prediction():\n    data = {\n        \"input\": {\n            \"prompt\": \"...\",\n            \"max_length\": \"...\",\n            # Add other parameters here\n        }\n    }\n    response = requests.post(SERVER_URL, json=data)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n    # Add other assertions based on expected response\n\n\n# You can add more tests as per your requirements\n\nif __name__ == \"__main__\":\n    pytest.main()\n"
  },
  {
    "path": "tests/test_predict.py",
    "content": "import pytest\nimport requests\nimport subprocess\nfrom threading import Thread, Lock\n\nfrom tests.test_utils import (\n    get_image_name,\n    capture_output,\n    wait_for_server_to_be_ready,\n)\n\n# Constants\nSERVER_URL = \"http://localhost:5000/predictions\"\nHEALTH_CHECK_URL = \"http://localhost:5000/health-check\"\n\nIMAGE_NAME = \"your_image_name\"  # replace with your image name\nHOST_NAME = \"your_host_name\"  # replace with your host name\n\n\n@pytest.fixture(scope=\"session\")\ndef server():\n    image_name = get_image_name()\n\n    command = [\n        \"docker\",\n        \"run\",\n        # \"-ti\",\n        \"-p\",\n        \"5000:5000\",\n        \"--gpus=all\",\n        image_name,\n    ]\n    print(\"\\n**********************STARTING SERVER**********************\")\n    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n\n    print_lock = Lock()\n\n    stdout_thread = Thread(target=capture_output, args=(process.stdout, print_lock))\n    stdout_thread.start()\n\n    stderr_thread = Thread(target=capture_output, args=(process.stderr, print_lock))\n    stderr_thread.start()\n\n    wait_for_server_to_be_ready(HEALTH_CHECK_URL)\n\n    yield process\n\n    process.terminate()\n    process.wait()\n\n\ndef test_health_check(server):\n    response = requests.get(HEALTH_CHECK_URL)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n\n\ndef test_simple_prediction(server):\n    data = {\n        \"input\": {\n            \"prompt\": \"It was a dark and stormy night.\",\n            \"max_new_tokens\": 25,\n            # Add other parameters here\n        }\n    }\n    response = requests.post(SERVER_URL, json=data)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n    print(\"\\n**********************RESPONSE**********************\")\n    print(\"\".join(response.json()[\"output\"]))\n   
 print(\"******************************************************\\n\")\n    # Add other assertions based on expected response\n\n\ndef test_input_too_long(server):\n    # This is a placeholder. You need to provide an input that is expected to be too long.\n    data = {\n        \"input\": {\n            \"prompt\": \" a\"\n            * 6000,  # Assuming this string will produce more than 4096 tokens.\n            \"max_new_tokens\": 25,\n            # Add other parameters here\n        }\n    }\n\n    response = requests.post(SERVER_URL, json=data)\n\n    response_data = response.json()\n\n    assert \"error\" in response_data, \"Expected an 'error' field in the response\"\n\n    error_msg_prefix = \"Your input is too long. Max input length is\"\n    assert response_data[\"error\"].startswith(\n        error_msg_prefix\n    ), f\"Expected the error message to start with '{error_msg_prefix}'\"\n    assert response_data[\"status\"] == \"failed\", \"Expected the status to be 'failed'\"\n\n    print(\"\\n**********************RESPONSE**********************\")\n    print(response.text)\n    print(\"******************************************************\\n\")\n\n\nif __name__ == \"__main__\":\n    pytest.main()\n"
  },
  {
    "path": "tests/test_predict_with_trained_weights.py",
    "content": ""
  },
  {
    "path": "tests/test_remote_predict.py",
    "content": "import pytest\nimport replicate\n\n\n@pytest.fixture(scope=\"module\")\ndef model_name(request):\n    return request.config.getoption(\"--model\")\n\n\n@pytest.fixture(scope=\"module\")\ndef model(model_name):\n    return replicate.models.get(model_name)\n\n\n@pytest.fixture(scope=\"module\")\ndef version(model):\n    versions = model.versions.list()\n    return versions[0]\n\n\n@pytest.fixture(scope=\"module\")\ndef prediction_tests():\n    return [\n        {\"prompt\": \"How are you doing today?\"},\n    ]\n\n\ndef test_initial_predictions(version, prediction_tests):\n    predictions = [\n        replicate.predictions.create(version=version, input=val)\n        for val in prediction_tests\n    ]\n    for val in predictions:\n        val.wait()\n        assert val.status == \"succeeded\"\n"
  },
  {
    "path": "tests/test_remote_train.py",
    "content": "import time\nimport pytest\nimport replicate\n\n\n@pytest.fixture(scope=\"module\")\ndef model_name(request):\n    return request.config.getoption(\"--model\")\n\n\n@pytest.fixture(scope=\"module\")\ndef model(model_name):\n    return replicate.models.get(model_name)\n\n\n@pytest.fixture(scope=\"module\")\ndef version(model):\n    versions = model.versions.list()\n    return versions[0]\n\n\n@pytest.fixture(scope=\"module\")\ndef training(model_name, version):\n    training_input = {\n        # \"train_data\": \"https://storage.googleapis.com/replicate-weights/training-deadlock/1k_samples.jsonl\",\n        \"train_data\": \"https://pub-3054bb37389944ca9c8e5ada8572840e.r2.dev/samsum.jsonl\",\n    }\n    return replicate.trainings.create(\n        version=model_name + \":\" + version.id,\n        input=training_input,\n        destination=\"replicate-internal/training-scratch\",\n    )\n\n\n@pytest.fixture(scope=\"module\")\ndef prediction_tests():\n    return [\n        {\"prompt\": \"How are you doing today?\"},\n        {\n            \"prompt\": \"\"\"[INST] <<SYS>>\n        Use the Input to provide a summary of a conversation.\n        <</SYS>>\n        Input:\n        Liam: did you see that new movie that just came out?\n        Liam: \"Starry Skies\" I think it's called \n        Ava: oh yeah, I heard about it \n        Liam: it's about this astronaut who gets lost in space \n        Liam: and he has to find his way back to earth \n        Ava: sounds intense \n        Liam: it was! there were so many moments where I thought he wouldn't make it \n        Ava: i need to watch it then, been looking for a good movie \n        Liam: highly recommend it! \n        Ava: thanks for the suggestion Liam! \n        Liam: anytime, always happy to share good movies \n        Ava: let's plan to watch it together sometime \n        Liam: sounds like a plan! 
[/INST]\n        \"\"\"\n        },\n    ]\n\n\ndef test_training(training):\n    while training.completed_at is None:\n        time.sleep(60)\n        training.reload()\n    assert training.status == \"succeeded\"\n\n\n@pytest.fixture(scope=\"module\")\ndef trained_model_and_version(training):\n    trained_model, trained_version = training.output[\"version\"].split(\":\")\n    return trained_model, trained_version\n\n\ndef test_post_training_predictions(trained_model_and_version, prediction_tests):\n    trained_model, trained_version = trained_model_and_version\n    model = replicate.models.get(trained_model)\n    version = model.versions.get(trained_version)\n    predictions = [\n        replicate.predictions.create(version=version, input=val)\n        for val in prediction_tests\n    ]\n\n    for ind, val in enumerate(predictions):\n        val.wait()\n        assert val.status == \"succeeded\"\n        out = \"\".join(val.output)\n        print(\"Output: \", out)\n        if ind == 1:\n            assert \"Summary\" in out\n"
  },
  {
    "path": "tests/test_train.py",
    "content": "import pytest\nimport os\nimport re\n\nfrom tests.test_utils import run_training_subprocess\n\nERROR_PATTERN = re.compile(r\"ERROR:|Exception\", re.IGNORECASE)\n\n# Constants\nSERVER_URL = \"http://localhost:5000/predictions\"\nHEALTH_CHECK_URL = \"http://localhost:5000/health-check\"\n\nIMAGE_NAME = \"your_image_name\"  # replace with your image name\nHOST_NAME = \"your_host_name\"  # replace with your host name\n\n\n# def run_training_subprocess(command):\n#     # Start the subprocess with pipes for stdout and stderr\n#     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n\n#     # Create a lock for printing to avoid potential race conditions between the two print processes\n#     print_lock = multiprocessing.Lock()\n\n#     # Start two separate processes to handle stdout and stderr\n#     stdout_processor = multiprocessing.Process(target=capture_output, args=(process.stdout, print_lock))\n#     stderr_processor = multiprocessing.Process(target=capture_output, args=(process.stderr, print_lock))\n\n#     # Start the log processors\n#     stdout_processor.start()\n#     stderr_processor.start()\n\n#     # Wait for the subprocess to finish\n#     return_code = process.wait()\n\n#     # Wait for the log processors to finish\n#     stdout_processor.join()\n#     stderr_processor.join()\n\n#     return return_code\n\n\ndef test_train():\n    command = [\n        \"cog\",\n        \"train\",\n        \"-i\",\n        \"train_data=https://storage.googleapis.com/dan-scratch-public/fine-tuning/1k_samples_prompt.jsonl\",\n        \"-i\",\n        \"train_batch_size=4\",\n        \"-i\",\n        \"max_steps=5\",\n        \"-i\",\n        \"gradient_accumulation_steps=2\",\n    ]\n\n    # result = subprocess.run(command, capture_output=False, text=True)#, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n    try:\n        logs = run_training_subprocess(command)\n    except Exception as e:\n        pytest.fail(f\"Error 
detected in training logs! Exception: {str(e)}\")\n\n    # Additional assertions can be added here, e.g.:\n    assert not any(\n        ERROR_PATTERN.search(log) for log in logs\n    ), \"Error pattern detected in logs!\"\n\n    # Check the return code\n    # assert exit_code == 0, \"Subprocess failed with return code {}\".format(exit_code)\n\n    # # Check if the log indicates successful completion for all processes\n    # success_logs = result.stdout.count(\"exits successfully.\")\n    # # Assuming 4 processes should exit successfully based on the logs provided\n    # assert success_logs == 4, \"Not all processes exited successfully. Expected 4 but got {}\".format(success_logs)\n\n    # # Optionally, you can also check for other indicators\n    # assert \"Written output to weights\" in result.stdout, \"Output weights were not successfully written.\"\n\n    assert os.path.exists(\"training_output.zip\")\n    # print_lock = Lock()\n\n    # stdout_thread = Thread(target=capture_output, args=(process.stdout, print_lock))\n    # stdout_thread.start()\n\n    # stderr_thread = Thread(target=capture_output, args=(process.stderr, print_lock))\n    # stderr_thread.start()\n\n    # process.terminate()\n    # process.wait()\n"
  },
  {
    "path": "tests/test_train_predict.py",
    "content": "import pytest\nimport requests\nimport subprocess\nimport os\nfrom threading import Thread, Lock\n\nfrom tests.test_utils import (\n    get_image_name,\n    capture_output,\n    wait_for_server_to_be_ready,\n)\n\n# Constants\nSERVER_URL = \"http://localhost:5000/predictions\"\nHEALTH_CHECK_URL = \"http://localhost:5000/health-check\"\n\nIMAGE_NAME = \"your_image_name\"  # replace with your image name\nHOST_NAME = \"your_host_name\"  # replace with your host name\n\n\n@pytest.fixture(scope=\"session\")\ndef server():\n    image_name = get_image_name()\n    current_directory = os.getcwd()\n    volume_mount = f\"{current_directory}/training_output.zip:/src/local_weights.zip\"\n\n    command = [\n        \"docker\",\n        \"run\",\n        \"-p\",\n        \"5000:5000\",\n        \"--gpus=all\",\n        \"-e\",\n        f\"COG_WEIGHTS=http://{HOST_NAME}:8000/training_output.zip\",\n        \"-v\",\n        volume_mount,\n        image_name,\n    ]\n    print(\"\\n**********************STARTING SERVER**********************\")\n    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n\n    print_lock = Lock()\n\n    stdout_thread = Thread(target=capture_output, args=(process.stdout, print_lock))\n    stdout_thread.start()\n\n    stderr_thread = Thread(target=capture_output, args=(process.stderr, print_lock))\n    stderr_thread.start()\n\n    wait_for_server_to_be_ready(HEALTH_CHECK_URL)\n\n    yield process\n\n    process.terminate()\n    process.wait()\n\n\ndef test_health_check(server):\n    response = requests.get(HEALTH_CHECK_URL)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n\n\ndef test_prediction(server):\n    data = {\n        \"input\": {\n            \"prompt\": \"It was a dark and stormy night.\",\n            \"max_new_tokens\": 25,\n            # Add other parameters here\n        }\n    }\n    response = requests.post(SERVER_URL, 
json=data)\n    assert (\n        response.status_code == 200\n    ), f\"Unexpected status code: {response.status_code}\"\n    print(\"\\n**********************RESPONSE**********************\")\n    print(\"\".join(response.json()[\"output\"]))\n    print(\"******************************************************\\n\")\n    # Add other assertions based on expected response\n\n\ndef test_input_too_long(server):\n    # This is a placeholder. You need to provide an input that is expected to be too long.\n    data = {\n        \"input\": {\n            \"prompt\": \" a\"\n            * 6000,  # Assuming this string will produce more than 4096 tokens.\n            \"max_new_tokens\": 25,\n            # Add other parameters here\n        }\n    }\n\n    response = requests.post(SERVER_URL, json=data)\n\n    response_data = response.json()\n    assert \"error\" in response_data, \"Expected an 'error' field in the response\"\n\n    error_msg_prefix = \"Your input is too long. Max input length is\"\n    assert response_data[\"error\"].startswith(\n        error_msg_prefix\n    ), f\"Expected the error message to start with '{error_msg_prefix}'\"\n    assert response_data[\"status\"] == \"failed\", \"Expected the status to be 'failed'\"\n\n    print(\"\\n**********************RESPONSE**********************\")\n    print(response.text)\n    print(\"******************************************************\\n\")\n\n\nif __name__ == \"__main__\":\n    pytest.main()\n"
  },
  {
    "path": "tests/test_utils.py",
    "content": "import os\nimport json\nimport requests\nimport time\nimport re\nimport multiprocessing\nimport subprocess\n\nERROR_PATTERN = re.compile(r\"ERROR:\")\n\n\ndef get_image_name():\n    current_dir = os.path.basename(os.getcwd())\n\n    if \"cog\" in current_dir:\n        return current_dir\n    else:\n        return f\"cog-{current_dir}\"\n\n\ndef process_log_line(line):\n    line = line.decode(\"utf-8\").strip()\n    try:\n        log_data = json.loads(line)\n        return json.dumps(log_data, indent=2)\n    except json.JSONDecodeError:\n        return line\n\n\n# def capture_output(pipe, print_lock):\n#     for line in iter(pipe.readline, b''):\n#         formatted_line = process_log_line(line)\n#         with print_lock:\n#             print(formatted_line)\n\n\ndef capture_output(pipe, print_lock, logs=None, error_detected=None):\n    for line in iter(pipe.readline, b\"\"):\n        formatted_line = process_log_line(line)\n        with print_lock:\n            print(formatted_line)\n            if logs is not None:\n                logs.append(formatted_line)\n            if error_detected is not None:\n                if ERROR_PATTERN.search(formatted_line):\n                    error_detected[0] = True\n\n\ndef wait_for_server_to_be_ready(url, timeout=300):\n    \"\"\"\n    Waits for the server to be ready.\n\n    Args:\n    - url: The health check URL to poll.\n    - timeout: Maximum time (in seconds) to wait for the server to be ready.\n    \"\"\"\n    start_time = time.time()\n    while True:\n        try:\n            response = requests.get(url)\n            data = response.json()\n\n            if data[\"status\"] == \"READY\":\n                return\n            elif data[\"status\"] == \"SETUP_FAILED\":\n                raise RuntimeError(\n                    \"Server initialization failed with status: SETUP_FAILED\"\n                )\n\n        except requests.RequestException:\n            pass\n\n        if time.time() - start_time 
> timeout:\n            raise TimeoutError(\"Server did not become ready in the expected time.\")\n\n        time.sleep(5)  # Poll every 5 seconds\n\n\ndef run_training_subprocess(command):\n    # Start the subprocess with pipes for stdout and stderr\n    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n\n    # Create a lock for printing and a list to accumulate logs\n    print_lock = multiprocessing.Lock()\n    logs = multiprocessing.Manager().list()\n    error_detected = multiprocessing.Manager().list([False])\n\n    # Start two separate processes to handle stdout and stderr\n    stdout_processor = multiprocessing.Process(\n        target=capture_output, args=(process.stdout, print_lock, logs, error_detected)\n    )\n    stderr_processor = multiprocessing.Process(\n        target=capture_output, args=(process.stderr, print_lock, logs, error_detected)\n    )\n\n    # Start the log processors\n    stdout_processor.start()\n    stderr_processor.start()\n\n    # Wait for the subprocess to finish\n    process.wait()\n\n    # Wait for the log processors to finish\n    stdout_processor.join()\n    stderr_processor.join()\n\n    # Check if an error pattern was detected\n    if error_detected[0]:\n        raise Exception(\"Error detected in training logs! Check logs for details\")\n\n    return list(logs)\n"
  },
  {
    "path": "tests/timing.py",
    "content": "import time\nimport replicate\nimport os\n\nbase = \"replicate-internal/staging-llama-2-7b:8ba7b9478e1cbdde020f79f0838cd94465dfc6fc0207e01d2e59c00422f65148\"\n\nv1 = \"a42037aa39fc7cdc9138d61a0a94172107906ed8be7c8b0568cc5766d633f0fe\"\nv2 = \"ca0a7d930eed4f330d7f187a18052842f35087fc15b93b741a554753591cb366\"\n\nmodel = replicate.models.get(\"technillogue/llama2-summarizer\")\nver1 = model.versions.get(v1)\nver2 = model.versions.get(v2)\n\nos.system(\"kubectl delete pod -l replicate/version_short_id=8ba7b947\")\n\n\ndef run(v):\n    t0 = time.time()\n    # gen = replicate.run(v1, input={\"prompt\": \"a\"})\n    global last\n    last = pred = replicate.predictions.create(v, input={\"prompt\": \"a\"})\n    t1 = time.time()\n    print(f\"got result after {t1 - t0:.4f}\")\n    gen = pred.output_iterator()\n    next(gen)\n    t2 = time.time()\n    print(f\"got first token {t2 - t1:.4f}\")\n    try:\n        print(re.search(\"previous weights were (.*)\\n\", pred.logs).group().strip())\n    except:\n        pass\n    try:\n        print(re.search(\"Downloaded peft weights in (\\d+.\\d+)\", pred.logs).group())\n    except:\n        pass\n    try:\n        print(re.search(\"initialize_peft took (\\d+.\\d+)\", pred.logs).group())\n    except:\n        pass\n    print(f\"prediciton created to first token: {t2 - t0:.4f}\")\n    pred.wait()\n    t3 = time.time()\n    print(re.search(\"hostname: (.*)\\n\", pred.logs).group().strip())\n    print(f\"prediction took {t3 - t2:.4f} from first to last token\")\n\n\nrun(ver1)\nrun(ver2)\n"
  },
  {
    "path": "tests/unit_tests/test_completion_dataset.py",
    "content": "import pytest\n\nimport sys\n\nsys.path.append(\".\")\n\nfrom llama_recipes.ft_datasets.completion_dataset import (\n    load_data,\n    format_data,\n    tokenize_data,\n)\n\nfrom dataclasses import dataclass\n\n\n@pytest.fixture(scope=\"session\")\ndef dataset_config():\n    @dataclass\n    class completion:\n        dataset: str = \"completion\"\n        train_split: str = \"train\"\n        test_split: str = \"val\"\n        data_path: str = \"tests/data/200_samples.jsonl\"\n        num_validation_samples: int = 100\n        run_validation: bool = True\n        validation_data_path: str = None\n        pack_sequences: bool = True\n        wrap_packed_sequences: bool = True\n        chunk_size: int = 100\n\n    return completion\n\n\n@pytest.fixture(scope=\"session\")\ndef tokenizer():\n    from transformers import LlamaTokenizer\n\n    tokenizer = LlamaTokenizer.from_pretrained(\n        \"tests/assets/llama_tokenizer\", legacy=False\n    )\n    tokenizer.add_special_tokens(\n        {\n            \"pad_token\": \"<PAD>\",\n            \"eos_token\": \"</s>\",\n            \"bos_token\": \"<s>\",\n        }\n    )\n    return tokenizer\n\n\ndef test__load_data_train(dataset_config):\n    dataset_config.run_validation = False\n    dataset = load_data(dataset_config, split=\"train\")\n    assert len(dataset) == 200\n    for example in dataset:\n        assert example[\"text\"].startswith(\"Write a response to the following message\")\n\n\ndef test__load_data_train_with_val_split(dataset_config):\n    dataset_config.run_validation = True\n    train_dataset = load_data(dataset_config, split=\"train\")\n\n    train_texts = [example[\"text\"] for example in train_dataset]\n\n    val_dataset = load_data(dataset_config, split=\"val\")\n    assert len(val_dataset) == 100\n    for example in val_dataset:\n        assert example[\"text\"].startswith(\"Write a response to the following message\")\n        assert example[\"text\"] not in 
train_texts\n\n\n@pytest.fixture(scope=\"session\")\ndef dataset(dataset_config):\n    dataset_config.run_validation = False\n    dataset = load_data(dataset_config, split=\"train\")\n    return dataset\n\n\ndef test_format_data(dataset, tokenizer):\n    formatted_data = format_data(dataset, tokenizer, dataset_config)\n    for example in formatted_data:\n        assert example[\"text\"].startswith(\"Write a response to the following message\")\n        assert example[\"text\"].endswith(tokenizer.eos_token)\n\n\n@pytest.fixture(scope=\"session\")\ndef formatted_dataset(dataset, tokenizer):\n    return format_data(dataset, tokenizer, dataset_config)\n\n\ndef test_tokenize_data_with_wrapped_packing(\n    formatted_dataset, tokenizer, dataset_config\n):\n    dataset_config.pack_sequences = True\n    dataset_config.wrap_packed_sequences = True\n\n    tokenized_data = tokenize_data(formatted_dataset, tokenizer, dataset_config)\n\n    for tokenized_example in tokenized_data:\n        assert \"labels\" in tokenized_example\n\n    decoded_data = tokenizer.batch_decode(\n        tokenized_data[\"input_ids\"], skip_special_tokens=False\n    )\n\n    decoded_data = tokenizer.batch_decode(\n        tokenized_data[\"input_ids\"], skip_special_tokens=True\n    )\n\n    at_least_one_wrapped = False\n    for example in decoded_data:\n        if not example.startswith(\"Write a response to the following message\"):\n            at_least_one_wrapped = True\n\n    assert at_least_one_wrapped\n\n    for tokenized_example in tokenized_data[\"input_ids\"]:\n        assert len(tokenized_example) == dataset_config.chunk_size\n\n\ndef test_tokenize_data_without_wrapped_packing_small_chunk(\n    formatted_dataset, tokenizer, dataset_config\n):\n    dataset_config.pack_sequences = True\n    dataset_config.wrap_packed_sequences = False\n    dataset_config.chunk_size: int = 100\n\n    tokenized_data = tokenize_data(formatted_dataset, tokenizer, dataset_config)\n\n    for tokenized_example in 
tokenized_data:\n        assert tokenized_example[\"input_ids\"][-1] == tokenizer.eos_token_id\n        assert \"labels\" in tokenized_example\n\n    decoded_data = tokenizer.batch_decode(\n        tokenized_data[\"input_ids\"], skip_special_tokens=False\n    )\n\n    for example in decoded_data:\n        prefix = \" \".join(\n            [tokenizer.bos_token, \"Write a response to the following message\"]\n        )\n        assert example.startswith(prefix)\n\n    recovered_data = []\n    for decoded_sequence in decoded_data:\n        for decoded_example in decoded_sequence.split(tokenizer.eos_token)[:-1]:\n            decoded_example = decoded_example.removeprefix(tokenizer.bos_token + \" \")\n            decoded_example += tokenizer.eos_token\n            recovered_data.append(decoded_example)\n\n    for i in range(len(recovered_data)):\n        assert recovered_data[i] == formatted_dataset[i][\"text\"]\n\n\ndef test_tokenize_data_without_wrapped_packing_large_chunk(\n    formatted_dataset, tokenizer, dataset_config\n):\n    dataset_config.pack_sequences = True\n    dataset_config.wrap_packed_sequences = False\n    dataset_config.chunk_size: int = 2048\n\n    tokenized_data = tokenize_data(formatted_dataset, tokenizer, dataset_config)\n\n    for tokenized_example in tokenized_data:\n        assert tokenized_example[\"input_ids\"][-1] == tokenizer.eos_token_id\n        assert \"labels\" in tokenized_example\n\n    decoded_data = tokenizer.batch_decode(\n        tokenized_data[\"input_ids\"], skip_special_tokens=False\n    )\n\n    for example in decoded_data:\n        prefix = \" \".join(\n            [tokenizer.bos_token, \"Write a response to the following message\"]\n        )\n        assert example.startswith(prefix)\n\n    recovered_data = []\n    for decoded_sequence in decoded_data:\n        for decoded_example in decoded_sequence.split(tokenizer.eos_token)[:-1]:\n            decoded_example = decoded_example.removeprefix(tokenizer.bos_token + \" \")\n   
         decoded_example += tokenizer.eos_token\n            recovered_data.append(decoded_example)\n\n    for i in range(len(recovered_data)):\n        assert recovered_data[i] == formatted_dataset[i][\"text\"]\n\n\ndef test_tokenize_data_without_packing(formatted_dataset, tokenizer, dataset_config):\n    dataset_config.pack_sequences = False\n    tokenized_data = tokenize_data(formatted_dataset, tokenizer, dataset_config)\n\n    for tokenized_example in tokenized_data[\"input_ids\"]:\n        assert tokenized_example[-1] == tokenizer.eos_token_id\n\n    decoded_data = tokenizer.batch_decode(\n        tokenized_data[\"input_ids\"], skip_special_tokens=True\n    )\n    for i, example in enumerate(decoded_data):\n        assert example.startswith(\"Write a response to the following message\")\n        assert example + tokenizer.eos_token == formatted_dataset[i][\"text\"]\n"
  },
  {
    "path": "tests/unit_tests/test_utils.py",
    "content": "import pytest\n\nimport sys\n\nsys.path.append(\".\")\n\nfrom src.src.utils import StreamingTextStopSequenceHandler\n\n\n@pytest.fixture(scope=\"session\")\ndef tokenizer():\n    from transformers import LlamaTokenizer\n\n    tokenizer = LlamaTokenizer.from_pretrained(\n        \"tests/assets/llama_tokenizer\", legacy=False\n    )\n    tokenizer.add_special_tokens(\n        {\n            \"pad_token\": \"<PAD>\",\n            \"eos_token\": \"</s>\",\n            \"bos_token\": \"<s>\",\n        }\n    )\n    return tokenizer\n\n\ndef get_decoded_prompt_tokens(tokenizer, prompt):\n    token_ids = tokenizer.encode(prompt, add_special_tokens=False)\n    tokens = [tokenizer.decode(token_id) for token_id in token_ids]\n    return tokens\n\n\ndef test_no_stop_sequences(tokenizer):\n    stop_sequences = None\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"Hello world\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"how are <end> you?\"\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n\n        if yielded_text == stop_sequence_handler.eos_token:\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" how are <end> you?\"\n    )  # All tokens are yielded since no stop sequence was provided\n\n\ndef test_single_stop_sequence_1(tokenizer):\n    stop_sequences = 
[\"<end>\"]\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"Hello world\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"how are <end> you?\"\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n\n        if yielded_text == stop_sequence_handler.eos_token:\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" how are \"\n    )  # Tokens after the <end> stop sequence are not yielded\n\n\ndef test_single_stop_sequence_2(tokenizer):\n    stop_sequences = [\"###\"]\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"Hello world\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"how are ### you?\"\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n\n        if yielded_text == 
stop_sequence_handler.eos_token:\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" how are \"\n    )  # Tokens after the ### stop sequence are not yielded\n\n\ndef test_multiple_stop_sequence(tokenizer):\n    stop_sequences = [\"<end>\", \"|STOP|\"]\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"Hello world\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"how are <end you |STOP| today?\"\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n\n        if yielded_text.endswith(stop_sequence_handler.eos_token):\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" how are <end you \"\n    )  # Stops at |STOP|; the incomplete <end marker is yielded as normal text\n\n\ndef test_adjacent_stop_sequences(tokenizer):\n    stop_sequences = [\"<end>\", \"|STOP|\"]\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"Hello world\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"how are <end |STOP| today?\"\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in 
response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n\n        if yielded_text == stop_sequence_handler.eos_token:\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" how are <end \"\n    )  # Stops at |STOP|; the incomplete <end marker is yielded as normal text\n\n\ndef test_substring_stop_sequence(tokenizer):\n    \"\"\"\n    This test ensures that we stop generating when a stop sequence is a substring.\n    \"\"\"\n    stop_sequences = [\"</output>\"]\n    stop_sequence_handler = StreamingTextStopSequenceHandler(\n        stop_sequences, eos_token=tokenizer.eos_token\n    )\n\n    prompt = \"<input>4</input><output>\"\n    prompt_tokens = tokenizer.encode(prompt)\n\n    response = \"\"\"5</output></block>\"\"\"\n\n    response_tokens = tokenizer.encode(response, add_special_tokens=False)\n\n    old_text = tokenizer.decode(prompt_tokens)\n    output = []\n    for token in response_tokens:\n        prompt_tokens.append(token)\n        text = tokenizer.decode(prompt_tokens)\n        new_text = text[len(old_text) :]\n        old_text = text\n\n        for yielded_text in stop_sequence_handler(new_text):\n            if yielded_text == stop_sequence_handler.eos_token:\n                break\n\n            output.append(yielded_text)\n            print(\"\".join(output))\n\n        if yielded_text == stop_sequence_handler.eos_token:\n            break\n\n    for yielded_text in stop_sequence_handler.finalize():\n        output.append(yielded_text)\n\n    assert (\n        \"\".join(output) == \" 5\"\n    )  # Generation stops at the </output> stop sequence; </block> is never 
yielded\n"
  },
  {
    "path": "train.py",
    "content": "import argparse\nimport asyncio\nimport os\nimport shutil\nimport subprocess\nfrom zipfile import ZipFile\nimport psutil\n\n\nimport torch\nfrom cog import BaseModel, Input, Path\n\nfrom config import (\n    LOCAL_TRAINING_WEIGHTS_PATH,\n    REMOTE_TRAINING_WEIGHTS_PATH,\n    REMOTE_TRAINING_FILES_TO_DOWNLOAD,\n    MODEL_NAME,\n)\n\nfrom src.utils import maybe_download_with_pget, download_file_with_pget\n\n\nMODEL_OUT = \"/src/tuned_weights.tensors\"\nCHECKPOINT_DIR = \"checkpoints\"\nSAVE_STRATEGY = \"epoch\"\nOUTPUT_DIR = \"training_output\"\n\n\nclass TrainingOutput(BaseModel):\n    weights: Path\n\n\ndef train(\n    fake_output: str = Input(description=\"fake training\", default=None),\n    train_data: Path = Input(\n        description=\"path to data file to use for fine-tuning your model\"\n    ),\n    num_train_epochs: int = Input(\n        description=\"number of training epochs\",\n        ge=1,\n        default=1,\n    ),\n    train_batch_size: int = Input(\n        description=\"Global batch size. This specifies the batch size that will be used to calculate gradients.\",\n        default=4,\n        ge=1,\n    ),\n    gradient_accumulation_steps: int = Input(\n        description=\"Number of training steps (each of train_batch_size) to update gradients for before performing a backward pass.\",\n        default=1,\n        ge=1,\n    ),\n    num_validation_samples: int = Input(\n        description=(\n            \"Number of samples to use for validation.\"\n            \"If `run_validation` is `True` and `validation_data` is not specified, this number of samples\"\n            \"will be selected from the tail of the training data. 
If `validation_data` is specified, this\"\n            \"number of samples will be selected from the head of the validation data, up to the size of the validation data.\"\n        ),\n        default=50,\n        ge=1,\n    ),\n    validation_data: Path = Input(\n        description=\"path to optional evaluation data file to use for model eval\",\n        default=None,\n    ),\n    validation_batch_size: int = Input(\n        description=\"Batch size for evaluation\", default=1, ge=1\n    ),\n    run_validation: bool = Input(\n        description=\"Whether to run validation during training.\", default=True\n    ),\n    validation_prompt: str = Input(\n        description=\"Prompt to use for generation during validation. If provided, a response to this prompt will be sampled and logged during validation.\",\n        default=None,\n    ),\n    learning_rate: float = Input(\n        description=\"learning rate, for learning!\", default=1e-4, ge=0\n    ),\n    pack_sequences: bool = Input(\n        description=\"If 'True', sequences will be packed into a single sequences up to a given length. This improves computational efficiency.\",\n        default=False,\n    ),\n    wrap_packed_sequences: bool = Input(\n        description=\"If 'pack_sequences' is 'True', this will wrap packed sequences across examples, ensuring a constant sequence length but breaking prompt formatting.\",\n        default=False,\n    ),\n    chunk_size: int = Input(\n        description=\"If 'pack_sequences' is 'True', this will chunk sequences into chunks of this size.\",\n        default=2048,\n        ge=1,\n    ),\n    peft_method: str = Input(\n        description=\"Training method to use. Currently, 'lora' and 'qlora'.\",\n        default=\"lora\",\n        choices=[\"lora\", \"qlora\"],\n    ),\n    seed: int = Input(description=\"random seed to use for training\", default=42),\n    local_model_path: str = Input(\n        description=\"Path to local model to use for training. 
If not specified, will download a model based on `REMOTE_TRAINING_WEIGHTS_PATH`.\",\n        default=None,\n    ),\n    # weights: Path = Input(\n    #     description=\"location of weights that are going to be fine-tuned\", default=None\n    # ),\n    #\n    # warmup_ratio: float = Input(\n    #     description=\"pct of steps for a linear learning rate warmup\",\n    #     ge=0,\n    #     le=0.5,\n    #     default=0.03,\n    # ),\n    # max_steps: int = Input(\n    #     description=\"number of steps to run training for, supersedes num_train_epochs\",\n    #     default=-1,\n    # ),\n    # logging_steps: int = Input(\n    #     description=\"number of steps between logging epoch & loss\", default=1\n    # ),\n    lora_rank: int = Input(description=\"Rank of the lora matrices\", default=8, ge=1),\n    lora_alpha: int = Input(\n        description=\"Alpha parameter for scaling lora weights; weights are scaled by alpha/rank\",\n        default=16,\n        ge=1,\n    ),\n    lora_dropout: float = Input(\n        description=\"Dropout for lora training\", default=0.05, ge=0.0, le=1.0\n    ),\n    # lora_target_modules: str = Input(description=\"Comma-separated list of lora modules to target, i.e. 'q_proj,v_proj'. 
Leave blank for default.\", default=\"q_proj,v_proj\")\n) -> TrainingOutput:\n    if fake_output:\n        out_path = f\"/tmp/{os.path.basename(fake_output)}\"\n        asyncio.run(download_file_with_pget(fake_output, out_path))\n        return TrainingOutput(weights=Path(out_path))\n\n    # Hardcode QLoRA for 70B models for now\n    if \"70\" in MODEL_NAME and peft_method != \"qlora\":\n        print(\"Using 70B model, setting peft_method to qlora\")\n        peft_method = \"qlora\"\n\n    if not local_model_path:\n        weights = REMOTE_TRAINING_WEIGHTS_PATH\n\n        if \"http\" in weights:\n            print(f\"Downloading weights to {LOCAL_TRAINING_WEIGHTS_PATH}...\")\n            model_path = maybe_download_with_pget(\n                LOCAL_TRAINING_WEIGHTS_PATH,\n                weights,\n                REMOTE_TRAINING_FILES_TO_DOWNLOAD,\n            )\n\n    else:\n        model_path = local_model_path\n\n    root_path = os.getcwd()\n\n    output_dir = OUTPUT_DIR\n    if os.path.exists(output_dir):\n        shutil.rmtree(output_dir)\n    os.makedirs(output_dir)\n\n    num_gpus = torch.cuda.device_count()\n\n    print(f\"Local Output Dir: {output_dir}\")\n    print(f\"Number of GPUs: {num_gpus}\")\n\n    os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n    os.environ[\"HF_DATASETS_CACHE\"] = \"/src/.hf-cache\"\n\n    args = []\n\n    if peft_method != \"qlora\":\n        args.extend(\n            [\n                \"python3\",\n                \"-m\",\n                \"torch.distributed.run\",\n                \"--nnodes=1\",\n                f\"--nproc_per_node={num_gpus}\",\n            ]\n        )\n    else:\n        args.append(\"python\")\n\n    args.append(\n        \"llama_recipes/llama_finetuning.py\",\n    )\n\n    if peft_method != \"qlora\":\n        args.append(\n            \"--enable_fsdp\",\n        )\n\n    args.extend(\n        [\n            # Hard coded for now\n            \"--use_peft\",\n            
f\"--model_name={model_path}\",\n            \"--pure_bf16\",\n            f\"--output_dir={output_dir}\",\n            # User specified arguments -----\n            # Preprocessing arguments\n            f\"--pack_sequences={pack_sequences}\",\n            f\"--wrap_packed_sequences={wrap_packed_sequences}\",\n            f\"--chunk_size={chunk_size}\",\n            # Train arguments\n            f\"--data_path={train_data}\",\n            f\"--num_epochs={num_train_epochs}\",\n            f\"--batch_size_training={train_batch_size}\",\n            f\"--gradient_accumulation_steps={gradient_accumulation_steps}\",\n            f\"--lr={learning_rate}\",\n            f\"--lora_rank={lora_rank}\",\n            f\"--lora_alpha={lora_alpha}\",\n            f\"--lora_dropout={lora_dropout}\",\n            f\"--peft_method={peft_method}\",\n            # Validation arguments\n            f\"--run_validation={'False' if not run_validation else 'True'}\",\n            f\"--num_validation_samples={num_validation_samples}\",\n            f\"--validation_data_path={validation_data}\",\n            f\"--val_batch_size={validation_batch_size}\",\n            f\"--validation_prompt={validation_prompt}\",\n            # Other arguments\n            f\"--seed={seed}\",\n        ]\n    )\n\n    print(f\"Train.py Arguments: \\n{args}\")\n\n    p = None\n    try:\n        p = subprocess.Popen(args, close_fds=False)\n        p.wait()\n        return_code = p.poll()\n        if return_code != 0:\n            raise Exception(\n                f\"Training failed with exit code {return_code}! 
Check logs for details\"\n            )\n        out_path = \"training_output.zip\"\n\n        directory = Path(output_dir)\n        with ZipFile(out_path, \"w\") as zip:\n            for file_path in directory.rglob(\"*\"):\n                print(file_path)\n                zip.write(file_path, arcname=file_path.relative_to(directory))\n\n        return TrainingOutput(weights=Path(out_path))\n    finally:\n        if p and p.poll() is None:\n            top = psutil.Process(p.pid)\n            children = top.children(recursive=True)\n            for process in children + [top]:\n                process.terminate()\n            _, alive = psutil.wait_procs(children + [top], timeout=5)\n            if alive:\n                for process in alive:\n                    print(f\"process {process.pid} survived termination\")\n            else:\n                print(\"terminated all processes successfully\")\n\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser(\n        description=\"Fine-tune a language model on a text dataset\"\n    )\n    parser.add_argument(\n        \"--train_data\", type=Path, required=True, help=\"Path to the json dataset\"\n    )\n    parser.add_argument(\n        \"--eval_data\",\n        type=Path,\n        required=False,\n        help=\"Path to the json dataset\",\n        default=None,\n    )\n    parser.add_argument(\n        \"--weights\",\n        type=str,\n        default=None,\n        help=\"The model class to fine-tune on HF or as a local path (e.g. 
'google/flan-t5-xxl'\",\n    )\n    parser.add_argument(\n        \"--num_train_epochs\", type=int, required=True, help=\"Number of training epochs\"\n    )\n    parser.add_argument(\n        \"--learning_rate\",\n        type=float,\n        default=2e-5,\n        help=\"Learning rate for the optimizer\",\n    )\n    parser.add_argument(\n        \"--train_batch_size\", type=int, default=4, help=\"Batch size for training\"\n    )\n    parser.add_argument(\n        \"--warmup_ratio\",\n        type=float,\n        default=0.03,\n        help=\"Number of warmup steps for the learning rate scheduler\",\n    )\n    parser.add_argument(\n        \"--max_steps\",\n        type=int,\n        default=0,\n        help=\"Number of training steps to run, overrides num_train_epochs, useful for testing\",\n    )\n    parser.add_argument(\n        \"--gradient_accumulation_steps\",\n        type=int,\n        default=8,\n        help=\"Number of training steps to run, overrides num_train_epochs, useful for testing\",\n    )\n    parser.add_argument(\"--logging_steps\", type=int, default=1)\n    some_args = parser.parse_args()\n    train(**vars(some_args))\n"
  }
]