[
  {
    "path": ".gitattributes",
    "content": "*.sh eol=lf\n*.py eol=lf\n/git-filter-repo eol=lf\n/contrib/filter-repo-demos/[a-z]* eol=lf\n/t/t9*/* eol=lf\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "---\nversion: 2\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"monthly\""
  },
  {
    "path": ".github/workflows/test.yml",
    "content": "name: Run tests\n\non: [push, pull_request]\n\njobs:\n  run-tests:\n    strategy:\n      matrix:\n        os: [ 'windows', 'ubuntu', 'macos' ]\n      fail-fast: false\n    runs-on: ${{ matrix.os }}-latest\n    steps:\n    - uses: actions/checkout@v4\n    - name: Setup python\n      uses: actions/setup-python@v5\n      with:\n        python-version: 3.x\n    - name: test\n      shell: bash\n      run: |\n        # setup-python puts `python` into the `PATH`, not `python3`, yet\n        # `git-filter-repo` expects `python3` in the `PATH`. Let's add\n        # a shim.\n        printf '#!/bin/sh\\n\\nexec python \"$@\"\\n' >python3 &&\n\n        export PATH=$PWD:$PATH &&\n\n        if ! t/run_tests -q -v -x\n        then\n          mkdir failed &&\n          tar czf failed/failed.tar.gz t\n          exit 1\n        fi\n    - name: upload failed tests' directories\n      if: failure()\n      uses: actions/upload-artifact@v4\n      with:\n        name: failed-${{ matrix.os }}\n        path: failed\n"
  },
  {
    "path": ".gitignore",
    "content": "/Documentation/html/\n/Documentation/man1/\n/t/test-results\n/t/trash directory*\n/__pycache__/\n"
  },
  {
    "path": "COPYING",
    "content": "git-filter-repo itself and most the files in this repository (exceptions\nnoted below) are provided under the MIT license (see COPYING.mit).\n\nThe usage of the MIT license probably makes filter-repo compatible with\neverything, but just in case, these files can also be used under whatever\nopen source license[1] that git.git or libgit2 use now or in the future\n(currently GPL[2] and GPL-with-linking-exception[3]).  Further, the\nexamples (in contrib/filter-repo-demos/ and t/t9391/) can also be used\nunder the same license that libgit2 provides their examples under (CC0,\ncurrently[4]).\n\nExceptions:\n\n  - The test harness (t/test-lib.sh, t/test-lib-functions.sh) is a slightly\n    modified copy of git.git's test harness (the difference being that my\n    copy doesn't require a built version of 'git' to be present).  These\n    are thus GPL2 (see COPYING.gpl), and are individually marked as such.\n\n\n[1] ...as defined by the Open Source Initiative (https://opensource.org/)\n[2] https://git.kernel.org/pub/scm/git/git.git/tree/COPYING\n[3] https://github.com/libgit2/libgit2/blob/master/COPYING\n[4] https://github.com/libgit2/libgit2/blob/master/examples/COPYING\n"
  },
  {
    "path": "COPYING.gpl",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 2, June 1991\n\n Copyright (C) 1989, 1991 Free Software Foundation, Inc.,\n 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The licenses for most software are designed to take away your\nfreedom to share and change it.  By contrast, the GNU General Public\nLicense is intended to guarantee your freedom to share and change free\nsoftware--to make sure the software is free for all its users.  This\nGeneral Public License applies to most of the Free Software\nFoundation's software and to any other program whose authors commit to\nusing it.  (Some other Free Software Foundation software is covered by\nthe GNU Lesser General Public License instead.)  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthis service if you wish), that you receive source code or can get it\nif you want it, that you can change the software or use pieces of it\nin new free programs; and that you know you can do these things.\n\n  To protect your rights, we need to make restrictions that forbid\nanyone to deny you these rights or to ask you to surrender the rights.\nThese restrictions translate to certain responsibilities for you if you\ndistribute copies of the software, or if you modify it.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must give the recipients all the rights that\nyou have.  You must make sure that they, too, receive or can get the\nsource code.  
And you must show them these terms so they know their\nrights.\n\n  We protect your rights with two steps: (1) copyright the software, and\n(2) offer you this license which gives you legal permission to copy,\ndistribute and/or modify the software.\n\n  Also, for each author's protection and ours, we want to make certain\nthat everyone understands that there is no warranty for this free\nsoftware.  If the software is modified by someone else and passed on, we\nwant its recipients to know that what they have is not the original, so\nthat any problems introduced by others will not reflect on the original\nauthors' reputations.\n\n  Finally, any free program is threatened constantly by software\npatents.  We wish to avoid the danger that redistributors of a free\nprogram will individually obtain patent licenses, in effect making the\nprogram proprietary.  To prevent this, we have made it clear that any\npatent must be licensed for everyone's free use or not licensed at all.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                    GNU GENERAL PUBLIC LICENSE\n   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION\n\n  0. This License applies to any program or other work which contains\na notice placed by the copyright holder saying it may be distributed\nunder the terms of this General Public License.  The \"Program\", below,\nrefers to any such program or work, and a \"work based on the Program\"\nmeans either the Program or any derivative work under copyright law:\nthat is to say, a work containing the Program or a portion of it,\neither verbatim or with modifications and/or translated into another\nlanguage.  (Hereinafter, translation is included without limitation in\nthe term \"modification\".)  Each licensee is addressed as \"you\".\n\nActivities other than copying, distribution and modification are not\ncovered by this License; they are outside its scope.  
The act of\nrunning the Program is not restricted, and the output from the Program\nis covered only if its contents constitute a work based on the\nProgram (independent of having been made by running the Program).\nWhether that is true depends on what the Program does.\n\n  1. You may copy and distribute verbatim copies of the Program's\nsource code as you receive it, in any medium, provided that you\nconspicuously and appropriately publish on each copy an appropriate\ncopyright notice and disclaimer of warranty; keep intact all the\nnotices that refer to this License and to the absence of any warranty;\nand give any other recipients of the Program a copy of this License\nalong with the Program.\n\nYou may charge a fee for the physical act of transferring a copy, and\nyou may at your option offer warranty protection in exchange for a fee.\n\n  2. You may modify your copy or copies of the Program or any portion\nof it, thus forming a work based on the Program, and copy and\ndistribute such modifications or work under the terms of Section 1\nabove, provided that you also meet all of these conditions:\n\n    a) You must cause the modified files to carry prominent notices\n    stating that you changed the files and the date of any change.\n\n    b) You must cause any work that you distribute or publish, that in\n    whole or in part contains or is derived from the Program or any\n    part thereof, to be licensed as a whole at no charge to all third\n    parties under the terms of this License.\n\n    c) If the modified program normally reads commands interactively\n    when run, you must cause it, when started running for such\n    interactive use in the most ordinary way, to print or display an\n    announcement including an appropriate copyright notice and a\n    notice that there is no warranty (or else, saying that you provide\n    a warranty) and that users may redistribute the program under\n    these conditions, and telling the user how to view a copy of this\n  
  License.  (Exception: if the Program itself is interactive but\n    does not normally print such an announcement, your work based on\n    the Program is not required to print an announcement.)\n\nThese requirements apply to the modified work as a whole.  If\nidentifiable sections of that work are not derived from the Program,\nand can be reasonably considered independent and separate works in\nthemselves, then this License, and its terms, do not apply to those\nsections when you distribute them as separate works.  But when you\ndistribute the same sections as part of a whole which is a work based\non the Program, the distribution of the whole must be on the terms of\nthis License, whose permissions for other licensees extend to the\nentire whole, and thus to each and every part regardless of who wrote it.\n\nThus, it is not the intent of this section to claim rights or contest\nyour rights to work written entirely by you; rather, the intent is to\nexercise the right to control the distribution of derivative or\ncollective works based on the Program.\n\nIn addition, mere aggregation of another work not based on the Program\nwith the Program (or with a work based on the Program) on a volume of\na storage or distribution medium does not bring the other work under\nthe scope of this License.\n\n  3. 
You may copy and distribute the Program (or a work based on it,\nunder Section 2) in object code or executable form under the terms of\nSections 1 and 2 above provided that you also do one of the following:\n\n    a) Accompany it with the complete corresponding machine-readable\n    source code, which must be distributed under the terms of Sections\n    1 and 2 above on a medium customarily used for software interchange; or,\n\n    b) Accompany it with a written offer, valid for at least three\n    years, to give any third party, for a charge no more than your\n    cost of physically performing source distribution, a complete\n    machine-readable copy of the corresponding source code, to be\n    distributed under the terms of Sections 1 and 2 above on a medium\n    customarily used for software interchange; or,\n\n    c) Accompany it with the information you received as to the offer\n    to distribute corresponding source code.  (This alternative is\n    allowed only for noncommercial distribution and only if you\n    received the program in object code or executable form with such\n    an offer, in accord with Subsection b above.)\n\nThe source code for a work means the preferred form of the work for\nmaking modifications to it.  For an executable work, complete source\ncode means all the source code for all modules it contains, plus any\nassociated interface definition files, plus the scripts used to\ncontrol compilation and installation of the executable.  
However, as a\nspecial exception, the source code distributed need not include\nanything that is normally distributed (in either source or binary\nform) with the major components (compiler, kernel, and so on) of the\noperating system on which the executable runs, unless that component\nitself accompanies the executable.\n\nIf distribution of executable or object code is made by offering\naccess to copy from a designated place, then offering equivalent\naccess to copy the source code from the same place counts as\ndistribution of the source code, even though third parties are not\ncompelled to copy the source along with the object code.\n\n  4. You may not copy, modify, sublicense, or distribute the Program\nexcept as expressly provided under this License.  Any attempt\notherwise to copy, modify, sublicense or distribute the Program is\nvoid, and will automatically terminate your rights under this License.\nHowever, parties who have received copies, or rights, from you under\nthis License will not have their licenses terminated so long as such\nparties remain in full compliance.\n\n  5. You are not required to accept this License, since you have not\nsigned it.  However, nothing else grants you permission to modify or\ndistribute the Program or its derivative works.  These actions are\nprohibited by law if you do not accept this License.  Therefore, by\nmodifying or distributing the Program (or any work based on the\nProgram), you indicate your acceptance of this License to do so, and\nall its terms and conditions for copying, distributing or modifying\nthe Program or works based on it.\n\n  6. Each time you redistribute the Program (or any work based on the\nProgram), the recipient automatically receives a license from the\noriginal licensor to copy, distribute or modify the Program subject to\nthese terms and conditions.  
You may not impose any further\nrestrictions on the recipients' exercise of the rights granted herein.\nYou are not responsible for enforcing compliance by third parties to\nthis License.\n\n  7. If, as a consequence of a court judgment or allegation of patent\ninfringement or for any other reason (not limited to patent issues),\nconditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot\ndistribute so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you\nmay not distribute the Program at all.  For example, if a patent\nlicense would not permit royalty-free redistribution of the Program by\nall those who receive copies directly or indirectly through you, then\nthe only way you could satisfy both it and this License would be to\nrefrain entirely from distribution of the Program.\n\nIf any portion of this section is held invalid or unenforceable under\nany particular circumstance, the balance of the section is intended to\napply and the section as a whole is intended to apply in other\ncircumstances.\n\nIt is not the purpose of this section to induce you to infringe any\npatents or other property right claims or to contest validity of any\nsuch claims; this section has the sole purpose of protecting the\nintegrity of the free software distribution system, which is\nimplemented by public license practices.  Many people have made\ngenerous contributions to the wide range of software distributed\nthrough that system in reliance on consistent application of that\nsystem; it is up to the author/donor to decide if he or she is willing\nto distribute software through any other system and a licensee cannot\nimpose that choice.\n\nThis section is intended to make thoroughly clear what is believed to\nbe a consequence of the rest of this License.\n\n  8. 
If the distribution and/or use of the Program is restricted in\ncertain countries either by patents or by copyrighted interfaces, the\noriginal copyright holder who places the Program under this License\nmay add an explicit geographical distribution limitation excluding\nthose countries, so that distribution is permitted only in or among\ncountries not thus excluded.  In such case, this License incorporates\nthe limitation as if written in the body of this License.\n\n  9. The Free Software Foundation may publish revised and/or new versions\nof the General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\nEach version is given a distinguishing version number.  If the Program\nspecifies a version number of this License which applies to it and \"any\nlater version\", you have the option of following the terms and conditions\neither of that version or of any later version published by the Free\nSoftware Foundation.  If the Program does not specify a version number of\nthis License, you may choose any version ever published by the Free Software\nFoundation.\n\n  10. If you wish to incorporate parts of the Program into other free\nprograms whose distribution conditions are different, write to the author\nto ask for permission.  For software which is copyrighted by the Free\nSoftware Foundation, write to the Free Software Foundation; we sometimes\nmake exceptions for this.  Our decision will be guided by the two goals\nof preserving the free status of all derivatives of our free software and\nof promoting the sharing and reuse of software generally.\n\n                            NO WARRANTY\n\n  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\nFOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  
EXCEPT WHEN\nOTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES\nPROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED\nOR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\nMERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS\nTO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE\nPROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,\nREPAIR OR CORRECTION.\n\n  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\nREDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,\nINCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING\nOUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED\nTO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\nYOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER\nPROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE\nPOSSIBILITY OF SUCH DAMAGES.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  
It is safest\nto attach them to the start of each source file to most effectively\nconvey the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software; you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation; either version 2 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License along\n    with this program; if not, write to the Free Software Foundation, Inc.,\n    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n\nAlso add information on how to contact you by electronic and paper mail.\n\nIf the program is interactive, make it output a short notice like this\nwhen it starts in an interactive mode:\n\n    Gnomovision version 69, Copyright (C) year name of author\n    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  Of course, the commands you use may\nbe called something other than `show w' and `show c'; they could even be\nmouse-clicks or menu items--whatever suits your program.\n\nYou should also get your employer (if you work as a programmer) or your\nschool, if any, to sign a \"copyright disclaimer\" for the program, if\nnecessary.  
Here is a sample; alter the names:\n\n  Yoyodyne, Inc., hereby disclaims all copyright interest in the program\n  `Gnomovision' (which makes passes at compilers) written by James Hacker.\n\n  <signature of Ty Coon>, 1 April 1989\n  Ty Coon, President of Vice\n\nThis General Public License does not permit incorporating your program into\nproprietary programs.  If your program is a subroutine library, you may\nconsider it more useful to permit linking proprietary applications with the\nlibrary.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.\n"
  },
  {
    "path": "COPYING.mit",
    "content": "Copyright (c) 2009, 2018-2019\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "Documentation/Contributing.md",
    "content": "Welcome to the community!\n\nContributions need to meet the bar for inclusion in git.git.  Although\nfilter-repo is not part of the git.git repository, I want to leave the\noption open for it to be merged in the future.  As such, any\ncontributions need to follow the same [guidelines for contribution to\ngit.git](https://git.kernel.org/pub/scm/git/git.git/tree/Documentation/SubmittingPatches),\nwith a few exceptions:\n\n  * While I\n    [hate](https://public-inbox.org/git/CABPp-BG2SkH0GrRYpHLfp2Wey91ThwQoTgf9UmPa9f5Szn+v3Q@mail.gmail.com/)\n    [GitHub](https://public-inbox.org/git/CABPp-BEcpasV4vBTm0uxQ4Vzm88MQAX-ArDG4e9QU8tEoNsZWw@mail.gmail.com/)\n    [PRs](https://public-inbox.org/git/CABPp-BEHy8c3raHwf9aFXvXN0smf_WwCcNiYxQBwh7W6An60qQ@mail.gmail.com/)\n    (as others point out, [it's mind-boggling in a bad way that\n    web-based Git hosting and code review systems do such a poor\n    job](http://nhaehnle.blogspot.com/2020/06/they-want-to-be-small-they-want-to-be.html)),\n    git-format-patch and git-send-email can be a beast and I have not\n    yet found time to modify Dscho's excellent\n    [GitGitGadget](https://github.com/gitgitgadget/gitgitgadget) to\n    work with git-filter-repo.  As such:\n      * For very short single-commit changes, feel free to open GitHub PRs.\n      * For more involved changes, if format-patch or send-email give you\n        too much trouble, go ahead and open a GitHub PR and just mention\n        that email didn't work out.\n  * If emailing patches to the git list:\n    * Include \"filter-repo\" at the start of the subject,\n      e.g. 
\"[filter-repo PATCH] Add packaging scripts for uploading to PyPI\"\n      instead of just \"[PATCH] Add packaging scripts for uploading to PyPI\"\n    * CC me instead of the git maintainer\n  * Git's [CodingGuidelines for python\n    code](https://github.com/git/git/blob/v2.24.0/Documentation/CodingGuidelines#L482-L494)\n    are only partially applicable:\n    * python3 is a hard requirement; python2 is/was EOL at the end of\n      2019 and should not be used.  (Commit 4d0264ab723c\n      (\"filter-repo: workaround python<2.7.9 exec bug\", 2019-04-30)\n      was the last version of filter-repo that worked with python2).\n    * You can depend on anything in python 3.6 or earlier.  I may bump\n      this minimum version over time, but do want to generally work\n      with the python3 version found in current enterprise Linux\n      distributions.\n    * In filter-repo, it's not just OK to use bytestrings, you are\n      expected to use them a lot.  Using unicode strings results in\n      lots of ugly errors since input comes from filesystem names,\n      commit messages, file contents, etc., none of which are\n      guaranteed to be unicode.  (Plus unicode strings require lots of\n      effort to verify, encode, and decode -- slowing the filtering\n      process down).  I tried to work with unicode strings more\n      broadly in the code base multiple times; but it's just a bad\n      idea to use an abstraction that doesn't fit the data.\n    * I generally like [PEP\n      8](https://www.python.org/dev/peps/pep-0008/), but used\n      two-space indents for years before learning of it and have just\n      continued that habit.  
For consistency, contributions should also\n      use two-space indents and otherwise generally follow PEP 8.\n\nThere are a few extra things I would like folks to keep in mind:\n\n  * Please test line coverage if you add or modify code\n\n    * `make test` will run the testsuite under\n      [coverage3](https://pypi.org/project/coverage/) (which you will\n      need to install), and report on line coverage.  Line coverage of\n      git-filter-repo needs to remain at 100%; line coverage of\n      contrib and test scripts can be ignored.\n\n  * Please do not be intimidated by detailed feedback:\n\n    * In the git community, I have been contributing for years and\n      have had hundreds of patches accepted but I still find that even\n      when I try to make patches perfect I am not surprised when I\n      have to spend as much or more time fixing up patches after\n      submitting them than I did figuring out the patches in the first\n      place.  git folks tend to do thorough reviews, which has taught\n      me a lot, and I try to do the same for filter-repo.  Plus, as\n      noted above, I want contributions from others to be acceptable\n      in git.git itself.\n"
  },
  {
    "path": "Documentation/FAQ.md",
    "content": "# Frequently Answered Questions\n\n## Table of Contents\n\n  * [Why did `git-filter-repo` rewrite commit hashes?](#why-did-git-filter-repo-rewrite-commit-hashes)\n  * [Why did `git-filter-repo` rewrite more commit hashes than I expected?](#why-did-git-filter-repo-rewrite-more-commit-hashes-than-i-expected)\n  * [Why did `git-filter-repo` rewrite other branches too?](#why-did-git-filter-repo-rewrite-other-branches-too)\n  * [How should paths be specified?](#How-should-paths-be-specified)\n  * [Help! Can I recover or undo the filtering?](#help-can-i-recover-or-undo-the-filtering)\n  * [Can you change `git-filter-repo` to allow future folks to recover from `--force`'d rewrites?](#can-you-change-git-filter-repo-to-allow-future-folks-to-recover-from---forced-rewrites)\n  * [Can I use `git-filter-repo` to fix a repository with corruption?](#Can-I-use-git-filter-repo-to-fix-a-repository-with-corruption)\n  * [What kinds of problems does `git-filter-repo` not try to solve?](#What-kinds-of-problems-does-git-filter-repo-not-try-to-solve)\n    * [Filtering history but magically keeping the same commit IDs](#Filtering-history-but-magically-keeping-the-same-commit-IDs)\n    * [Bidirectional development between a filtered and unfiltered repository](#Bidirectional-development-between-a-filtered-and-unfiltered-repository)\n    * [Removing specific commits, or filtering based on the difference (a.k.a. patch or change) between commits](#Removing-specific-commits-or-filtering-based-on-the-difference-aka-patch-or-change-between-commits)\n    * [Filtering two different clones of the same repository and getting the same new commit IDs](#Filtering-two-different-clones-of-the-same-repository-and-getting-the-same-new-commit-IDs)\n\n## Why did `git-filter-repo` rewrite commit hashes?\n\nThis is fundamental to how Git operates.  In more detail...\n\nEach commit in Git is a hash of its contents.  
Those contents include\nthe commit message, the author (name, email, and time authored), the\ncommitter (name, email and time committed), the toplevel tree hash,\nand the parent(s) of the commit.  This means that if any of the commit\nfields change, including the tree hash or the hash of the parent(s) of\nthe commit, then the hash for the commit will change.\n\n(The same is true for files (\"blobs\") and trees stored in git as well;\neach is a hash of its contents, so literally if anything changes, the\ncommit hash will change.)\n\nIf you attempt to write a commit (or tree or blob) object with an\nincorrect hash, Git will reject it as corrupt.\n\n## Why did `git-filter-repo` rewrite more commit hashes than I expected?\n\nThere are two aspects to this, or two possible underlying questions users\nmight be asking here:\n  * Why did commits newer than the ones I expected have their hash change?\n  * Why did commits older than the ones I expected have their hash change?\n\nFor the first question, see [why filter-repo rewrites commit\nhashes](#why-did-git-filter-repo-rewrite-commit-hashes), and note that\nif you modify some old commit, perhaps to remove a file, then obviously\nthat commit's hash must change.  Further, since that commit will have a\nnew hash, any other commit with that commit as a parent will need to\nhave a new hash.  That will need to chain all the way to the most recent\ncommits in history.  This is fundamental to Git and there is nothing you\ncan do to change this.\n\nFor the second question, there are two causes: (1) the filter you\nspecified applies to the older commits too, or (2) git-fast-export and\ngit-fast-import (both of which git-filter-repo uses) canonicalize\nhistory in various ways.  The second cause means that even if you have\nno filter, these tools sometimes change commit hashes.  
This can happen\nin any of these cases:\n\n  * If you have signed commits, the signatures will be stripped\n  * If you have commits with extended headers, the extended headers will\n    be stripped (signed commits are actually a special case of this)\n  * If you have commits in an encoding other than UTF-8, they will by\n    default be re-encoded into UTF-8\n  * If you have a commit without an author, one will be added that\n    matches the committer.\n  * If you have trees that are not canonical (e.g. incorrect sorting\n    order), they will be canonicalized\n\nIf this affects you and you really only want to rewrite newer commits in\nhistory, you can use the `--refs` argument to git-filter-repo to specify\na range of history that you want rewritten.\n\n(For those attempting to be clever and use `--refs` for the first\nquestion: Note that if you attempt to only rewrite a few old commits,\nthen all you'll succeed in is adding new commits that won't be part of\nany branch and will be subject to garbage collection.  The branches will\nstill hold on to the unrewritten versions of the commits.  Thus, you\nhave to rewrite all the way to the branch tip for the rewrite to be\nmeaningful.  Said another way, the `--refs` trick is only useful for\nrestricting the rewrite to newer commits, never for restricting the\nrewrite to older commits.)\n\n## Why did `git-filter-repo` rewrite other branches too?\n\ngit-filter-repo's name is git-filter-**_repo_**.  Obviously it is going\nto rewrite all branches by default.\n\n`git-filter-repo` can restrict its rewriting to a subset of history,\nsuch as a single branch, using the `--refs` option.  
However, using that\ncomes with the risk that one branch now has a different version of some\ncommits than other branches do; usually, when you rewrite history, you\nwant all branches that depend on what you are rewriting to be updated.\n\n## How should paths be specified?\n\nArguments to `--path` should be paths as Git would report them, when run\nfrom the toplevel of the git repository (explained more below after some\nexamples).\n\n**Good** path examples:\n  * `README.md`\n  * `Documentation/README.md`\n  * `src/modules/flux/capacitor.rs`\n\nYou can find examples of valid path names from your repository by\nrunning either `git diff --no-relative --name-only` or `git log\n--no-relative --name-only --format=\"\"`.\n\nThe following are basic rules about paths the way that Git reports and uses\nthem:\n  * do not use absolute paths\n  * always treat paths as relative to the toplevel of the repository\n    (do not add a leading slash, and do not specify paths relative to some\n     subdirectory of the repository even if that is your current working\n     directory)\n  * do not use the special directories `.` or `..` anywhere in your path\n  * do not use `\\`, the Windows path separator, between directories and\n    files; always use `/` regardless of platform.\n\n**Erroneous** path examples (do **_NOT_** use any of these styles):\n * `/absolute/path/to/src/modules/program.c`\n * `/src/modules/program.c`\n * `src/docs/../modules/main.java`\n * `scripts/config/./update.sh`\n * `./tests/fixtures/image.jpg`\n * `../src/main.rs`\n * `C:\\absolute\\path\\to\\src\\modules\\program.c`\n * `src\\modules\\program.c`\n\n## Help! Can I recover or undo the filtering?\n\nSure, _if_ you followed the instructions.  The instructions told you to\nmake a fresh clone before running git-filter-repo.  
If you did that (and\ndidn't force push your rewritten history back over the original), you\ncan just throw away your clone with the flubbed rewrite, and make a new\nclone.\n\nIf you didn't make a fresh clone, and you didn't run with `--force`, you\nwould have seen the following warning:\n```\nAborting: Refusing to destructively overwrite repo history since\nthis does not look like a fresh clone.\n[...]\nPlease operate on a fresh clone instead.  If you want to proceed\nanyway, use --force.\n```\nIf you then added `--force`, well, you were warned.\n\nIf you didn't make a fresh clone, and you started with `--force`, and you\ndidn't think to read the description of the `--force` option:\n```\n\tIgnore fresh clone checks and rewrite history (an irreversible\n\toperation, especially since it by default ends with an\n\timmediate pruning of reflogs and old objects).\n```\nand you didn't read even the beginning of the manual\n```\ngit-filter-repo destructively rewrites history\n```\nand you think it's okay to run a command with `--force` in it on\nsomething you don't have a backup of, then now is the time to reassess\nyour life choices.  `--force` should be a pretty clear warning sign.\n(If someone on the internet suggested `--force`, you can complain at\n_them_, but either way you should learn to carefully vet commands\nsuggested by others on the internet.  
Sadly, even sites like Stack\nOverflow where someone really ought to be able to correct bad guidance\nstill unfortunately have a fair amount of this bad advice.)\n\nSee also the next question.\n\n## Can you change `git-filter-repo` to allow future folks to recover from --force'd rewrites?\n\nThis will never be supported.\n\n* Providing an alternate method to restore would require storing both\n  the original history and the new history, meaning that those who are\n  trying to shrink their repository size instead see it grow and have to\n  figure out extra steps to expunge the old history to see the actual\n  size savings.  Experience with other tools showed that this was\n  frustrating and difficult to figure out for many users.\n\n* Providing an alternate method to restore would mean that users who are\n  trying to purge sensitive data from their repository still find the\n  sensitive data after the rewrite because it hasn't actually been\n  purged. In order to actually purge it, they have to take extra steps.\n  Same as with the last bullet point, experience has shown that extra\n  steps to purge the extra information are difficult and error-prone.\n  This extra difficulty is particularly problematic when you're trying\n  to expunge sensitive data.\n\n* Providing an alternate method to restore would also mean trying to\n  figure out what should be backed up and how. The obvious choices used\n  by previous tools only actually provided partial backups (reflogs\n  would be ignored for example, as would uncommitted changes whether\n  staged or not). The more you try to carefully back up everything, the\n  more difficult the restoration from backup will be.  The only backup\n  mechanism I've found that seems reasonable, is making a separate\n  clone.  That's expensive to do automatically for the user (especially\n  if the filtering is done via multiple invocations of the tool).  
Plus,\n  it's not clear where the clone should be stored, especially to avoid\n  the previous problems for size-reduction and sensitive-data-removal\n  folks.\n\n* Providing an alternate method to restore would also mean providing\n  documentation on how to restore. Past methods by other tools in the\n  history rewriting space suggested that it was rather difficult for\n  users to figure out.  Difficult enough, in fact, that users simply\n  didn't ever use them.  They instead made a separate clone before\n  rewriting history and if they didn't like the rewrite, then they just\n  blew it away and made a new clone to work with.  Since that was\n  observed to be the easy restoration method, I simply enforced it with\n  this tool, requiring users who look like they might not be operating\n  on a fresh clone to use the --force flag.\n\nBut more than all that, if there were an alternate method to restore,\nwhy would you have needed to specify the --force flag? Doesn't its\nexistence (and the wording of its documentation) make it pretty clear on\nits own that there isn't going to be a way to restore?\n\n## Can I use `git-filter-repo` to fix a repository with corruption?\n\nSome kinds of corruption can be fixed, in conjunction with `git\nreplace`.  If `git fsck` reports warnings/errors for certain objects,\nyou can often [replace them and rewrite\nhistory](examples-from-user-filed-issues.md#Handling-repository-corruption).\n\n## What kinds of problems does `git-filter-repo` not try to solve?\n\nThis question is often asked in the form of \"How do I...\" or even\nwritten as a statement such as \"I found a bug with `git-filter-repo`;\nthe behavior I got was different than I expected...\"  But if you're\ntrying to do one of the things below, then `git-filter-repo` is behaving\nas designed and either there is no solution to your problem, or you need\nto use a different tool to solve your problem.  
The following subsections\naddress some of these common requests:\n\n### Filtering history but magically keeping the same commit IDs\n\nThis is impossible.  If you modify commits, or the files contained in\nthem, then you change their commit IDs; this is [fundamental to\nGit](#why-did-git-filter-repo-rewrite-commit-hashes).\n\nHowever, _if_ you don't need to modify commits, but just don't want to\ndownload everything, then look into one of the following:\n  * [partial clones](https://git-scm.com/docs/partial-clone)\n  * the ugly, retarded hack known as [shallow clones](https://git-scm.com/docs/shallow)\n  * a massive hack like [cheap fake\n    clones](https://github.com/newren/sequester-old-big-blobs) that at\n    least let you put your evil overlord laugh to use\n\n### Bidirectional development between a filtered and unfiltered repository\n\nSome folks want to extract a subset of a repository, do development work\non it, then bring those changes back to the original repository, and\nsend further changes in both directions.  Such a tool can be written\nusing fast-export and fast-import, but would need to make very different\ndesign decisions than `git-filter-repo` did.  Such a tool would be\ncapable of supporting this kind of development, but lose the ability\n[\"to write arbitrary filters using a scripting\nlanguage\"](https://josh-project.github.io/josh/#concept) and other\nfeatures that `git-filter-repo` has.\n\nSuch a tool exists; it's called [Josh](https://github.com/josh-project/josh).\nUse it if this is your usecase.\n\n### Removing specific commits, or filtering based on the difference (a.k.a. patch or change) between commits\n\nYou are probably looking for `git rebase`.  `git rebase` operates on the\ndifference between commits (\"diff\"), allowing you to e.g. drop or modify\nthe diff, but then runs the risk of conflicts as it attempts to apply\nfuture diffs. 
If you tweak one diff in the middle, since it just applies\nmore diffs for the remaining patches, you'll still see your changes at\nthe end.\n\nfilter-repo, by contrast, uses fast-export and fast-import.  Those tools\ntreat every commit not as a diff but as a \"use the same versions of most\nfiles from the parent commit, but make these five files have these exact\ncontents\". Since you don't have either the diff or ready access to the\nversion of files from the parent commit, that makes it hard to \"undo\"\npart of the changes to some file.  Further, if you attempt to drop an\nentire commit or tweak the contents of those new files in that commit,\nthose changes will be reverted by the next commit in the stream that\nmentions that file because handling the next commit does not involve\napplying a diff but a \"make this file have these exact contents\". So,\nfilter-repo works well for things like removing a file entirely, but if\nyou want to make any tweaks to any files you have to make the exact same\ntweak over and over for every single commit that touches that file.\n\nIn short, `git rebase` is the tool you want for removing specific\ncommits or otherwise operating on the diff between commits.\n\n### Filtering two different clones of the same repository and getting the same new commit IDs\n\nSometimes two co-workers have a clone of the same repository and they\nrun the same `git-filter-repo` command, and they expect to get the same\nnew commit IDs.  
Often they do get the same new commit IDs, but\nsometimes they don't.\n\nWhen people get the same commit IDs, it is only by luck; not by design.\nThere are three reasons this is unsupported and will never be reliable:\n\n  * Different Git versions used could cause differences in filtering\n\n    Since `git fast-export` and `git fast-import` do various\n    canonicalizations of history, and these could change over time,\n    having different versions of Git installed can result in differences\n    in filtering.\n\n  * Different git-filter-repo versions used could cause differences in\n    filtering\n\n    Over time, `git-filter-repo` may include new filterings by default,\n    or fix existing filterings, or make any other number of changes.  As\n    such, having different versions of `git-filter-repo` installed can\n    result in differences in filtering.\n\n  * Different amounts of the repository cloned or differences in\n    local-only commits can cause differences in filtering\n\n    If the clones weren't made at the same time, one clone may have more\n    commits than the other.  Also, both may have made local commits the\n    other doesn't have.  These additional commits could cause history to\n    be traversed in a different order, and filtering rules are allowed\n    to have order-dependent rules for how they filter.  Further,\n    filtering rules are allowed to depend upon what history exists in\n    your clone.  As one example, filter-repo's default to update commit\n    messages which refer to other commits by abbreviated hash, may be\n    unable to find these other commits in your clone but find them in\n    your coworkers' clone.  
Relatedly, filter-repo's update of\n    abbreviated hashes in commit messages only works for commits that\n    have already been filtered, and thus depends on the order in which\n    fast-export traverses the history.\n\n`git-filter-repo` is designed as a _one_-shot history rewriting tool.\nOnce you have filtered one clone of the repository, you should not be\nusing it to filter other clones.  All other clones of the repository\nshould either be discarded and recloned, or [have all their history\nrebased on top of the rewritten\nhistory](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#_make_sure_other_copies_are_cleaned_up_clones_of_colleagues).\n\n<!--\n## How do I see what was removed?\n\nRun `git rev-list --objects --all` in both a separate fresh clone from\nbefore the rewrite and in the repo where the rewrite was done.  Then\nfind the objects that exist in the old but not the new.\n\n-->\n"
  },
  {
    "path": "Documentation/converting-from-bfg-repo-cleaner.md",
    "content": "# Cheat Sheet: Converting from BFG Repo Cleaner\n\nThis document is aimed at folks who are familiar with BFG Repo Cleaner\nand want to learn how to convert over to using filter-repo.\n\n## Table of Contents\n\n  * [Half-hearted conversions](#half-hearted-conversions)\n  * [Intention of \"equivalent\" commands](#intention-of-equivalent-commands)\n  * [Basic Differences](#basic-differences)\n  * [Cheat Sheet: Conversion of Examples from BFG](#cheat-sheet-conversion-of-examples-from-bfg)\n\n## Half-hearted conversions\n\nYou can switch most any BFG command to use filter-repo under the\ncovers by just replacing the `java -jar bfg.jar` part of the command\nwith [`bfg-ish`](../contrib/filter-repo-demos/bfg-ish).\n\nbfg-ish is a reasonable tool, and provides a number of bug fixes and\nfeatures on top of bfg, but most of my focus is naturally on\nfilter-repo which has a number of capabilities lacking in bfg-ish.\n\n## Intention of \"equivalent\" commands\n\nBFG and filter-repo have a few differences, highlighted in the Basic\nDifferences section below, that make it hard to get commands that\nbehave identically.  Rather than focusing on matching BFG output as\nexactly as possible, I treat the BFG examples as idiomatic ways to\nsolve a certain type of problem with BFG, and express how one would\nidiomatically solve the same problem in filter-repo.  Sometimes that\nmeans the results are not identical, but they are largely the same in\neach case.\n\n## Basic Differences\n\nBFG operates directly on tree objects, which have no notion of their\nleading path.  Thus, it has no way of differentiating between\n'README.md' at the toplevel versus in some subdirectory.  You simply\noperate on the basename of files and directories.  This precludes\ndoing things like renaming files and directories or other bigger\nrestructures.  By directly operating on trees, it also runs into\nproblems with loose vs. packed objects, loose vs. 
packed refs, not\nunderstanding replace refs or grafts, and not understanding the index\nand working tree as another data source.\n\nWith `git filter-repo`, you are essentially given an editing tool to\noperate on the [fast-export](https://git-scm.com/docs/git-fast-export)\nserialization of a repo, which operates on filenames including their\nfull paths from the toplevel of the repo.  Directories are not\nseparately specified, so any directory-related filtering is done by\nchecking the leading path of each file.  Further, you aren't limited\nto the pre-defined filtering types, python callbacks which operate on\nthe data structures from the fast-export stream can be provided to do\njust about anything you want.  By leveraging fast-export and\nfast-import, filter-repo gains automatic handling of objects and refs\nwhether they are packed or not, automatic handling of replace refs and\ngrafts, and future features that may appear.  It also tries hard to\nprovide a full rewrite solution, so it takes care of additional\nimportant concerns such as updating the index and working tree and\nrunning an automatic gc for the user afterwards.\n\nThe \"protection\" and \"privacy\" defaults in BFG are something I\nfundamentally disagreed with for a variety of reasons; see the\ncomments at the top of the\n[bfg-ish](../contrib/filter-repo-demos/bfg-ish) script if you want\ndetails.  The bfg-ish script implemented these protection and privacy\noptions since it was designed to act like BFG, but still flipped the\ndefault to the opposite of what BFG chose.  I left the \"protection\"\nand \"non-private\" features out of filter-repo entirely.  
This means a\nnumber of things with filter-repo:\n  * any filters you specify will also be applied to HEAD, so that you\n    don't have a weird disconnect from your history transformations\n    only being applied to most commits\n  * `[formerly OLDHASH]` references are not munged into commit\n    messages; the replace refs that filter-repo adds are a much\n    cleaner way of looking up commits by old commit hashes.\n  * `Former-commit-id:` footers are not added to commit messages; the\n    replace refs that filter-repo adds are a much cleaner way of\n    looking up commits by old commit hashes.\n  * History is not littered with `<filename>.REMOVED.git-id` files.\n\nBFG expects you to specify the repository to rewrite as its final\nargument, whereas filter-repo expects you to cd into the repo and then\nrun filter-repo.\n\n## Cheat Sheet: Conversion of Examples from BFG\n\n### Stripping big blobs\n\n```shell\n  java -jar bfg.jar --strip-blobs-bigger-than 100M some-big-repo.git\n```\n\nbecomes\n\n```shell\n  git filter-repo --strip-blobs-bigger-than 100M\n```\n\n### Deleting files\n\n```shell\n  java -jar bfg.jar --delete-files id_{dsa,rsa}  my-repo.git\n```\n\nbecomes\n\n```shell\n  git filter-repo --use-base-name --path id_dsa --path id_rsa --invert-paths\n```\n\n### Removing sensitive content\n\n```shell\n  java -jar bfg.jar --replace-text passwords.txt my-repo.git\n```\n\nbecomes\n\n```shell\n  git filter-repo --replace-text passwords.txt\n```\n\nThe `--replace-text` option was a really clever idea that the BFG came up\nwith and I just implemented mostly as-is within filter-repo.  Sadly,\nBFG didn't document the format of files passed to `--replace-text` very\nwell, but I added more detail in the filter-repo documentation.\n\nThere is one small but important difference between the two tools: if\nyou use both \"regex:\" and \"==>\" on a single line to specify a regex\nsearch and replace, then filter-repo will use \"\\1\", \"\\2\", \"\\3\",\netc. 
for replacement strings whereas BFG used \"$1\", \"$2\", \"$3\", etc.\nThe reason for this difference is simply that python used backslashes\nin its regex format while scala used dollar signs, and both tools\nwanted to just pass along the strings unmodified to the underlying\nlanguage.  (Since bfg-ish attempts to emulate the BFG, it accepts\n\"$1\", \"$2\" and so forth and translates them to \"\\1\", \"\\2\", etc. so\nthat filter-repo/python will understand it.)\n\n### Removing files and folders with a certain name\n\n```shell\n  java -jar bfg.jar --delete-folders .git --delete-files .git --no-blob-protection  my-repo.git\n```\n\nbecomes\n\n```shell\n  git filter-repo --invert-paths --path-glob '*/.git' --path .git\n```\n\nYes, that glob will handle .git directories one or more directories\ndeep; it's a git-style glob rather than a shell-style glob.  Also, the\n`--path .git` was added because `--path-glob '*/.git'` won't match a\ndirectory named .git in the toplevel directory since it has a '/'\ncharacter in the glob expression (though I would hope the repository\ndoesn't have a tracked .git toplevel directory in its history).\n"
  },
  {
    "path": "Documentation/converting-from-filter-branch.md",
    "content": "# Cheat Sheet: Converting from filter-branch\n\nThis document is aimed at folks who are familiar with filter-branch and want\nto learn how to convert over to using filter-repo.\n\n## Table of Contents\n\n  * [Half-hearted conversions](#half-hearted-conversions)\n  * [Intention of \"equivalent\" commands](#intention-of-equivalent-commands)\n  * [Basic Differences](#basic-differences)\n  * [Cheat Sheet: Conversion of Examples from the filter-branch manpage](#cheat-sheet-conversion-of-examples-from-the-filter-branch-manpage)\n  * [Cheat Sheet: Additional conversion examples](#cheat-sheet-additional-conversion-examples)\n\n## Half-hearted conversions\n\nYou can switch nearly any `git filter-branch` command to use\nfilter-repo under the covers by just replacing the `git filter-branch`\npart of the command with\n[`filter-lamely`](../contrib/filter-repo-demos/filter-lamely).  The\ngit.git regression testsuite passes when I swap out the filter-branch\nscript with filter-lamely, for example.  (However, the filter-branch\ntests are not very comprehensive, so don't rely on that too much.)\n\nDoing a half-hearted conversion has nearly all of the drawbacks of\nfilter-branch and nearly none of the benefits of filter-repo, but it\nwill make your command run a few times faster and makes for a very\nsimple conversion.\n\nYou'll get a lot more performance, safety, and features by just\nswitching to direct filter-repo commands.\n\n## Intention of \"equivalent\" commands\n\nfilter-branch and filter-repo have different defaults, as highlighted\nin the Basic Differences section below.  As such, getting a command\nwhich behaves identically is not possible.  Also, sometimes the\nfilter-branch manpage lies, e.g. it says \"suppose you want to...from\nall commits\" and then uses a command line like \"git filter-branch\n... 
HEAD\", which only operates on commits in the current branch rather\nthan on all commits.\n\nRather than focusing on matching filter-branch output as exactly as\npossible, I treat the filter-branch examples as idiomatic ways to\nsolve a certain type of problem with filter-branch, and express how\none would idiomatically solve the same problem in filter-repo.\nSometimes that means the results are not identical, but they are\nlargely the same in each case.\n\n## Basic Differences\n\nWith `git filter-branch`, you have a git repository where every single\ncommit (within the branches or revisions you specify) is checked out\nand then you run one or more shell commands to transform the working\ncopy into your desired end state.\n\nWith `git filter-repo`, you are essentially given an editing tool to\noperate on the [fast-export](https://git-scm.com/docs/git-fast-export)\nserialization of a repo.  That means there is an input stream of all\nthe contents of the repository, and rather than specifying filters in\nthe form of commands to run, you usually employ a number of common\npre-defined filters that provide various ways to slice, dice, or\nmodify the repo based on its components (such as pathnames, file\ncontent, user names or emails, etc.)  That makes common operations\neasier, even if it's not as versatile as shell callbacks.  For cases\nwhere more complexity or special casing is needed, filter-repo\nprovides python callbacks that can operate on the data structures\npopulated from the fast-export stream to do just about anything you\nwant.\n\nfilter-branch defaults to working on a subset of the repository, and\nrequires you to specify a branch or branches, meaning you need to\nspecify `-- --all` to modify all commits.  filter-repo by contrast\ndefaults to rewriting everything, and you need to specify `--refs\n<rev-list-args>` if you want to limit to just a certain set of\nbranches or range of commits.  
(Though any `<rev-list-args>` that\nbegin with a hyphen are not accepted by filter-repo as they look like\nthe start of different options.)\n\nfilter-repo also takes care of additional concerns automatically, like\nrewriting commit messages that reference old commit IDs to instead\nreference the rewritten commit IDs, pruning commits which do not start\nempty but become empty due to the specified filters, and automatically\nshrinking and gc'ing the repo at the end of the filtering operation.\n\n## Cheat Sheet: Conversion of Examples from the filter-branch manpage\n\n### Removing a file\n\nThe filter-branch manual provided three different examples of removing\na single file, based on different levels of ease vs. carefulness and\nperformance:\n\n```shell\n  git filter-branch --tree-filter 'rm filename' HEAD\n```\n```shell\n  git filter-branch --tree-filter 'rm -f filename' HEAD\n```\n```shell\n  git filter-branch --index-filter 'git rm --cached --ignore-unmatch filename' HEAD\n```\n\nAll of these just become\n\n```shell\n  git filter-repo --invert-paths --path filename\n```\n\n### Extracting a subdirectory\n\nExtracting a subdirectory via\n\n```shell\n  git filter-branch --subdirectory-filter foodir -- --all\n```\n\nis one of the easiest commands to convert; it just becomes\n\n```shell\n  git filter-repo --subdirectory-filter foodir\n```\n\n### Moving the whole tree into a subdirectory\n\nKeeping all files but placing them in a new subdirectory via\n\n```shell\n  git filter-branch --index-filter \\\n      'git ls-files -s | sed \"s-\\t\\\"*-&newsubdir/-\" |\n              GIT_INDEX_FILE=$GIT_INDEX_FILE.new \\\n                      git update-index --index-info &&\n       mv \"$GIT_INDEX_FILE.new\" \"$GIT_INDEX_FILE\"' HEAD\n```\n\n(which happens to be GNU-specific and will fail with BSD userland in\nvery subtle ways) becomes\n\n```shell\n  git filter-repo --to-subdirectory-filter newsubdir\n```\n\n(which works fine regardless of GNU vs BSD userland 
differences.)\n\n### Re-grafting history\n\nThe filter-branch manual provided one example with three different\ncommands that could be used to achieve it, though the first of them\nhad limited applicability (only when the repo had a single initial\ncommit).  These three examples were:\n```shell\n  git filter-branch --parent-filter 'sed \"s/^\\$/-p <graft-id>/\"' HEAD\n```\n```shell\n  git filter-branch --parent-filter \\\n      'test $GIT_COMMIT = <commit-id> && echo \"-p <graft-id>\" || cat' HEAD\n```\n```shell\n  git replace --graft $commit-id $graft-id\n  git filter-branch $graft-id..HEAD\n```\n\ngit-replace did not exist when the original two examples were written,\nbut it is clear that the last example is far easier to understand.  As\nsuch, filter-repo just uses the same mechanism:\n\n```shell\n  git replace --graft $commit-id $graft-id\n  git filter-repo --proceed\n```\n\nNOTE: --proceed is needed here because filter-repo errors out if no\narguments are specified (doing so is usually an error).\n\n### Removing commits by a certain author\n\nWARNING: This is a BAD example for BOTH filter-branch and filter-repo.\nIt does not remove the changes the user made from the repo, it just\nremoves the commit in question while smashing the changes from it into\nany subsequent commits as though the subsequent authors had been\nresponsible for those changes as well.  `git rebase` is likely to be a\nbetter fit for what you really want if you are looking at this\nexample.  
(See also [this explanation of the differences between\nrebase and\nfilter-repo](https://github.com/newren/git-filter-repo/issues/62#issuecomment-597725502))\n\nThis filter-branch example\n\n```shell\n  git filter-branch --commit-filter '\n      if [ \"$GIT_AUTHOR_NAME\" = \"Darl McBribe\" ];\n      then\n          skip_commit \"$@\";\n      else\n          git commit-tree \"$@\";\n      fi' HEAD\n```\n\nbecomes\n\n```shell\n  git filter-repo --commit-callback '\n      if commit.author_name == b\"Darl McBribe\":\n          commit.skip()\n      '\n```\n\n### Rewriting commit messages -- removing text\n\nRemoving git-svn-id: lines from commit messages via\n\n```shell\n  git filter-branch --msg-filter '\n      sed -e \"/^git-svn-id:/d\"\n      '\n```\n\nbecomes\n\n```shell\n  git filter-repo --message-callback '\n      return re.sub(b\"^git-svn-id:.*\\n\", b\"\", message, flags=re.MULTILINE)\n      '\n```\n\n### Rewriting commit messages -- adding text\n\nAdding Acked-by lines to the last ten commits via\n\n```shell\n  git filter-branch --msg-filter '\n          cat &&\n          echo \"Acked-by: Bugs Bunny <bunny@bugzilla.org>\"\n      ' master~10..master\n```\n\nbecomes\n\n```shell\n  git filter-repo --message-callback '\n          return message + b\"Acked-by: Bugs Bunny <bunny@bugzilla.org>\\n\"\n      ' --refs master~10..master\n```\n\n### Changing author/committer(/tagger?) 
information\n\n```shell\n  git filter-branch --env-filter '\n      if test \"$GIT_AUTHOR_EMAIL\" = \"root@localhost\"\n      then\n              GIT_AUTHOR_EMAIL=john@example.com\n      fi\n      if test \"$GIT_COMMITTER_EMAIL\" = \"root@localhost\"\n      then\n              GIT_COMMITTER_EMAIL=john@example.com\n      fi\n      ' -- --all\n```\n\nbecomes either\n\n```shell\n  # Ensure '<john@example.com> <root@localhost>' is a line in .mailmap, then:\n  git filter-repo --use-mailmap\n```\n\nor\n\n```shell\n  git filter-repo --email-callback '\n    return email if email != b\"root@localhost\" else b\"john@example.com\"\n    '\n```\n\n(and as a bonus both filter-repo alternatives will fix tagger emails\ntoo, unlike the filter-branch example)\n\n\n### Restricting to a range\n\nThe partial examples\n\n```shell\n  git filter-branch ... C..H\n```\n```shell\n  git filter-branch ... C..H ^D\n```\n```shell\n  git filter-branch ... D..H ^C\n```\n\nbecome\n\n```shell\n  git filter-repo ... --refs C..H\n```\n```shell\n  git filter-repo ... --refs C..H ^D\n```\n```shell\n  git filter-repo ... --refs D..H ^C\n```\n\nNote that filter-branch accepts `--not` among the revision specifiers,\nbut that appears to python to be a flag name which breaks parsing.\nSo, instead of e.g. `--not C` as we might use with filter-branch, we\ncan specify `^C` to filter-repo.\n\n## Cheat Sheet: Additional conversion examples\n\n### Running a code formatter or linter on each file with some extension\n\nRunning some program on a subset of files is relatively natural in\nfilter-branch:\n\n```shell\n  git filter-branch --tree-filter '\n      git ls-files -z \"*.c\" \\\n          | xargs -0 -n 1 clang-format -style=file -i\n      '\n```\n\nthough it has the disadvantage of running on every c file for every\ncommit in history, even if some commits do not modify any c files.  
This\nmeans this kind of command can be excruciatingly slow.\n\nThe same functionality is slightly more involved in filter-repo for\ntwo reasons:\n  - fast-export and fast-import split file contents and file names into\n    completely different data structures that aren't normally available\n    together\n  - to run a program on a file, you'll need to write the contents to\n    a file, execute the program on that file, and then read the contents\n    of the file back in\n\n```shell\n  git filter-repo --file-info-callback '\n    if not filename.endswith(b\".c\"):\n      return (filename, mode, blob_id)  # no changes\n\n    contents = value.get_contents_by_identifier(blob_id)\n    tmpfile = os.path.basename(filename)\n    with open(tmpfile, \"wb\") as f:\n      f.write(contents)\n    subprocess.check_call([\"clang-format\", \"-style=file\", \"-i\", tmpfile])\n    with open(tmpfile, \"rb\") as f:\n      contents = f.read()\n    new_blob_id = value.insert_file_with_contents(contents)\n\n    return (filename, mode, new_blob_id)\n    '\n```\n\nHowever, one can write a script that uses filter-repo as a library to\nsimplify this, while also gaining filter-repo's automatic handling of\nother concerns like rewriting commit IDs in commit messages or pruning\ncommits that become empty.  In fact, one of the [contrib\ndemos](../contrib/filter-repo-demos),\n[lint-history](../contrib/filter-repo-demos/lint-history), was\nspecifically written to make this kind of case really easy:\n\n```shell\n  lint-history --relevant 'return filename.endswith(b\".c\")' \\\n      clang-format -style=file -i\n```\n"
  },
  {
    "path": "Documentation/examples-from-user-filed-issues.md",
    "content": "# Examples from user-filed issues\n\nLots of people have filed issues against git-filter-repo, and many times their\nissue boils down into questions of \"How do I?\" or \"Why doesn't this work?\"\n\nBelow are a collection of example repository filterings in answer to their\nquestions, which may be of interest to others.\n\n## Table of Contents\n\n  * [Adding files to root commits](#adding-files-to-root-commits)\n  * [Purge a large list of files](#purge-a-large-list-of-files)\n  * [Extracting a libary from a repo](#Extracting-a-libary-from-a-repo)\n  * [Replace words in all commit messages](#Replace-words-in-all-commit-messages)\n  * [Only keep files from two branches](#Only-keep-files-from-two-branches)\n  * [Renormalize end-of-line characters and add a .gitattributes](#Renormalize-end-of-line-characters-and-add-a-gitattributes)\n  * [Remove spaces at the end of lines](#Remove-spaces-at-the-end-of-lines)\n  * [Having both exclude and include rules for filenames](#Having-both-exclude-and-include-rules-for-filenames)\n  * [Removing paths with a certain extension](#Removing-paths-with-a-certain-extension)\n  * [Removing a directory](#Removing-a-directory)\n  * [Convert from NFD filenames to NFC](#Convert-from-NFD-filenames-to-NFC)\n  * [Set the committer of the last few commits to myself](#Set-the-committer-of-the-last-few-commits-to-myself)\n  * [Handling special characters, e.g. 
accents and umlauts in names](#Handling-special-characters-eg-accents-and-umlauts-in-names)\n  * [Handling repository corruption](#Handling-repository-corruption)\n  * [Removing all files with a backslash in them](#Removing-all-files-with-a-backslash-in-them)\n  * [Replace a binary blob in history](#Replace-a-binary-blob-in-history)\n  * [Remove commits older than N days](#Remove-commits-older-than-N-days)\n  * [Replacing pngs with compressed alternative](#Replacing-pngs-with-compressed-alternative)\n  * [Updating submodule hashes](#Updating-submodule-hashes)\n  * [Using multi-line strings in callbacks](#Using-multi-line-strings-in-callbacks)\n\n\n## Adding files to root commits\n\n<!-- https://github.com/newren/git-filter-repo/issues/21 -->\n\nHere's an example that will take `/path/to/existing/README.md` and\nstore it as `README.md` in the repository, and take\n`/home/myusers/mymodule.gitignore` and store it as `src/.gitignore` in\nthe repository:\n\n```\ngit filter-repo --commit-callback \"if not commit.parents: commit.file_changes += [\n    FileChange(b'M', b'README.md', b'$(git hash-object -w '/path/to/existing/README.md')', b'100644'), \n    FileChange(b'M', b'src/.gitignore', b'$(git hash-object -w '/home/myusers/mymodule.gitignore')', b'100644')]\"\n```\n\nAlternatively, you could also use the [insert-beginning](../contrib/filter-repo-demos/insert-beginning) contrib script:\n\n```\nmv /path/to/existing/README.md README.md\nmv /home/myusers/mymodule.gitignore src/.gitignore\ninsert-beginning --file README.md\ninsert-beginning --file src/.gitignore\n```\n\n## Purge a large list of files\n\n<!-- https://github.com/newren/git-filter-repo/issues/63 -->\n\nStick all the files in some file (one per line),\ne.g. 
`../DELETED_FILENAMES.txt`, and then run\n\n```\ngit filter-repo --invert-paths --paths-from-file ../DELETED_FILENAMES.txt\n```\n\n## Extracting a libary from a repo\n\n<!-- https://github.com/newren/git-filter-repo/issues/80 -->\n\nIf you want to pick out some subdirectory to keep\n(e.g. `src/some-folder/some-feature/`), but don't want it moved to the\nrepository root (so that --subdirectory-filter isn't applicable) but\ninstead want it to become some other higher level directory\n(e.g. `src/`):\n\n```\ngit filter-repo \\\n    --path src/some-folder/some-feature/ \\\n    --path-rename src/some-folder/some-feature/:src/\n```\n\n## Replace words in all commit messages\n\n<!-- https://github.com/newren/git-filter-repo/issues/83 -->\n\nReplace \"stuff\" in any commit message with \"task\".\n\n```\ngit filter-repo --message-callback 'return message.replace(b\"stuff\", b\"task\")'\n```\n\n## Only keep files from two branches\n\n<!-- https://github.com/newren/git-filter-repo/issues/91 -->\n\nLet's say you know that the files currently present on two branches\nare the only files that matter.  Files that used to exist in either of\nthese branches, or files that only exist on some other branch, should\nall be deleted from all versions of history.  
This can be accomplished\nby getting a list of files from each branch, combining them, sorting\nthe list and picking out just the unique entries, then passing the\nresult to `--paths-from-file`:\n\n```\ngit ls-tree -r --name-only ${BRANCH1} >../my-files\ngit ls-tree -r --name-only ${BRANCH2} >>../my-files\nsort ../my-files | uniq >../my-relevant-files\ngit filter-repo --paths-from-file ../my-relevant-files\n```\n\n## Renormalize end-of-line characters and add a .gitattributes\n\n<!-- https://github.com/newren/git-filter-repo/issues/122 -->\n\n```\ncontrib/filter-repo-demos/lint-history dos2unix\n[edit .gitattributes]\ncontrib/filter-repo-demos/insert-beginning --file .gitattributes\n```\n\n## Remove spaces at the end of lines\n\n<!-- https://github.com/newren/git-filter-repo/issues/145 -->\n\nRemoving all spaces at the end of lines of non-binary files, including\nconverting CRLF to LF:\n\n```\ngit filter-repo --replace-text <(echo 'regex:[\\r\\t ]+(\\n|$)==>\\n')\n```\n\n## Having both exclude and include rules for filenames\n\n<!-- https://github.com/newren/git-filter-repo/issues/230 -->\n\nIf you want to have rules to both include and exclude filenames, you\ncan simply invoke `git filter-repo` multiple times.  Alternatively,\nyou can do it in one run if you dispense with `--path` arguments and\ninstead use the more generic `--filename-callback`.  
For example, to\ninclude all files under `src/` except for `src/README.md`:\n\n```\ngit filter-repo --filename-callback '\n    if filename == b\"src/README.md\":\n        return None\n    if filename.startswith(b\"src/\"):\n        return filename\n    return None'\n```\n\n## Removing paths with a certain extension\n\n<!-- https://github.com/newren/git-filter-repo/issues/274 -->\n\n```\ngit filter-repo --invert-paths --path-glob '*.xsa'\n```\n\nor\n\n```\ngit filter-repo --filename-callback '\n    if filename.endswith(b\".xsa\"):\n        return None\n    return filename'\n```\n\n## Removing a directory\n\n<!-- https://github.com/newren/git-filter-repo/issues/278 -->\n\n```\ngit filter-repo --path node_modules/electron/dist/ --invert-paths\n```\n\n## Convert from NFD filenames to NFC\n\n<!-- https://github.com/newren/git-filter-repo/issues/296 -->\n\nGiven that Mac does utf-8 normalization of filenames, and has\nhistorically switched which kind of normalization it does, users may\nhave committed files with alternative normalizations to their\nrepository.  
If someone wants to convert filenames in NFD form to NFC,\nthey could run\n\n```\ngit filter-repo --filename-callback '\n    try: \n        return subprocess.check_output(\"iconv -f utf-8-mac -t utf-8\".split(),\n                                       input=filename)\n    except:\n        return filename\n'\n```\n\nor instead of relying on the system iconv utility and spawning separate\nprocesses, doing it within python:\n\n```\ngit filter-repo --filename-callback '\n    import unicodedata\n    try:\n        return bytearray(unicodedata.normalize(\"NFC\", filename.decode(\"utf-8\")), \"utf-8\")\n    except:\n        return filename\n'\n```\n  \n## Set the committer of the last few commits to myself\n\n<!-- https://github.com/newren/git-filter-repo/issues/379 -->\n\n```\ngit filter-repo --refs main~5..main --commit-callback '\n    commit.committer_name = b\"My Wonderful Self\"\n    commit.committer_email = b\"my@self.org\"\n'\n```\n\n## Handling special characters, e.g. accents and umlauts in names\n\n<!-- https://github.com/newren/git-filter-repo/issues/383 -->\n\nSince characters like ë and á are multi-byte characters and python\nwon't allow you to directly place those in a bytestring\n(e.g. `b\"Raphaël González\"` would result in a `SyntaxError: bytes can\nonly contain ASCII literal characters` error from Python), you just\nneed to make a normal (UTF-8) string and then convert to a bytestring\nto handle these.  
For example, changing the author name and email\nwhere the author email is currently `example@test.com`:\n\n```\ngit filter-repo --refs main~5..main --commit-callback '\n    if commit.author_email == b\"example@test.com\":\n        commit.author_name = \"Raphaël González\".encode()\n        commit.author_email = b\"rgonzalez@test.com\"\n'\n```\n\n## Handling repository corruption\n\n<!-- https://github.com/newren/git-filter-repo/issues/420 -->\n\nFirst, run fsck to get a list of the corrupt objects, e.g.:\n```\n$ git fsck --full\nerror in commit 166f57b3fbe31257100361ecaf735f305b533b21: missingSpaceBeforeDate: invalid author/committer line - missing space before date\nerror in tree c15680eae81cc8539af7e7de766a8a7c13bd27df: duplicateEntries: contains duplicate file entries\nChecking object directories: 100% (256/256), done.\n```\n\nOdds are you'll only see one type of corruption, but if you see\nmultiple, you can either do multiple filterings, or create replacement\nobjects for all the corrupt objects (both commits and trees), and then\ndo the filtering.  Since the method for handling corrupt commits and\ncorrupt trees is slightly different, I'll give examples below for each.\n\n### Handling repository corruption -- commit objects\n\nPrint out the corrupt object literally to a temporary file:\n```\n$ git cat-file -p 166f57b3fbe31257100361ecaf735f305b533b21 >tmp\n```\n\nTaking a look at the file would show, for example:\n```\n$ cat tmp\ntree e1d871155fce791680ec899fe7869067f2b4ffd2\nauthor My Name <my@email.com>1673287380 -0800\ncommitter My Name <my@email.com> 1673287380 -0800\n\nInitial\n```\n\nEdit that file to fix the error (in this case, the missing space\nbetween author email and author date).  
In this case, it would look\nlike this after editing:\n\n```\ntree e1d871155fce791680ec899fe7869067f2b4ffd2\nauthor My Name <my@email.com> 1673287380 -0800\ncommitter My Name <my@email.com> 1673287380 -0800\n\nInitial\n```\n\nSave the updated file, then use `git replace` to make a replace reference\nfor it.\n```\n$ git replace -f 166f57b3fbe31257100361ecaf735f305b533b21 $(git hash-object -t commit -w tmp)\n```\n\nThen remove the temporary file `tmp` and run `filter-repo` to consume\nthe replace reference and make it permanent:\n\n```\n$ rm tmp\n$ git filter-repo --proceed\n```\n\nNote that if you have multiple corrupt objects, you need to create\nreplacements for all of them, and then run filter-repo.  Leaving any\ncorrupt object without a replacement is likely to cause the filter-repo run\nto fail.\n\n### Handling repository corruption -- tree objects\n\n<!-- GitHub customer example -->\n\nPrint out the corrupt object literally to a temporary file:\n```\n$ git cat-file -p c15680eae81cc8539af7e7de766a8a7c13bd27df >tmp\n```\n\nTaking a look at the file would show, for example:\n```\n$ cat tmp\n100644 blob cd5ded43e86f80bfd384702e3f4cc7ce42de49f9\t.gitignore\n100644 blob 226febfcc91ec2c166a5a06834fb47c3553ec469\tREADME.md\n100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tsrc\n040000 tree df2b8fc99e1c1d4dbc0a854d9f72157f1d6ea078\tsrc\n040000 tree 99d732476808176bb9d73bcbfe2505e43d65cb4f\tt\n```\n\nEdit that file to fix the error (in this case, removing either the `src`\nfile (blob) or the `src` directory (tree)).  
In this case, it might look\nlike this after editing:\n\n```\n100644 blob cd5ded43e86f80bfd384702e3f4cc7ce42de49f9\t.gitignore\n100644 blob 226febfcc91ec2c166a5a06834fb47c3553ec469\tREADME.md\n040000 tree df2b8fc99e1c1d4dbc0a854d9f72157f1d6ea078\tsrc\n040000 tree 99d732476808176bb9d73bcbfe2505e43d65cb4f\tt\n```\n\nSave the updated file, then use `git mktree` to turn it into an actual\ntree object:\n```\n$ git mktree <tmp\nace04f50a5d13b43e94c12802d3d8a6c66a35b1d\n```\n\nNow use the output of that command to create a replacement object for\nthe original corrupt object:\n```\ngit replace -f c15680eae81cc8539af7e7de766a8a7c13bd27df ace04f50a5d13b43e94c12802d3d8a6c66a35b1d\n```\n\nThen remove the temporary file `tmp` and run `filter-repo` to consume\nthe replace reference and make it permanent:\n\n```\n$ rm tmp\n$ git filter-repo --proceed\n```\n\nAs mentioned with corrupt commit objects, if you have multiple corrupt\nobjects, as long as you create all the replacements for those objects\nfirst, you only need to run filter-repo once.\n\n\n## Removing all files with a backslash in them\n\n<!-- https://github.com/newren/git-filter-repo/issues/427 -->\n\n```\ngit filter-repo --filename-callback 'return None if b\"\\\\\" in filename else filename'\n```\n\n## Replace a binary blob in history\n\n<!-- https://github.com/newren/git-filter-repo/issues/436 -->\n\nLet's say you committed a binary blob, perhaps an image file, with\nsensitive data, and never modified it.  You want to replace it with\nthe contents of some alternate file, currently found at\n`../alternative-file.jpg` (it can have a different filename than what\nis stored in the repository).  Let's also say the hash of the old file\nwas `f4ede2e944868b9a08401dafeb2b944c7166fd0a`.  
You can replace it\nwith either\n\n```\ngit filter-repo --blob-callback '\n    if blob.original_id == b\"f4ede2e944868b9a08401dafeb2b944c7166fd0a\":\n        blob.data = open(\"../alternative-file.jpg\", \"rb\").read()\n'\n```\n\nor\n\n```\ngit replace -f f4ede2e944868b9a08401dafeb2b944c7166fd0a $(git hash-object -w ../alternative-file.jpg)\ngit filter-repo --proceed\n```\n\n## Remove commits older than N days\n\n<!-- https://github.com/newren/git-filter-repo/issues/300 -->\n\nThis is such a bad use case.  I'm tempted to leave it out, but it has\ncome up multiple times, and there are people who are totally fine with\nchanging every commit hash in their repository and throwing away\nhistory periodically.  First, identify an ${OLD_COMMIT} that you want\nto be a new root commit, then run:\n\n```\ngit replace --graft ${OLD_COMMIT}\ngit filter-repo --proceed\n```\n\n(The trick here is that `git replace --graft` takes a commit to replace, and\na list of new parents for the commit.  Since ${OLD_COMMIT} is the final\npositional argument, it means the list of new parents is an empty list, i.e.\nwe are turning it into a new root commit.)\n\n## Replacing pngs with compressed alternative\n\n<!-- https://github.com/newren/git-filter-repo/issues/492 -->\n\nLet's say you committed thousands of pngs that were poorly compressed,\nbut later aggressively recompressed the pngs and committed and pushed.\nUnfortunately, clones are slow because they still contain the poorly\ncompressed pngs and you'd like to rewrite history to pretend that the\naggressively compressed versions were used when the files were first\nintroduced.\n\nFirst, take a look at the commit that aggressively recompressed the pngs:\n\n```\ngit log -1 --raw --no-abbrev ${COMMIT_WHERE_YOU_COMPRESSED_PNGS}\n```\n\nthat will show output like\n```\n:100755 100755 edf570fde099c0705432a389b96cb86489beda09 9cce52ae0806d695956dcf662cd74b497eaa7b12 M      resources/foo.png\n:100755 100755 644f7c55e1a88a29779dc86b9ff92f512bf9bc11 
88b02e9e45c0a62db2f1751b6c065b0c2e538820 M      resources/bar.png\n```\n\nUse that to make a --file-info-callback to fix up the original versions:\n```\ngit filter-repo --file-info-callback '\n    if filename == b\"resources/foo.png\" and blob_id == b\"edf570fde099c0705432a389b96cb86489beda09\":\n        blob_id = b\"9cce52ae0806d695956dcf662cd74b497eaa7b12\"\n    if filename == b\"resources/bar.png\" and blob_id == b\"644f7c55e1a88a29779dc86b9ff92f512bf9bc11\":\n        blob_id = b\"88b02e9e45c0a62db2f1751b6c065b0c2e538820\"\n    return (filename, mode, blob_id)\n'\n```\n\n## Updating submodule hashes\n\n<!-- https://github.com/newren/git-filter-repo/issues/537 -->\n\nLet's say you have a repo with a submodule at src/my-submodule, and\nthat you feel the wrong commit-hashes of the submodule were committed\nwithin your project and you want them updated according to the\nfollowing table:\n```\nold                                      new\nedf570fde099c0705432a389b96cb86489beda09 9cce52ae0806d695956dcf662cd74b497eaa7b12\n644f7c55e1a88a29779dc86b9ff92f512bf9bc11 88b02e9e45c0a62db2f1751b6c065b0c2e538820\n```\n\nYou could do this as follows:\n```\ngit filter-repo --file-info-callback '\n    if filename == b\"src/my-submodule\" and blob_id == b\"edf570fde099c0705432a389b96cb86489beda09\":\n        blob_id = b\"9cce52ae0806d695956dcf662cd74b497eaa7b12\"\n    if filename == b\"src/my-submodule\" and blob_id == b\"644f7c55e1a88a29779dc86b9ff92f512bf9bc11\":\n        blob_id = b\"88b02e9e45c0a62db2f1751b6c065b0c2e538820\"\n    return (filename, mode, blob_id)\n'\n```\n\nYes, `blob_id` is kind of a misnomer here since the file's hash\nactually refers to a commit from the sub-project.  
But `blob_id` is\nthe name of the parameter passed to the --file-info-callback, so that\nis what must be used.\n\n## Using multi-line strings in callbacks\n\n<!-- https://lore.kernel.org/git/CABPp-BFqbiS8xsbLouNB41QTc5p0hEOy-EoV0Sjnp=xJEShkTw@mail.gmail.com/ -->\n\nSince the text for callbacks has spaces inserted at the front of every\nline, multi-line strings are normally munged.  For example, the command\n\n```\ngit filter-repo --blob-callback '\n  blob.data = bytes(\"\"\"\\\nThis is the new\nfile that I am\nreplacing every blob\nwith.  It is great.\\n\"\"\", \"utf-8\")\n'\n```\n\nwould result in a file with extra spaces at the front of every line:\n```\n  This is the new\n  file that I am\n  replacing every blob\n  with.  It is great.\n```\n\nThe two spaces at the beginning of every line were inserted into every\nline of the callback when trying to compile it as a function.\nHowever, you can use textwrap.dedent to fix this; in fact, using it\nwill even allow you to add more leading space so that it looks nicely\nindented.  For example:\n\n```\ngit filter-repo --blob-callback '\n  import textwrap\n  blob.data = bytes(textwrap.dedent(\"\"\"\\\n    This is the new\n    file that I am\n    replacing every blob\n    with.  It is great.\\n\"\"\"), \"utf-8\")\n'\n```\n\nThat will result in a file with contents\n```\nThis is the new\nfile that I am\nreplacing every blob\nwith.  It is great.\n```\n\nwhich has no leading spaces on any lines."
  },
  {
    "path": "Documentation/git-filter-repo.txt",
    "content": "// This file is NOT the documentation; it's the *source code* for it.\n// Please follow the \"user manual\" link under\n//     https://github.com/newren/git-filter-repo#how-do-i-use-it\n// to access the actual documentation, or view another site that\n// has compiled versions available, such as:\n//     https://www.mankier.com/1/git-filter-repo\n\ngit-filter-repo(1)\n==================\n\nNAME\n----\ngit-filter-repo - Rewrite repository history\n\nSYNOPSIS\n--------\n[verse]\n'git filter-repo' --analyze\n'git filter-repo' [<path_filtering_options>] [<content_filtering_options>]\n\t[<ref_renaming_options>] [<commit_message_filtering_options>]\n\t[<name_or_email_filtering_options>] [<parent_rewriting_options>]\n\t[<generic_callback_options>] [<miscellaneous_options>]\n\nDESCRIPTION\n-----------\n\nRapidly rewrite entire repository history using user-specified filters.\nThis is a destructive operation which should not be used lightly; it\nwrites new commits, trees, tags, and blobs corresponding to (but\nfiltered from) the original objects in the repository, then deletes the\noriginal history and leaves only the new.  See <<DISCUSSION>> for more\ndetails on the ramifications of using this tool.  
Several different\ntypes of history rewrites are possible; examples include (but are not\nlimited to):\n\n  * stripping large files (or large directories or large extensions)\n  * stripping unwanted files by path\n  * extracting wanted paths and their history (stripping everything else)\n  * restructuring the file layout (such as moving all files into a\n    subdirectory in preparation for merging with another repo, making a\n    subdirectory become the new toplevel directory, or merging two\n    directories with independent filenames into one directory)\n  * renaming tags (also often in preparation for merging with another repo)\n  * replacing or removing sensitive text such as passwords\n  * making mailmap rewriting of user names or emails permanent\n  * making grafts or replacement refs permanent\n  * rewriting commit messages\n\nAdditionally, several concerns are handled automatically (many of these\ncan be overridden, but they are all on by default):\n\n  * rewriting (possibly abbreviated) hashes in commit messages to\n    refer to the new post-rewrite commit hashes\n  * pruning commits which become empty due to the above filters (also\n    handles edge cases like pruning of merge commits which become\n    degenerate and empty)\n  * rewriting stashes\n  * baking the changes made by refs/replace/ refs into the permanent\n    history and removing the replace refs\n  * stripping of original history to avoid mixing old and new history\n  * repacking the repository post-rewrite to shrink the repo for the\n    user\n\nAnd additional facilities are available via a config option\n\n  * creating replace-refs (see linkgit:git-replace[1]) for old commit\n    hashes, which if manually pushed and fetched will allow users to\n    continue to refer to new commits using (unabbreviated) old commit\n    IDs\n\nAlso, it's worth noting that there is an important safety mechanism:\n\n  * abort if run from a repo that is not a fresh clone (to prevent\n    accidental data loss from 
rewriting local history that doesn't\n    exist anywhere else).  See <<FRESHCLONE>>.\n\nFor those who know that there is large unwanted stuff in their history\nand want help finding it, this command also\n\n  * provides an option to analyze a repository and generate reports that\n    can be useful in determining what to filter (or in determining\n    whether a separate filtering command was successful).\n\nSee also <<VERSATILITY>>, <<DISCUSSION>>, <<EXAMPLES>>, and\n<<INTERNALS>>.\n\nOPTIONS\n-------\n\nAnalysis Options\n~~~~~~~~~~~~~~~~\n\n--analyze::\n\tAnalyze repository history and create a report that may be\n\tuseful in determining what to filter in a subsequent run (or\n\tin determining if a previous filtering command did what you\n\twanted).  Will not modify your repo.\n\nFiltering based on paths (see also --filename-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nThese options specify the paths to select.  Note that much like git\nitself, renames are NOT followed so you may need to specify multiple\npaths, e.g. `--path olddir/ --path newdir/`\n\n--invert-paths::\n\tInvert the selection of files from the specified\n\t--path-{match,glob,regex} options below, i.e. only select\n\tfiles matching none of those options.\n\n--path-match <dir_or_file>::\n--path <dir_or_file>::\n\tExact paths (files or directories) to include in filtered\n\thistory.  Multiple --path options can be specified to get a\n\tunion of paths.\n\n--path-glob <glob>::\n\tGlob of paths to include in filtered history.  Multiple\n\t--path-glob options can be specified to get a union of paths.\n\n--path-regex <regex>::\n\tRegex of paths to include in filtered history.  Multiple\n\t--path-regex options can be specified to get a union of paths.\n\n--use-base-name::\n\tMatch on file base name instead of full path from the top of\n\tthe repo.  
Incompatible with --path-rename, and incompatible\n\twith matching against directory names.\n\nRenaming based on paths (see also --filename-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nNote: if you combine path filtering with path renaming, be aware that\n      a rename directive does not select paths, it only says how to\n      rename paths that are selected with the filters.\n\n--path-rename <old_name:new_name>::\n--path-rename-match <old_name:new_name>::\n\tPath to rename; if filename or directory matches <old_name>\n\trename to <new_name>.  Multiple --path-rename options can be\n\tspecified.\n\nPath shortcuts\n~~~~~~~~~~~~~~\n\n--paths-from-file <filename>::\n\tSpecify several path filtering and renaming directives, one\n\tper line. Lines with `==>` in them specify path renames, and\n\tlines can begin with `literal:` (the default), `glob:`, or\n\t`regex:` to specify different matching styles.  Blank lines\n\tand lines starting with a `#` are ignored (if you have a\n\tfilename that you want to filter on that starts with\n\t`literal:`, `#`, `glob:`, or `regex:`, then prefix the line\n\twith 'literal:').\n\n--subdirectory-filter <directory>::\n\tOnly look at history that touches the given subdirectory and\n\ttreat that directory as the project root. Equivalent to using\n\t`--path <directory>/ --path-rename <directory>/:`\n\n--to-subdirectory-filter <directory>::\n\tTreat the project root as if it were under\n\t<directory>.  Equivalent to using `--path-rename :<directory>/`\n\nContent editing filters (see also --blob-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n--replace-text <expressions_file>::\n\tA file with expressions that, if found, will be replaced. By\n\tdefault, each expression is treated as literal text, but\n\t`regex:` and `glob:` prefixes are supported. 
You can end the\n\tline with `==>` and some replacement text to choose a\n\treplacement choice other than the default of `***REMOVED***`.\n\n--strip-blobs-bigger-than <size>::\n\tStrip blobs (files) bigger than specified size (e.g. `5M`,\n\t`2G`, etc)\n\n--strip-blobs-with-ids <blob_id_filename>::\n\tRead git object ids from each line of the given file, and\n\tstrip all of them from history\n\nRenaming of refs (see also --refname-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n--tag-rename <old:new>::\n\tRename tags starting with <old> to start with <new>. For example,\n\t--tag-rename foo:bar will rename tag foo-1.2.3 to bar-1.2.3;\n\teither <old> or <new> can be empty.\n\nFiltering of commit messages (see also --message-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n--replace-message <expressions_file>::\n\tA file with expressions that, if found in commit or tag\n\tmessages, will be replaced. This file uses the same syntax as\n\t--replace-text.\n\n--preserve-commit-hashes::\n\tBy default, since commits are rewritten and thus gain new\n\thashes, references to old commit hashes in commit messages are\n\treplaced with new commit hashes (abbreviated to the same\n\tlength as the old reference).  Use this flag to turn off\n\tupdating commit hashes in commit messages.\n\n--preserve-commit-encoding::\n\tDo not reencode commit messages into UTF-8. By default, if the\n\tcommit object specifies an encoding for the commit message,\n\tthe message is re-encoded into UTF-8.\n\nFiltering of names & emails (see also --name-callback and --email-callback)\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n--mailmap <filename>::\n\tUse specified mailmap file (see linkgit:git-shortlog[1] for details\n\ton the format) when rewriting author, committer, and tagger names\n\tand emails. 
If the specified file is part of git history,\n\thistorical versions of the file will be ignored; only the current\n\tcontents are consulted.\n\n--use-mailmap::\n\tSame as: '--mailmap .mailmap'\n\nParent rewriting\n~~~~~~~~~~~~~~~~\n\n--replace-refs {delete-no-add, delete-and-add, update-no-add, update-or-add, update-and-add, old-default}::\n\tHow to handle replace refs (see git-replace(1)).  Replace refs\n\tcan be added during the history rewrite as a way to allow\n\tusers to pass old commit IDs (from before git-filter-repo was\n\trun) to git commands and have git know how to translate those\n\told commit IDs to the new (post-rewrite) commit IDs.  Also,\n\treplace refs that existed before the rewrite can either be\n\tdeleted or updated.  The choices to pass to --replace-refs\n\tthus need to specify both what to do with existing refs and\n\twhat to do with commit rewrites.  Thus 'update-and-add' means\n\tto update existing replace refs, and for any commit rewrite\n\t(even if already pointed at by a replace ref) add a new\n\trefs/replace/ reference to map from the old commit ID to the\n\tnew commit ID.  The default is update-no-add, meaning update\n\texisting replace refs but do not add any new ones.  There is\n\talso a special 'old-default' option for picking the default\n\tused in versions prior to git-filter-repo-2.45, namely\n\t'update-and-add' upon the first run of git-filter-repo in a\n\trepository and 'update-or-add' if running git-filter-repo\n\tagain on a repository.\n\n--prune-empty {always, auto, never}::\n\tWhether to prune empty commits. 'auto' (the default) means\n\tonly prune commits which become empty (not commits which were\n\tempty in the original repo, unless their parent was\n\tpruned). When the parent of a commit is pruned, the first\n\tnon-pruned ancestor becomes the new parent.\n\n--prune-degenerate {always, auto, never}::\n\tSince merge commits are needed for history topology, they are\n\ttypically exempt from pruning. 
However, they can become\n\tdegenerate with the pruning of other commits (having fewer\n\tthan two parents, having one commit serve as both parents, or\n\thaving one parent as the ancestor of the other.) If such merge\n\tcommits have no file changes, they can be pruned. The default\n\t('auto') is to only prune empty merge commits which become\n\tdegenerate (not which started as such).\n\n--no-ff::\n\tEven if the first parent is or becomes an ancestor of another\n\tparent, do not prune it.  This modifies how --prune-degenerate\n\tbehaves, and may be useful in projects that always use merge\n\t--no-ff.\n\nGeneric callback code snippets\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n--filename-callback <function_body>::\n\tPython code body for processing filenames; see <<CALLBACKS>>.\n\n--message-callback <function_body>::\n\tPython code body for processing messages (both commit messages and\n\ttag messages); see <<CALLBACKS>>.\n\n--name-callback <function_body>::\n\tPython code body for processing names of people; see <<CALLBACKS>>.\n\n--email-callback <function_body>::\n\tPython code body for processing email addresses; see\n\t<<CALLBACKS>>.\n\n--refname-callback <function_body>::\n\tPython code body for processing refnames; see <<CALLBACKS>>.\n\n--file-info-callback <function_body>::\n\tPython code body for processing the combination of filename, mode,\n\tand associated file contents; see <<CALLBACKS>>.  
Note that when\n\t--file-info-callback is specified, any replacements specified by\n\t--replace-text will not be automatically applied; instead, you\n\thave control within the --file-info-callback to choose which files\n\tto apply those transformations to.\n\n--blob-callback <function_body>::\n\tPython code body for processing blob objects; see <<CALLBACKS>>.\n\n--commit-callback <function_body>::\n\tPython code body for processing commit objects; see <<CALLBACKS>>.\n\n--tag-callback <function_body>::\n\tPython code body for processing tag objects; see <<CALLBACKS>>.\n\tNote that lightweight tags have no tag object and thus are not\n\thandled by this callback.  The only thing you really could do with a\n\tlightweight tag is rename it, but for that you should see\n\t--refname-callback instead.\n\n--reset-callback <function_body>::\n\tPython code body for processing reset objects; see <<CALLBACKS>>.\n\nSensitive Data Removal\n~~~~~~~~~~~~~~~~~~~~~~\n\n--sensitive-data-removal::\n--sdr::\n\tThis rewrite is intended to remove sensitive data from a repository.\n\tGather extra information from the rewrite needed to provide\n\tadditional instructions on how to clean up other copies.  This\n\tincludes:\n\t  - Fetching all refs, so that if refs outside of branches and tags\n\t    also reference the sensitive data, they can be cleaned up too\n\n\t    Note that if you have any local-only changes (i.e. un-pushed\n\t    changes) in your repository, on any branch or ref, this fetch step\n\t    may discard them.  
Working in a fresh clone avoids this problem;\n\t    see also the --no-fetch option if you don't want to work with a\n\t    fresh clone and you have important local-only changes.\n\n\t  - Tracking and reporting on the first changed commit(s)\n\t  - Tracking and reporting whether any LFS objects become orphaned by\n\t    the rewrite, so they can be removed\n\t  - Providing additional instructions at the end on how to clean up\n\t    the repository you cloned from, and other clones of the repo\n\n--no-fetch::\n\tAvoid the \"fetch all refs\" step with --sensitive-data-removal, and\n\tthus avoid overwriting local-only changes in the repository, but at\n\tthe risk of leaving the sensitive data in other refs in the source\n\trepository.  This option is implied by --partial or any flag that\n\timplies --partial.\n\nLocation to filter from/to\n~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nNOTE: Specifying alternate source or target locations implies\n--partial.  However, unlike normal uses of --partial, this doesn't\nrisk mixing old and new history since the old and new histories are in\ndifferent repositories.\n\n--source <source>::\n\tGit repository to read from\n\n--target <target>::\n\tGit repository to overwrite with filtered history\n\nMiscellaneous options\n~~~~~~~~~~~~~~~~~~~~~\n\n--help::\n-h::\n\tShow a help message and exit.\n\n--force::\n-f::\n\tIgnore fresh clone checks and rewrite history (an irreversible\n\toperation, especially since it by default ends with an\n\timmediate pruning of reflogs and old objects).  See\n\t<<FRESHCLONE>>.  Note that when cloning repos on a local\n\tfilesystem, it is better to pass `--no-local` to git clone\n\tthan passing `--force` to git-filter-repo.\n\n--partial::\n\tDo a partial history rewrite, resulting in the mixture of old and\n\tnew history.  
This disables rewriting refs/remotes/origin/* to\n\trefs/heads/*, disables removing of the 'origin' remote, disables\n\tremoving unexported refs, disables expiring the reflog, and\n\tdisables the automatic post-filter gc.  Also, this modifies\n\t--tag-rename and --refname-callback options such that instead of\n\treplacing old refs with new refnames, it will instead create new\n\trefs and keep the old ones around.  Use with caution.\n\n--refs <refs+>::\n\tLimit history rewriting to the specified refs.  Implies --partial.\n        In addition to the normal caveats of --partial (mixing old and new\n        history, no automatic remapping of refs/remotes/origin/* to\n        refs/heads/*, etc.), this also may cause problems for pruning of\n        degenerate empty merge commits when negative revisions are\n        specified.\n\n--dry-run::\n\tDo not change the repository. Run `git fast-export` and filter its\n\toutput, and save both the original and the filtered version for\n\tcomparison.  This also disables rewriting commit messages due to\n\tnot knowing new commit IDs and disables filtering of some empty\n\tcommits due to inability to query the fast-import backend.\n\n--debug::\n\tPrint additional information about operations being performed and\n\tcommands being run.  (If used together with --dry-run, shows\n\textra information about what would be run).\n\n--stdin::\n\tInstead of running `git fast-export` and filtering its output,\n\tfilter the fast-export stream from stdin.  The stdin must be in\n\tthe expected input format (e.g. it needs to include original-oid\n\tdirectives).\n\n--quiet::\n\tPass --quiet to other git commands called.\n\nOUTPUT\n------\n\nEvery time filter-repo is run, files are created in the `.git/filter-repo/`\ndirectory. 
These files are updated or overwritten on every run.\n\nCommit map\n~~~~~~~~~~\n\nThe `$GIT_DIR/filter-repo/commit-map` file contains a mapping of how all\ncommits were (or were not) changed.\n\n  * A header is the first line with the text \"old\" and \"new\"\n  * Commit mappings are in no particular order\n  * All commits in range of the rewrite will be listed, even commits\n    that are unchanged (e.g. because the commit pre-dated the\n    introduction of the files that the filtering operation is removing).\n  * An all-zeros hash, or null SHA, represents a non-existent object.\n    When in the \"new\" column, this means the commit was removed\n    entirely.\n\nReference map\n~~~~~~~~~~~~~\n\nThe `$GIT_DIR/filter-repo/ref-map` file contains a mapping of which local\nreferences were (or were not) changed.\n\n  * A header is the first line with the text \"old\", \"new\" and \"ref\"\n  * Reference mappings are sorted by ref\n  * An all-zeros hash, or null SHA, represents a non-existent object.\n    When in the \"new\" column, this means the ref was removed entirely.\n\nChanged References\n~~~~~~~~~~~~~~~~~~\n\nThe `$GIT_DIR/filter-repo/changed-refs` file contains a list of refs that\nwere changed.\n\n  * No header is provided\n  * Lists the subset of refs from ref-map for which old != new\n  * While unnecessary since this provides no new information over ref-map,\n    it does make it easier to quickly determine which refs were changed by\n    the rewrite.\n\nFirst Changed Commits\n~~~~~~~~~~~~~~~~~~~~~\n\nThe `$GIT_DIR/filter-repo/first-changed-commits` file contains a list of the\nfirst commit(s) changed by the filtering operation.  
These are the commits\nthat got rewritten and which had no parents that were also rewritten.\n\nSo, for example, if you had commits\n  A1-B1-C1-D1-E1\nbefore running git-filter-repo, and afterward you had commits\n  A1-B2-C2-D2-E2\nthen the First Changed Commits file would contain just one line, which\nwould be the hash of B1.\n\nIn most cases, there will only be one commit listed, but if you had\nmultiple root commits or a non-linear history where the commits on\nthose diverging histories were the first ones modified, then there\ncould be multiple first changed commits and they will each be listed\non separate lines.\n\nAlready Ran\n~~~~~~~~~~~\n\nThe `$GIT_DIR/filter-repo/already_ran` file records that\ngit-filter-repo has been run.  When this file is present, future runs will\nbe treated as an extension of the previous filtering operation.\n\nConcretely, this means:\n  * The \"Fresh Clone\" check is bypassed\n\n    This is done because past runs would cause the repository to no longer\n    look like a fresh clone, and thus fail the fresh clone check, but doing\n    filtering via multiple invocations of git-filter-repo is an intended\n    and supported use case.  
You already passed or bypassed the \"Fresh Clone\"\n    check on your initial run.\n\n  * The commit-map and ref-map files above will be updated rather than\n    simply rewritten.\n\n    In other words, if the first filter-repo invocation rewrote commit\n    A to commit B, and the second filter-repo invocation rewrote\n    commit B to commit C, then the second run would have an \"A C\"\n    entry rather than a \"B C\" entry for the changed commit.\n\n  * The first changed commit(s) (reported when using the\n    --sensitive-data-removal option) will be the first original commit\n    modified, not the first intermediate commit modified.\n\n    In more detail, if the repository originally had the following commits:\n       A1-B1-C1-D1-E1\n    and the first invocation of filter-repo changed this to\n       A1-B1-C2-D2-E2\n    then the first run would report \"C1\" as the first changed commit.  If\n    a second filter-repo run further changed this to\n       A1-B1-C2-D3-E3\n    then it would report \"C1\" as the first changed commit, not \"D2\",\n    because it is comparing to the original commits rather than the\n    intermediate ones.\n\nHowever, if the already_ran file exists but is older than 1 day when they\ninvoke git-filter-repo, the user will be prompted for whether the new run\nshould be considered a continuation of the previous run.  If they do not\nanswer in the affirmative, then the above three bullets will not apply.\nThis prompt exists because users might do a history rewrite in a repository,\nforget about it and leave the $GIT_DIR/filter-repo directory around, and\nthen some months or years later need to do another rewrite.  
If commits\nhave been made public and shared from the previous rewrite, then the next\nfilter-repo run should not be considered a continuation of the previous\nfiltering run.\n\nOriginal LFS Objects\n~~~~~~~~~~~~~~~~~~~~\n\nWhen running with the --sensitive-data-removal flag, and LFS is in use by the\nrepository, the `$GIT_DIR/filter-repo/original_lfs_objects` contains a list of\nLFS objects referenced by the repository before the rewrite, in sorted order.\n\nOrphaned LFS Objects\n~~~~~~~~~~~~~~~~~~~~\n\nWhen running with the --sensitive-data-removal flag, and LFS is in use by the\nrepository, the `$GIT_DIR/filter-repo/orphaned_lfs_objects` contains a list of\nLFS objects that used to be referenced by the repository but no longer are after\ngit-filter-repo has run.  Objects appear in sorted order.\n\n[[FRESHCLONE]]\nFRESH CLONE SAFETY CHECK AND --FORCE\n------------------------------------\n\nSince filter-repo does irreversible rewriting of history, it is\nimportant to avoid making changes to a repo for which the user doesn't\nhave a good backup.  The primary defense mechanism is to simply\neducate users and rely on them to be good stewards of their data; thus\nthere are several warnings in the documentation about how filter repo\nrewrites history.\n\nHowever, as a service to users, we would like to provide an additional\nsafety check beyond the documentation.  
There isn't a good way to\ncheck if the user has a good backup, but we can ask a related question\nthat is an imperfect but quite reasonable proxy: \"Is this repository a\nfresh clone?\"  Unfortunately, that is also a question we can't get a\nperfect answer to; git provides no way to answer that question.\nHowever, there are approximately a dozen things that I found that seem\nto always be true of brand new clones (assuming they are either clones\nof remote repositories or are made with the `--no-local` flag), and I\ncheck for all of those.\n\nThese checks can have both false positives and false negatives.\nSomeone might have a perfectly good backup of their repo without it\nactually being a fresh clone -- but there's no way for filter-repo to\nknow that.  Conversely, someone could look at all things that\nfilter-repo checks for in its safety checks and then just tweak their\nnon-backed-up repository to satisfy those conditions (though it would\ntake a fair amount of effort, and it's astronomically unlikely that a\nrepo that isn't a fresh clone randomly happens to match all the\ncriteria).  In practice, the safety checks filter-repo uses seem to be\nreally good at avoiding people accidentally running filter-repo on a\nrepository that they shouldn't be running it on. It even caught me\nonce when I did mean to run filter-repo but was in a different\ndirectory than I thought I was.\n\nIn short, it's perfectly fine to use `--force` to override the safety\nchecks as long as you're okay with filter-repo irreversibly rewriting\nthe contents of the current repository.  It is a really bad idea to\nget in the habit of always specifying `--force`; if you do, one day\nyou will run one of your commands in the wrong directory like I did,\nand you won't have the safety check anymore to bail you out.  
Also, it\nis definitely NOT okay to recommend `--force` on forums, Q&A sites, or\nin emails to other users without first carefully explaining that\n`--force` means putting your repositories' data at risk.  I am\nespecially bothered by people who suggest the flag when it clearly is\nNOT needed; they are needlessly putting other peoples' data at risk.\n\n[[VERSATILITY]]\nVERSATILITY\n-----------\n\nfilter-repo has a hierarchy of capabilities on the spectrum from easy to\nuse convenience flags that perform pre-defined types of filtering, to\nchoices that provide lots of flexibility in controlling how filtering\noccurs.  This spectrum includes the following:\n\n  * Convenience flags making common types of history rewriting simple (e.g.\n    --path, --strip-blobs-bigger-than, --replace-text, --mailmap)\n  * Options which are shorthand for others or which provide greater control\n    than others (e.g. --subdirectory-filter could just be written using\n    both a path selection (--path) and a path rename (--path-rename)\n    filter; --paths-from-file can handle all other --path* options and more\n    such as regex renaming of paths)\n  * Generic python callbacks for handling a certain type of data (the\n    filename, message, name, email, and refname callbacks)\n  * Generic python callbacks for handling fundamental git objects, allowing\n    greater control over the combination of data types the object holds\n    (the commit, tag, blob, and reset callbacks)\n  * The ability to import filter-repo as a module in a python program and\n    use its classes and functions for even greater control and flexibility\n    while still leveraging lots of basic capabilities.  One can even use\n    this to write new tools with a completely different interface.\n\nFor more information about callbacks, see <<CALLBACKS>>.  
For examples on\nwriting python programs that import filter-repo as a module to create new\nhistory rewriting tools, look at the contrib/filter-repo-demos/ directory.\nThat directory includes, among other examples, a reimplementation of\ngit-filter-branch which is faster than git-filter-branch, and a\nreimplementation of BFG Repo Cleaner with several bug fixes and new\nfeatures.\n\n[[DISCUSSION]]\nDISCUSSION\n----------\n\nUsing filter-repo is relatively simple, but rewriting history is part of\na larger discussion in terms of collaboration.  When you rewrite\nhistory, the old and new histories are no longer compatible; if you push\nthis history somewhere for others to view, it will look as though you've\ndone a rebase of all branches and tags.  Make sure you are familiar with\nthe \"RECOVERING FROM UPSTREAM REBASE\" section of linkgit:git-rebase[1]\n(and in particular, \"The hard case\") before proceeding, in addition to\nthis section.\n\nSteps to use git-filter-repo as part of the bigger picture of doing a\nhistory rewrite are roughly as follows:\n\n1. Create a clone of your repository.  You may pass `--bare` or\n   `--mirror` to `git clone`, if you prefer.  You should pass\n   `--no-local` if the repository you are cloning from is on the local\n   filesystem.  Avoid other flags; some might confuse the fresh clone\n   check, and others could cause parts of the data to be missing that\n   are needed for the rewrite.\n\n2. (Optional) Run `git filter-repo --analyze`.  This will create a\n   directory of reports mentioning multiple things: (a) paths that have\n   existed over time in your repo, (b) renames that have occurred in\n   your repo and (c) sizes of objects aggregated by\n   path/directory/extension/blob-id.  This information may be useful in\n   choosing how to filter your repo.  It can also be useful to re-run\n   --analyze after filtering to verify the changes look correct.\n\n3. 
Before rewriting the history of your local copy with git-filter-repo,\n   determine where you will push the rewritten history to when you are\n   done.  In the special case that you are trying to remove sensitive\n   data from an existing repository, you will want to push it back where\n   you cloned from, as well as clean up all other clones/copies of the\n   repo.  If you will be pushing back to the repository you cloned from,\n   you will want to use the --sensitive-data-removal option and see the\n   Sensitive Data Removal section below.  In most cases not dealing with\n   sensitive data removal, you will want to push to a new repo, because:\n\n   * Even after you rewrite history and push it back, other people who\n     previously cloned from the original repo will have the old history.\n     If they simply run `git pull && git push`, it will merge the\n     unrewritten history with the new, resulting in what looks like two\n     copies of each commit involved in your rewrite -- a new copy of\n     each commit which has the cleanups you made, and an old copy of\n     each commit that has not been cleaned up -- being merged together.\n     That means everything you carefully worked to remove from the\n     repository has been pushed back.  You're more likely to succeed in\n     making sure they don't re-push the unclean data if you just give\n     them a new repository URL and tell them to reclone.\n\n   * Rewriting history will rewrite tags; those who have already\n     downloaded tags will not get the updated tags even if they specify\n     `--tags` to `git fetch` or `git pull` (see the \"On Re-tagging\"\n     section of linkgit:git-tag[1]).  Every user trying to use an\n     existing clone will have to forcibly delete all tags they already\n     downloaded _before_ re-fetching them; it may be easier for them to\n     just re-clone, which they are more likely to do with a new clone\n     URL.\n\n   * Rewriting history may delete some refs (e.g. 
branches that only\n     had files that you wanted excised from history); unless you run\n     git push with the `--mirror` or `--prune` options, those refs\n     will continue to exist on the server.  If folks then merge these\n     branches into others, then people have started mixing old and new\n     history.  If users had already cloned these branches, removing\n     them from the server isn't enough; you need all users to delete\n     any local branches based on these refs and run fetch with the\n     `--prune` option as well.  Simply re-cloning from a new URL is\n     easier.\n\n   * The server may not allow you to force push over some refs.  For\n     example, code review systems may have special ref namespaces\n     (e.g. refs/changes/, refs/pull/, refs/merge-requests/) that they\n     have locked down, and you'll need to somehow prevent users from\n     merging those locked-down (and thus not cleaned up) histories\n     with your cleaned-up history.  Every software code review system\n     handles this differently (see the sensitive data removal section\n     for some links).\n\n4. Run filter-repo with your desired filtering options.  Many examples\n   are given in the <<EXAMPLES>> section.  For more complex cases, note\n   that doing the filtering in multiple steps (by running multiple\n   filter-repo invocations in a sequence) is supported.  If anything\n   goes wrong here, simply delete your clone and restart.\n\n5. Push your new repository to its new home (note that\n   refs/remotes/origin/* will have been moved to refs/heads/* as the\n   first part of filter-repo, so you can just deal with normal branches\n   instead of remote tracking branches).\n\n6. 
(Optional) Some additional considerations\n\n   * filter-repo has a --replace-refs option to allow creating replace\n     refs (see linkgit:git-replace[1]) for each rewritten commit ID,\n     allowing you to use old (unabbreviated) commit hashes in the git\n     command line to refer to the newly rewritten commits.  If you\n     want to use these replace refs, manually push them to the\n     relevant clone URL and tell users to manually fetch them (e.g. by\n     adjusting their fetch refspec, `git config --add\n     remote.origin.fetch +refs/replace/*:refs/replace/*`).  Sadly,\n     replace refs are not yet widely understood; projects like jgit\n     and libgit2 do not support them and existing repository managers\n     (e.g. Gerrit, GitHub, GitLab) do not yet understand replace refs.\n     Thus one can't use old commit hashes within the UI of these other\n     systems.  This may change in the future, but replace refs at\n     least help users locally within the git command line interface.\n     Also, be aware that commit-graphs are excessively cautious around\n     replace refs and just turn off entirely if any are present, so\n     after enough time has passed that old commit IDs become less\n     relevant, users may want to locally delete the replace refs to\n     regain the speedups from commit-graphs.\n\nWhy is my origin removed?\n~~~~~~~~~~~~~~~~~~~~~~~~~\n\nWhen you rewrite history, all commit IDs (starting with the first one\nwhere changes are made) are modified.  Even if you think you didn't\nchange an intermediate commit, the fact that you changed any of its\nancestors is also a change that counts and will cause a commit's ID to\nchange as well.  
It is unfortunately all-too-easy for yourself or\nsomeone else to accidentally merge the old ugly history you were\ntrying to rewrite with the new history, resulting in not only the old\nugly history returning but getting you \"two copies\" of each commit\n(both an original commit and a cleaned-up alternative), and thus\ndoubling the number of commits in your repository.  In short, you end\nup with an even bigger mess to clean up than you started with.\n\nThis happens frequently to people using `git filter-branch` or `BFG\nrepo cleaner`, and can happen to folks using `git filter-repo` if they\ninsist on pushing back to the original repo.  Example ways you can get\nsuch an even uglier history include:\n\n  * at the command line (of another clone of the same repo from before the\n    cleanup): `git pull && git push`\n  * in a software forge: \"reopen old Pull-Request/Merge-Request/Code-Review\n    and hit the merge/submit button\"\n\nRemoving the `origin` remote and suggesting people push to a new repo\n(and ensuring they tell others to clone the new repo) is usually a\ngood forcing function to avoid these problems.  But, if people really\nwant to push to the original repository despite these warnings, it is\ntrivial to do so; simply run:\n\n  * `git remote add origin $ORIGINAL_CLONE_URL`\n\nand then you can push (e.g. `git push --force --branches --tags\n--prune`).  Since removing the origin url is such a cheap way to\npotentially prevent big messes, and it's so easy to work around for\nthose that really do want to push back over the original history,\nremoving the origin url is a great safety measure that I employ.\n\nOne final warning if you really want to push back to the original repo:\nsee the next section on sensitive data removals.  
Those are the steps\nneeded when pushing back to the original repo; they are so involved that\nI assume they are only worth it when sensitive data is involved, but you\ncan choose to follow them for other kinds of rewrites too.\n\nSensitive Data Removals\n~~~~~~~~~~~~~~~~~~~~~~~\n\nSensitive data removals are a specialized type of history rewrite.\nWhile it is always very problematic to mix the cleaned-up history with\nthe non-cleaned-up history, for sensitive data removals it is also bad\nto allow others to continue to view/clone/fetch the non-cleaned-up\nhistory at all; users often need to try to expunge the old history as\nwell.\n\nNote that if the sensitive data under consideration is a\ntoken/password/credential/secret (as is often the case), then it is\nimportant that you revoke and rotate that credential first.  Once the\ncredential is revoked or rotated, it can no longer be used for access.\nRevoking/rotating may resolve your problem without resorting to the\nheavy-handed action of rewriting and purging history.\n\nFor sensitive data removal history rewrites, there are three high-level\nsteps:\n\n  - Rewrite the repository locally, using git-filter-repo\n  - Make sure other copies are cleaned up, including:\n    * the server you cloned from\n    * other clones that exist, such as ones your colleagues made\n  - Prevent repeats and avoid future sensitive data spills\n\nEach will be discussed in greater detail below.\n\nOne important thing to note, though, is that others working on the same\nrepository should be instructed to stop while you do the cleanup; if\nthey continue development during your cleanup, you'll likely be forced to\neither discard their changes or start over on your cleanup.\n\nRewrite the repository locally, using git-filter-repo\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nThe first step is to rewrite a copy of your repository locally using\ngit-filter-repo.  
The exact commands to run will differ based on where\nin your repository the sensitive data is found, but some general tips:\n\n  - Use the --sensitive-data-removal flag.  It will provide additional\n    information useful for the other steps.\n\n  - If the sensitive data is the entirety of one or more files, and no\n    version of those files from history needs to be kept in your\n    repository, the --invert-paths flag together with one or more --path\n    arguments may come in handy.\n\n  - If the sensitive data is just a string found within one or more\n    files and you want to replace that sensitive string with something\n    else while leaving the rest of the file(s) intact, the --replace-text\n    option may come in handy.\n\nAfter rewriting the history locally, make sure to inspect it to ensure the\nsensitive data has been removed.  Some commands that might be handy for\nchecking are:\n\n----\ngit log --all --name-status -- ${PROBLEMATIC_FILE1} ${PROBLEMATIC_FILE2}\n----\n\nor\n\n----\ngit log -S\"${PROBLEMATIC_STRING}\" --all -p --\n----\n\nIf either of these commands turn up more sensitive data, then run additional\ngit-filter-repo commands to clean up the necessary data before proceeding.\n\nMake sure other copies are cleaned up: primary server\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nCleaning up the repository you cloned from requires force pushing your\nrewritten history over the original.  You need to force push all refs,\nnot just your current branch.  You can use the following command to do so\n(read the bulleted list right after this command before running it):\n\n----\ngit push --force --mirror origin\n----\n\nSeveral comments on this command:\n\n  * If any of your colleagues have pushed any changes since you\n    started, this force push command will discard their changes.\n\n  * This force push is likely to fail to push some refs, since most\n    forges (Gerrit, GitHub, GitLab, etc.) 
prevent you from updating\n    some refs (e.g. `refs/changes/*`, `refs/pull/*`,\n    `refs/merge-requests/*`).  You will need to follow the directions\n    from those forges to get the remaining refs updated or deleted,\n    and a garbage collection to be triggered on their end.  Some\n    examples:\n    (https://docs.gitlab.com/ee/user/project/repository/reducing_the_repo_size_using_git.html[GitLab's\n    docs on reducing repository size], or\n    https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository#fully-removing-the-data-from-github[the\n    \"Fully removing the data from GitHub\" section of GitHub's docs]).\n\n  * If you passed the `--no-fetch` option to git-filter-repo (or\n    implied it with another option), you will either need to (1) drop\n    the `--mirror` option and figure out which refs or refspecs to\n    push on your own, or (2) use the `--mirror` option and risk\n    deleting any refs you didn't fetch.  Further, if you lacked some\n    refs the server had which included the sensitive data in their\n    history, then your only options at this point to actually clean up\n    the sensitive data from the server are to either redo your rewrite\n    from scratch (and make sure to get the relevant refs included this\n    time) or delete those refs on the server.\n\n  * Yes, I know that --mirror implies --force and is unnecessary.  I\n    included --force anyway as a visual reminder to readers that this\n    is going to overwrite changes on the server.\n\nAlso, if any LFS objects were orphaned by your rewrite, those objects\nlikely contain sensitive data and need to be deleted/purged from the LFS\nserver.  
You'll have to ask the maintainer of the LFS server you are\nusing for how to delete/purge those on the server.\n\nMake sure other copies are cleaned up: clones of colleagues\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nAfter you have cleaned up the server, the easiest way to clean up other\nclones is to make everyone delete their existing clones and reclone.\n\nIf that isn't an option, then you will need to proceed carefully because\na simple `git pull && git push` from any other clone will recontaminate\nthe main repository and make the mess even harder to clean up.  To avoid\nthis, before pushing from any other clone, you'll need to have them clean\nup their copy, as detailed below.\n\nFirst, though, let me note that you should *not* have other developers\ntry to cleanup their clone by running the same `git-filter-repo`\ncommands that you ran.  While that sometimes may happen to work, it is\nnot reliable in general.  Running the same `git-filter-repo` commands,\neven if identical, can result in them getting new hashes for commits\nthat are different than your new hashes, and you'll end up with a mess\ninvolving two or more copies of every commit.\n\nInstead developers with other clones of the repository should run\nthrough the following steps to clean up their copy if they are unwilling\nto discard their copy and reclone:\n\n  - delete all tags and run `git fetch --prune --tags`.  Running the\n    fetch command without deleting tags first will result in the old\n    tags being kept, which will keep the sensitive data.\n\n  - rebase any changes they have on any branch (or other ref) on top of\n    the new history.  See the \"RECOVERING FROM UPSTREAM REBASE\" section\n    of linkgit:git-rebase[1] (and in particular, \"The hard case\") for\n    instructions.\n\n  - run a few steps to clean out the pre-rebase history (note that the first\n    step drops all reflogs including all stash entries.  
That's a high cost,\n    but needed to clean up the sensitive data):\n    * git reflog expire --expire=now --all\n    * git gc --prune=now\n\nOnce these steps are complete, you also need to verify that the clone no\nlonger contains any sensitive data (it is really easy to miss something,\nwhich puts you at risk of recontaminating other repositories with the\nsensitive data).  You can do so by running:\n\n----\ngit cat-file -t ${HASH_OF_FIRST_CHANGED_COMMIT}\n----\n\nWhere `${HASH_OF_FIRST_CHANGED_COMMIT}` was printed by git-filter-repo at\nthe end of its run (if there was more than one \"first changed commit\",\nrun this command multiple times, with each commit hash).  If this\ncommand returns a fatal error, then the commit has correctly been\nremoved from this repository.  If it responds with \"commit\", then the\nobject still exists and you need to re-delete tags, re-rebase all\nnecessary branches/refs, and re-expire reflogs and redo the gc.  If you\nare curious about which branches or refs were the problematic ones\nholding on to `${HASH_OF_FIRST_CHANGED_COMMIT}`, then presuming you did\nthe reflog expire and gc jobs above, the following command should help\nyou find the problematic branches/refs:\n\n----\ngit for-each-ref --contains ${HASH_OF_FIRST_CHANGED_COMMIT}\n----\n\nAlso, remember, the cat-file command needs to come back with a fatal\nerror for every `${HASH_OF_FIRST_CHANGED_COMMIT}` involved if you have\nmore than one.\n\nAfter this is all done, then if any LFS objects were orphaned by the\nrewrite (which again, you will be told if you use the\n--sensitive-data-removal option when you run git-filter-repo), then you\nalso need to remove those LFS objects.  
Look for them a couple of\ndirectories under .git/lfs/objects/, and delete them.\n\nPrevent repeats and avoid future sensitive data spills\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\nThere are several measures you can take to help avoid repeat problems.\nNot all may be applicable for your case, but the more that are, the more\nlikely you can avoid problems.\n\nFor dealing with the existing sensitive data spill:\n\n- Since it is so easy to re-contaminate the repository you cloned from\n  (it merely takes a colleague to run `git pull && git push` from their\n  clone that was created before your cleanup), exercise extra vigilance in\n  performing the cleanup steps above for other clones to ensure they\n  have all been cleaned up.\n\n- If you have a central repository everyone pushes to, look into methods\n  to ban the First Changed Commit(s) from being (re-)pushed to your\n  repository.  Sadly, few repository managers currently have such a\n  built-in capability (see Gerrit's ban-commit ability for one such\n  example at\n  https://gerrit-review.googlesource.com/Documentation/cmd-ban-commit.html),\n  but a few may allow you to write your own pre-receive hooks that\n  reject pushes containing these bad commits.  (Pro-tip for writing such\n  a pre-receive hook: use `git cat-file -t ${BAD_COMMIT}` as a cheap\n  check before checking if any revision range between `<old-oid>` and\n  `<new-oid>` contains `${BAD_COMMIT}`)\n\nSteps to help avoid other future sensitive data spills:\n\n* If sensitive data is likely to appear within certain filenames that\n  should not be tracked in git at all, then add those filenames to\n  .gitignore to reduce the risk that others accidentally add them.\n\n* Avoid hardcoding secrets in code.  
Use environment variables,\n  configuration management tools, or secrets management services like\n  Azure Key Vault, AWS Secrets Manager, or HashiCorp Vault to manage and\n  inject secrets at runtime.\n\n* Create a pre-commit hook to check for sensitive data before it is\n  committed or pushed anywhere, or use a well-known tool in a pre-commit\n  hook like git-secrets or gitleaks.\n\n[[EXAMPLES]]\nEXAMPLES\n--------\n\nPath based filtering\n~~~~~~~~~~~~~~~~~~~~\n\nTo only keep the 'README.md' file plus the directories 'guides' and\n'tools/releases/':\n\n--------------------------------------------------\ngit filter-repo --path README.md --path guides/ --path tools/releases\n--------------------------------------------------\n\nDirectory names can be given with or without a trailing slash, and all\nfilenames are relative to the toplevel of the repo.  To keep all files\nexcept these paths, just add `--invert-paths`:\n\n--------------------------------------------------\ngit filter-repo --path README.md --path guides/ --path tools/releases --invert-paths\n--------------------------------------------------\n\nIf you want to have both an inclusion filter and an exclusion filter, just\nrun filter-repo multiple times.  For example, to keep the src/main\nsubdirectory but exclude files under src/main named 'data', run:\n\n--------------------------------------------------\ngit filter-repo --path src/main/\ngit filter-repo --path-glob 'src/*/data' --invert-paths\n--------------------------------------------------\n\nNote that the asterisk (`*`) will match across multiple directories, so the\nsecond command would remove e.g. src/main/org/whatever/data.  Also, the\nsecond command by itself would also remove e.g. 
src/not-main/foo/data, but\nsince src/not-main/ was removed by the first command, that's not an issue.\nAlso, the use of quotes around the asterisk is sometimes important to avoid\nglob expansion by the shell.\n\nYou can also select paths by regular expression (see\nhttps://docs.python.org/3/library/re.html#regular-expression-syntax).\nFor example, to only include files from the repo whose name is in the\nformat YYYY-MM-DD.txt and is found at least two subdirectories deep:\n\n--------------------------------------------------\ngit filter-repo --path-regex '^.*/.*/[0-9]{4}-[0-9]{2}-[0-9]{2}.txt$'\n--------------------------------------------------\n\nIf you want two directories to be renamed (and maybe merged if both are\nrenamed to the same location), use --path-rename; for example, to rename\nboth 'cmds/' and 'src/scripts/' to 'tools/':\n\n--------------------------------------------------\ngit filter-repo --path-rename cmds:tools --path-rename src/scripts/:tools/\n--------------------------------------------------\n\nAs with `--path`, directories can be specified with or without a\ntrailing slash for `--path-rename`.\n\nIf you do a `--path-rename` to something that was already in use, it will\nbe silently overwritten.  However, if you try to rename multiple files to\nthe same location (e.g. src/scripts/run_release.sh and cmds/run_release.sh\nboth existed and had different content with the renames above), then you\nwill be given an error.  If you have such a case, you may want to add\nanother rename command to move one of the paths somewhere else where it\nwon't collide:\n\n--------------------------------------------------\ngit filter-repo --path-rename cmds/run_release.sh:tools/do_release.sh \\\n                --path-rename cmds/:tools/ \\\n                --path-rename src/scripts/:tools/\n--------------------------------------------------\n\nAlso, `--path-rename` brings up ordering issues; all path arguments are\napplied in order.  
Thus, a command like\n\n--------------------------------------------------\ngit filter-repo --path-rename sources/:src/main/ --path src/main/\n--------------------------------------------------\n\nwould make sense but reversing the two arguments would not (src/main/ is\ncreated by the rename so reversing the two would give you an empty repo).\nAlso, note that the rename of cmds/run_release.sh a couple examples ago was\ndone before the other renames.\n\nNote that path renaming does not do path filtering, thus the following\ncommand\n\n--------------------------------------------------\ngit filter-repo --path src/main/ --path-rename tools/:scripts/\n--------------------------------------------------\n\nwould not result in the tools or scripts directories being present, because\nthe single filter selected only src/main/.  It's likely that you would\ninstead want to run:\n\n--------------------------------------------------\ngit filter-repo --path src/main/ --path tools/ --path-rename tools/:scripts/\n--------------------------------------------------\n\nIf you prefer to filter based solely on basename, use the `--use-base-name`\nflag (though this is incompatible with `--path-rename`).  
For example, to\nonly include README.md and Makefile files from any directory:\n\n--------------------------------------------------\ngit filter-repo --use-base-name --path README.md --path Makefile\n--------------------------------------------------\n\nIf you wanted to delete all .DS_Store files in any directory, you could\neither use:\n\n--------------------------------------------------\ngit filter-repo --invert-paths --path '.DS_Store' --use-base-name\n--------------------------------------------------\n\nor\n\n--------------------------------------------------\ngit filter-repo --invert-paths --path-glob '*/.DS_Store' --path '.DS_Store'\n--------------------------------------------------\n\n(the `--path-glob` isn't sufficient by itself as it might miss a toplevel\n.DS_Store file; further while something like `--path-glob '*.DS_Store'`\nwould workaround that problem it would also grab files named `foo.DS_Store`\nor `bar/baz.DS_Store`)\n\nFinally, see also the `--filename-callback` from <<CALLBACKS>>.\n\nFiltering based on many paths\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIf you have a long list of files, directories, globs, or regular\nexpressions to filter on, you can stick them in a file and use\n`--paths-from-file`; for example, with a file named stuff-i-want.txt with\ncontents of\n\n--------------------------------------------------\n# Blank lines and comment lines are ignored.\n# Examples similar to --path:\nREADME.md\nguides/\ntools/releases\n\n# An example that is like --path-glob:\nglob:*.py\n\n# An example that is like --path-regex:\nregex:^.*/.*/[0-9]{4}-[0-9]{2}-[0-9]{2}.txt$\n\n# An example of renaming a path\ntools/==>scripts/\n\n# An example of using a regex to rename a path\nregex:(.*)/([^/]*)/([^/]*)\\.text$==>\\2/\\1/\\3.txt\n--------------------------------------------------\n\nthen you could run\n\n--------------------------------------------------\ngit filter-repo --paths-from-file 
stuff-i-want.txt\n--------------------------------------------------\n\nto get a repo containing only the toplevel README.md file, the guides/\nand tools/releases/ directories, all python files, files whose name\nwas of the form YYYY-MM-DD.txt at least two subdirectories deep, and\nwould rename tools/ to scripts/ and rename files like foo/bar/baz.text\nto bar/foo/baz.txt.  Note the special line prefixes of `glob:` and\n`regex:` and the special string `==>` denoting renames.\n\nSometimes you have a way of easily generating all the files you want.\nFor example, if you know that none of the currently tracked files have\nany newlines or special characters in them (see core.quotePath from\n`git config --help`) so that `git ls-files` would print all files\nliterally one per line, and you knew that you wanted to keep only the\nfiles that are currently tracked (thus deleting from all commits in\nhistory any files that only appear on other branches or that only\nappear in older commits), then you could use a pair of commands such\nas\n\n--------------------------------------------------\ngit ls-files >../paths-i-want.txt\ngit filter-repo --paths-from-file ../paths-i-want.txt\n--------------------------------------------------\n\nSimilarly, you could use --paths-from-file to delete many files.  
For\nexample, you could run `git filter-repo --analyze` to get reports,\nlook in one such as .git/filter-repo/analysis/path-deleted-sizes.txt\nand copy all the filenames into a file such as\n/tmp/files-i-dont-want-anymore.txt and then run\n\n--------------------------------------------------\ngit filter-repo --invert-paths --paths-from-file /tmp/files-i-dont-want-anymore.txt\n--------------------------------------------------\n\nto delete them all.\n\nDirectory based shortcuts\n~~~~~~~~~~~~~~~~~~~~~~~~~\nLet's say you had a directory structure like the following:\n\n   module/\n      foo.c\n      bar.c\n   otherDir/\n      blah.config\n      stuff.txt\n   zebra.jpg\n\nIf you wanted just the module/ directory and you wanted it to become the\nnew root so that your new directory structure looked like\n\n      foo.c\n      bar.c\n\nthen you could run:\n\n--------------------------------------------------\ngit filter-repo --subdirectory-filter module/\n--------------------------------------------------\n\nIf you wanted all the files from the original repo, but wanted to move\neverything under a subdirectory named my-module/, so that your new\ndirectory structure looked like\n\n   my-module/\n      module/\n         foo.c\n         bar.c\n      otherDir/\n         blah.config\n         stuff.txt\n      zebra.jpg\n\nthen you would instead run:\n\n--------------------------------------------------\ngit filter-repo --to-subdirectory-filter my-module/\n--------------------------------------------------\n\nContent based filtering\n~~~~~~~~~~~~~~~~~~~~~~~\n\nIf you want to filter out all files bigger than a certain size, you can use\n`--strip-blobs-bigger-than` with some size (K, M, and G suffixes are\nrecognized), e.g.:\n\n--------------------------------------------------\ngit filter-repo --strip-blobs-bigger-than 10M\n--------------------------------------------------\n\nIf you want to strip out all files with specified git object ids (hashes),\nlist the hashes in a file 
and run\n\n--------------------------------------------------\ngit filter-repo --strip-blobs-with-ids FILE_WITH_GIT_BLOB_IDS\n--------------------------------------------------\n\nIf you want to modify file contents, you can do so based on a list of\nexpressions in a file, one per line.  For example, with a file named\nexpressions.txt containing\n\n--------------------------------------------------\np455w0rd\nfoo==>bar\nglob:*666*==>\nregex:\\bdriver\\b==>pilot\nliteral:MM/DD/YYYY==>YYYY-MM-DD\nregex:([0-9]{2})/([0-9]{2})/([0-9]{4})==>\\3-\\1-\\2\n--------------------------------------------------\n\nthen running\n--------------------------------------------------\ngit filter-repo --replace-text expressions.txt\n--------------------------------------------------\n\nwill go through and replace `p455w0rd` with `***REMOVED***`, `foo` with\n`bar`, any line containing `666` with a blank line, the word `driver` with\n`pilot` (but not if it has letters before or after; e.g. `drivers` will be\nunmodified), replace the exact text `MM/DD/YYYY` with `YYYY-MM-DD` and\nreplace date strings of the form MM/DD/YYYY with ones of the form\nYYYY-MM-DD.  In the expressions file, there are a few things to note:\n\n  * Every line has a replacement, given by whatever is on the right of\n    `==>`.  If `==>` does not appear on the line, the default replacement\n    is `***REMOVED***`.\n  * Lines can start with `literal:`, `glob:`, or `regex:` to specify\n    whether to do literal string matches,\n    globs (see https://docs.python.org/3/library/fnmatch.html), or regular\n    expressions (see https://docs.python.org/3/library/re.html#regular-expression-syntax).\n    If none of these are specified, `literal:` is assumed.\n  * If multiple matches are found, all are replaced.\n  * globs and regexes are applied to the entire file, but without any\n    special flags turned on.  
Some folks may be interested in adding `(?m)`\n    to the regex to turn on MULTILINE mode, so that `^` and `$` match the\n    beginning and ends of lines rather than the beginning and end of file.\n    See https://docs.python.org/3/library/re.html for details.\n\nSee also the `--blob-callback` from <<CALLBACKS>>.\n\nUpdating commit/tag messages\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nIf you want to modify commit or tag messages, you can do so with the\nsame syntax as `--replace-text`, explained above.  For example, with a\nfile named expressions.txt containing\n\n--------------------------------------------------\nfoo==>bar\n--------------------------------------------------\n\nthen running\n--------------------------------------------------\ngit filter-repo --replace-message expressions.txt\n--------------------------------------------------\n\nwill replace `foo` in commit or tag messages with `bar`.\n\nSee also the `--message-callback` from <<CALLBACKS>>.\n\nRefname based filtering\n~~~~~~~~~~~~~~~~~~~~~~~\n\nTo rename tags, use `--tag-rename`, e.g.:\n\n--------------------------------------------------\ngit filter-repo --tag-rename foo:bar\n--------------------------------------------------\n\nThis will rename any tags starting with `foo` to now start with `bar`.\nEither side of the colon could be blank, e.g.\n\n--------------------------------------------------\ngit filter-repo --tag-rename '':'my-module-'\n--------------------------------------------------\n\nFor more general refname modification, see `--refname-callback` from\n<<CALLBACKS>>.\n\nUser and email based filtering\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\nTo modify username and emails of commits, you can create a mailmap\nfile in the format accepted by linkgit:git-shortlog[1].  
For example,\nif you have a file named my-mailmap you can run\n\n--------------------------------------------------\ngit filter-repo --mailmap my-mailmap\n--------------------------------------------------\n\nand if the current contents of that file are as follows (if the\nspecified mailmap file is version controlled, historical versions of\nthe file are ignored):\n\n--------------------------------------------------\nName For User <email@addre.ss>\n<new@ema.il> <old1@ema.il>\nNew Name And <new@ema.il> <old2@ema.il>\nNew Name And <new@ema.il> Old Name And <old3@ema.il>\n--------------------------------------------------\n\nthen we can update username and/or emails based on the specified\nmapping.\n\nSee also the `--name-callback` and `--email-callback` from\n<<CALLBACKS>>.\n\nParent rewriting\n~~~~~~~~~~~~~~~~\n\nTo replace $commit_A with $commit_B (e.g. make all commits which had\n$commit_A as a parent instead have $commit_B for that parent), and\nrewrite history to make it permanent:\n\n--------------------------------------------------\ngit replace $commit_A $commit_B\ngit filter-repo --proceed\n--------------------------------------------------\n\nTo create a new commit with the same contents as $commit_A except with\ndifferent parent(s) and then replace $commit_A with the new commit,\nand rewrite history to make it permanent:\n\n--------------------------------------------------\ngit replace --graft $commit_A $new_parent_or_parents\ngit filter-repo --proceed\n--------------------------------------------------\n\nThe `--proceed` option is needed to avoid failing the \"no arguments\nspecified\" check.  
Note that older versions of git-filter-repo\nrequired `--force` to be passed after creating a graft to avoid\ntriggering the not-a-fresh-clone check; that check has been modified\nto remove this overuse of `--force`.\n\nPartial history rewrites\n~~~~~~~~~~~~~~~~~~~~~~~~\n\nTo rewrite the history on just one branch (which may cause it to no longer\nshare any common history with other branches), use `--refs`.  For example,\nto remove a file named 'extraneous.txt' from the 'master' branch:\n\n--------------------------------------------------\ngit filter-repo --invert-paths --path extraneous.txt --refs master\n--------------------------------------------------\n\nTo rewrite just some recent commits:\n\n--------------------------------------------------\ngit filter-repo --invert-paths --path extraneous.txt --refs master~3..master\n--------------------------------------------------\n\n[[CALLBACKS]]\nCALLBACKS\n---------\n\nFor flexibility, filter-repo allows you to specify functions on the\ncommand line to further filter all changes.  Please note that there\nare some API compatibility caveats associated with these callbacks\nthat you should be aware of before using them; see the \"API BACKWARD\nCOMPATIBILITY CAVEAT\" comment near the top of git-filter-repo source\ncode.\n\nMost callback functions are of the same general format\n(--file-info-callback is an exception which will be noted later).  For\na command line argument like\n\n--------------------------------------------------\n--foo-callback 'BODY'\n--------------------------------------------------\n\nthe following code will be compiled and called:\n\n--------------------------------------------------\ndef foo_callback(foo):\n  BODY\n--------------------------------------------------\n\nThus, you just need to make sure your _BODY_ modifies and returns\n_foo_ appropriately.  
One important thing to note for all callbacks is\nthat filter-repo uses bytestrings (see\nhttps://docs.python.org/3/library/stdtypes.html#bytes) everywhere\ninstead of strings.\n\nThere are four callbacks that allow you to operate directly on raw\nobjects that contain data that's easy to write in\nlinkgit:git-fast-import[1] format:\n\n--------------------------------------------------\n--blob-callback\n--commit-callback\n--tag-callback\n--reset-callback\n--------------------------------------------------\n\nWe'll come back to these later because it is often the case that the\nother callbacks are more convenient.  The other callbacks operate on a\nsmall piece of the raw objects or operate on pieces across multiple\ntypes of raw object (e.g. author names and committer names and tagger\nnames across commits and tags, or refnames across commits, tags, and\nresets, or messages across commits and tags).  The convenience\ncallbacks are:\n\n--------------------------------------------------\n--filename-callback\n--message-callback\n--name-callback\n--email-callback\n--refname-callback\n--file-info-callback\n--------------------------------------------------\n\nin each you are expected to simply return a new value based on the one\npassed in.  
For example,\n\n--------------------------------------------------\ngit-filter-repo --name-callback 'return name.replace(b\"Wiliam\", b\"William\")'\n--------------------------------------------------\n\nwould result in the following function being called:\n\n--------------------------------------------------\ndef name_callback(name):\n  return name.replace(b\"Wiliam\", b\"William\")\n--------------------------------------------------\n\nThe email callback is quite similar:\n\n--------------------------------------------------\ngit-filter-repo --email-callback 'return email.replace(b\".cm\", b\".com\")'\n--------------------------------------------------\n\nThe refname callback is also similar, but note that the refname passed in\nand returned are expected to be fully qualified (e.g. b\"refs/heads/master\"\ninstead of just b\"master\" and b\"refs/tags/v1.0.7\" instead of b\"1.0.7\"):\n\n--------------------------------------------------\ngit-filter-repo --refname-callback '\n  # Change e.g. refs/heads/master to refs/heads/prefix-master\n  rdir,rpath = os.path.split(refname)\n  return rdir + b\"/prefix-\" + rpath'\n--------------------------------------------------\n\nThe message callback is quite similar to the previous three callbacks,\nthough it operates on a bytestring that is likely more than one line:\n\n--------------------------------------------------\ngit-filter-repo --message-callback '\n  if b\"Signed-off-by:\" not in message:\n    message += b\"\\nSigned-off-by: Me My <self@and.eye>\"\n  return re.sub(b\"[Ee]-?[Mm][Aa][Ii][Ll]\", b\"email\", message)'\n--------------------------------------------------\n\nThe filename callback is slightly more interesting.  Returning None means\nthe file should be removed from all commits, returning the filename\nunmodified marks the file to be kept, and returning a different name means\nthe file should be renamed.  
An example:\n\n--------------------------------------------------\ngit-filter-repo --filename-callback '\n  if b\"/src/\" in filename:\n    # Remove all files with a directory named \"src\" in their path\n    # (except when \"src\" appears at the toplevel).\n    return None\n  elif filename.startswith(b\"tools/\"):\n    # Rename tools/ -> scripts/misc/\n    return b\"scripts/misc/\" + filename[6:]\n  else:\n    # Keep the filename and do not rename it\n    return filename\n  '\n--------------------------------------------------\n\nThe file-info callback is more involved.  It is designed to be used in\ncases where filtering depends on both filename and contents (and maybe\nmode).  It is called for file changes other than deletions (since\ndeletions have no file contents to operate on).  The file info\ncallback takes four parameters (filename, mode, blob_id, and value),\nand expects three to be returned (filename, mode, blob_id).  The\nfilename is handled similar to the filename callback; it can be used\nto rename the file (or set to None to drop the change).  The mode is a\nsimple bytestring (b\"100644\" for regular non-executable files,\nb\"100755\" for executable files/scripts, b\"120000\" for symlinks, and\nb\"160000\" for submodules).  The blob_id is most useful in conjunction\nwith the value parameter.  
The value parameter is an instance of a\nclass that has the following functions\n  value.get_contents_by_identifier(blob_id) -> contents (bytestring)\n  value.get_size_by_identifier(blob_id) -> size_of_blob (int)\n  value.insert_file_with_contents(contents) -> blob_id\n  value.is_binary(contents) -> bool\n  value.apply_replace_text(contents) -> new_contents (bytestring)\nand has the following member data you can write to\n  value.data (dict)\nThese functions allow you to get the contents of the file, or its\nsize, create a new file in the stream whose blob_id you can return,\ncheck whether some given contents are binary (using the heuristic from\nthe grep(1) command), and apply the replacement rules from --replace-text\n(note that --file-info-callback makes the changes from --replace-text not\nauto-apply).  You could use this for example to only apply the changes\nfrom --replace-text to certain file types and simultaneously rename the\nfiles it applies the changes to:\n\n--------------------------------------------------\ngit-filter-repo --file-info-callback '\n  if not filename.endswith(b\".config\"):\n    # Make no changes to the file; return as-is\n    return (filename, mode, blob_id)\n\n  new_filename = filename[0:-7] + b\".cfg\"\n\n  contents = value.get_contents_by_identifier(blob_id)\n  new_contents = value.apply_replace_text(contents)\n  new_blob_id = value.insert_file_with_contents(new_contents)\n\n  return (new_filename, mode, new_blob_id)\n  '\n--------------------------------------------------\n\nNote that if history has multiple revisions with the same file\n(e.g. it was cherry-picked to multiple branches or there were a number\nof reverts), then the --file-info-callback will be called multiple\ntimes.  
If you want to avoid processing the same file multiple times,\nthen you can stash transformation results in the value.data dict.\nFor example, we could modify the above example to make it only apply\ntransformations on blob_ids we have not seen before:\n\n--------------------------------------------------\ngit-filter-repo --file-info-callback '\n  if not filename.endswith(b\".config\"):\n    # Make no changes to the file; return as-is\n    return (filename, mode, blob_id)\n\n  new_filename = filename[0:-7] + b\".cfg\"\n\n  if blob_id in value.data:\n    return (new_filename, mode, value.data[blob_id])\n\n  contents = value.get_contents_by_identifier(blob_id)\n  new_contents = value.apply_replace_text(contents)\n  new_blob_id = value.insert_file_with_contents(new_contents)\n  value.data[blob_id] = new_blob_id\n\n  return (new_filename, mode, new_blob_id)\n  '\n--------------------------------------------------\n\nAn alternative example for the --file-info-callback is to make all\n.sh files executable and add an extra trailing newline to the .sh\nfiles:\n\n--------------------------------------------------\ngit-filter-repo --file-info-callback '\n  if not filename.endswith(b\".sh\"):\n    # Make no changes to the file; return as-is\n    return (filename, mode, blob_id)\n\n  # There are only 4 valid modes in git:\n  #   - 100644, for regular non-executable files\n  #   - 100755, for executable files/scripts\n  #   - 120000, for symlinks\n  #   - 160000, for submodules\n  new_mode = b\"100755\"\n\n  contents = value.get_contents_by_identifier(blob_id)\n  new_contents = contents + b\"\\n\"\n  new_blob_id = value.insert_file_with_contents(new_contents)\n\n  return (filename, new_mode, new_blob_id)\n  '\n--------------------------------------------------\n\nIn contrast to the previous callback types, the blob, reset, tag, and\ncommit callbacks are not expected to return a value, but are instead\nexpected to modify the object passed in.  
Major fields for these\nobjects are (subject to API backward compatibility caveats mentioned\npreviously):\n\n  * Blob: `original_id` (original hash) and `data`\n  * Reset: `ref` (name of reference) and `from_ref` (hash or integer mark)\n  * Tag: `ref`, `from_ref`, `original_id`, `tagger_name`, `tagger_email`,\n         `tagger_date`, `message`\n  * Commit: `branch`, `original_id`, `author_name`, `author_email`,\n            `author_date`, `committer_name`, `committer_email`,\n            `committer_date`, `message`, `file_changes` (list of\n            FileChange objects, each containing a `type`, `filename`,\n            `mode`, and `blob_id`), `parents` (list of hashes or integer\n            marks)\n\nAn example of each:\n\n--------------------------------------------------\ngit filter-repo --blob-callback '\n  if len(blob.data) > 25:\n    # Mark this blob for removal from all commits\n    blob.skip()\n  else:\n    blob.data = blob.data.replace(b\"Hello\", b\"Goodbye\")\n  '\n--------------------------------------------------\n\n--------------------------------------------------\ngit filter-repo --reset-callback 'reset.ref = reset.ref.replace(b\"master\", b\"dev\")'\n--------------------------------------------------\n\n--------------------------------------------------\ngit filter-repo --tag-callback '\n  if tag.tagger_name == b\"Jim Williams\":\n    # Omit this tag\n    tag.skip()\n  else:\n    tag.message = tag.message + b\"\\n\\nTag of %s by %s on %s\" % (tag.ref, tag.tagger_email, tag.tagger_date)'\n--------------------------------------------------\n\n--------------------------------------------------\ngit filter-repo --commit-callback '\n  # Remove executable files with three 6s in their name (including\n  # from leading directories).\n  # Also, undo deletion of sources/foo/bar.txt (change types are\n  # either b\"D\" (deletion) or b\"M\" (add or modify); renames are\n  # handled by deleting the old file and adding a new one)\n  commit.file_changes = [\n 
        change for change in commit.file_changes\n         if not (change.mode == b\"100755\" and\n                 change.filename.count(b\"6\") == 3) and\n            not (change.type == b\"D\" and\n                 change.filename == b\"sources/foo/bar.txt\")]\n  # Mark all .sh files as executable; modes in git are always one of\n  # 100644 (normal file), 100755 (executable), 120000 (symlink), or\n  # 160000 (submodule)\n  for change in commit.file_changes:\n    if change.filename.endswith(b\".sh\"):\n      change.mode = b\"100755\"\n  '\n--------------------------------------------------\n\n[[INTERNALS]]\nINTERNALS\n---------\n\nYou probably don't need to read this section unless you are just very\ncurious or you are trying to do a very complex history rewrite.\n\nHow filter-repo works\n~~~~~~~~~~~~~~~~~~~~~\n\nRoughly, filter-repo works by running\n\n--------------------------------------------------\ngit fast-export <options> | filter | git fast-import <options>\n--------------------------------------------------\n\nwhere filter-repo not only launches the whole pipeline but also serves as\nthe _filter_ in the middle.  However, filter-repo does a few additional\nthings on top in order to make it into a well-rounded filtering tool.  A\nsequence that more accurately reflects what filter-repo runs is:\n\n  1. Verify we're in a fresh clone\n  2. `git fetch -u . refs/remotes/origin/*:refs/heads/*`\n  3. `git remote rm origin`\n  4. `git fast-export --show-original-ids --reference-excluded-parents --fake-missing-tagger --signed-tags=strip --tag-of-filtered-object=rewrite --use-done-feature --no-data --reencode=yes --mark-tags --all | filter | git -c core.ignorecase=false fast-import --date-format=raw-permissive --force --quiet`\n  5. `git update-ref --no-deref --stdin`, fed with a list of refs to nuke, and a list of replace refs to delete, create, or update.\n  6. `git reset --hard`\n  7. `git reflog expire --expire=now --all`\n  8. 
`git gc --prune=now`\n\nSome notes or exceptions on each of the above:\n\n  1. If we're not in a fresh clone, users will not be able to recover if\n     they used the wrong command or ran in the wrong repo.  (Though\n     `--force` overrides this check, and it's also off if you've already\n     run filter-repo once in this repo.)\n  2. Technically, we actually use a `git update-ref` command fed with a lot\n     of input due to the fact that users can use `--force` when local\n     branches might not match remote branches.  But this fetch command\n     catches the intent rather succinctly.\n  3. We don't want users accidentally pushing back to the original repo, as\n     discussed in <<DISCUSSION>>.  It also reminds users that since history\n     has been rewritten, this repo is no longer compatible with the\n     original.  Finally, another minor benefit is this allows users to push\n     with the `--mirror` option to their new home without accidentally\n     sending remote tracking branches.\n  4. Some of these flags are always used but others are actually\n     conditional.  For example, filter-repo's `--replace-text` and\n     `--blob-callback` options need to work on blobs so `--no-data` cannot\n     be passed to fast-export.  But when we don't need to work on blobs,\n     passing `--no-data` speeds things up.  Also, other flags may change\n     the structure of the pipeline as well (e.g. `--dry-run` and `--debug`)\n  5. We use this step to write replace refs for accessing the newly written\n     commit hashes using their previous names.  Also, if refs were renamed\n     by various steps, we need to delete the old refnames in order to avoid\n     mixing old and new history.\n  6. Users also have old versions of files in their working tree and index;\n     we want those cleaned up to match the rewritten history as well.  Note\n     that this step is skipped in bare repos.\n  7. Reflogs will hold on to old history, so we need to expire them.\n  8. 
We need to gc to avoid mixing new and old history.  Also, it shrinks\n     the repository for users, so they don't have to do extra work.  (Odds\n     are that they've only rewritten trees and commits and maybe a few\n     blobs, so `--aggressive` isn't needed and would be too slow.)\n\nInformation about these steps is printed out when `--debug` is passed\nto filter-repo.  When doing a `--partial` history rewrite, steps 2, 3,\n7, and 8 are unconditionally skipped, step 5 is skipped if\n`--replace-refs` is `update-no-add`, and just the nuke-unused-refs\nportion of step 5 is skipped if `--replace-refs` is something else.\n\nLimitations\n~~~~~~~~~~~\n\nInherited limitations\n^^^^^^^^^^^^^^^^^^^^^\n\nSince git filter-repo calls fast-export and fast-import to do a lot of the\nheavy lifting, it inherits limitations from those systems:\n\n  * extended commit headers, if any, are stripped\n  * commits get rewritten meaning they will have new hashes; therefore,\n    signatures on commits and tags cannot continue to work and instead are\n    just removed (thus signed tags become annotated tags)\n  * tags of commits are supported.  
Prior to git-2.24.0, tags of blobs and\n    tags of tags are not supported (fast-export would die on such tags).\n    tags of trees are not supported in any git version (since fast-export\n    ignores tags of trees with a warning and fast-import provides no way to\n    import them).\n  * annotated and signed tags outside of the refs/tags/ namespace are not\n    supported (their location will be mangled in weird ways)\n  * fast-import will die on various forms of invalid input, such as a\n    timezone with more than four digits\n  * fast-export cannot reencode commit messages into UTF-8 if the commit\n    message is not valid in its specified encoding (in such cases, it'll\n    leave the commit message and the encoding header alone).\n  * commits without an author will be given one matching the committer\n  * tags without a tagger will be given a fake tagger\n  * references that include commit cycles in their history (which can be\n    created with linkgit:git-replace[1]) will not be flagged to the user as\n    an error but will be silently deleted by fast-export as though the\n    branch or tag contained no interesting files\n\nThere are also some limitations due to the design of these systems:\n\n  * Trying to insert additional files into the stream can be tricky; since\n    fast-export only lists file changes in a merge relative to its first\n    parent, if you insert additional files into a commit that is in the\n    second (or third or fourth) parent history of a merge, then you also\n    need to add it to the merge manually.  (Similarly, if you change which\n    parent is the first parent in a merge commit, you need to manually\n    update the list of file changes to be relative to the new first\n    parent.)\n\n  * fast-export and fast-import work with exact file contents, not patches.\n    (e.g. 
\"Whatever the current contents of this file, update them to now\n    have these contents\") Because of this, removing the changes made in a\n    single commit or inserting additional changes to a file in some commit\n    and expecting them to propagate forward is not something that can be\n    done with these tools.  Use linkgit:git-rebase[1] for that.\n\nIntrinsic limitations\n^^^^^^^^^^^^^^^^^^^^^\n\nSome types of filtering have limitations that would affect any tool\nattempting to perform them; the most any tool can do is attempt to notify\nthe user when it detects an issue:\n\n  * When rewriting commit hashes in commit messages, there are a variety\n    of cases when the hash will not be updated (whenever this happens, a\n    note is written to `.git/filter-repo/suboptimal-issues`):\n    ** if a commit hash does not correspond to a commit in the old repo\n    ** if a commit hash corresponds to a commit that gets pruned\n    ** if an abbreviated hash is not unique\n\n  * Pruning of empty commits can cause a merge commit to lose an entire\n    ancestry line and become a non-merge.  If the merge commit had no\n    changes then it can be pruned too, but if it still has changes it needs\n    to be kept.  This might cause minor confusion since the commit will\n    likely have a commit message that makes it sound like a merge commit\n    even though it's not.  (Whenever a merge commit becomes a non-merge\n    commit, a note is written to `.git/filter-repo/suboptimal-issues`)\n\nIssues specific to filter-repo\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n  * Multiple repositories in the wild have been observed which use a bogus\n    timezone (`+051800`); google will find you some reports.  The intended\n    timezone wasn't clear or wasn't always the same.  
filter-repo replaces it with a\n    different bogus timezone that fast-import will accept (`+0261`).\n\n  * `--path-rename` can result in pathname collisions; to avoid excessive\n    memory requirements of tracking which files are in all commits or\n    looking up what files exist with either every commit or every usage of\n    --path-rename, we just tell the user that they might clobber other\n    changes if they aren't careful.  We can check if the clobbering comes\n    from another --path-rename without much overhead.  (Perhaps in the\n    future it's worth adding a slow mode to --path-rename that will do the\n    more exhaustive checks?)\n\n  * There is no mechanism for directly controlling which flags are passed\n    to fast-export (or fast-import); only pre-defined flags can be turned\n    on or off as a side-effect of other options.  Direct control would make\n    little sense because some options like `--full-tree` would require\n    additional code in filter-repo (to parse new directives), and others\n    such as `-M` or `-C` would break assumptions used in other places of\n    filter-repo.\n\n  * Partial-repo filtering, while supported, runs counter to filter-repo's\n    \"avoid mixing old and new history\" design.  This support has required\n    improvements to core git as well (e.g. it depends upon the\n    `--reference-excluded-parents` option to fast-export that was added\n    specifically for this usage within filter-repo).  The `--partial` and\n    `--refs` options will continue to be supported since there are people\n    with usecases for them; however, I am concerned that this inconsistency\n    about mixing old and new history seems likely to lead to user mistakes.\n    For now, I just hope that long explanations of caveats in the\n    documentation of these options suffice to curtail any such problems.\n\nComments on reversibility\n^^^^^^^^^^^^^^^^^^^^^^^^^\n\nSome people are interested in reversibility of a rewrite; e.g. 
rewrite\nhistory, possibly add some commits, then unrewrite and get the original\nhistory back plus a few new \"unrewritten\" commits.  Obviously this is\nimpossible if your rewrite involves throwing away information\n(e.g. filtering out files or replacing several different strings with\n`***REMOVED***`), but may be possible with some rewrites.  filter-repo is\nlikely to be a poor fit for this type of workflow for a few reasons:\n\n  * most of the limitations inherited from fast-export and fast-import\n    are of a type that cause reversibility issues\n  * grafts and replace refs, if present, are used in the rewrite and made\n    permanent\n  * rewriting of commit hashes will probably be reversible, but it is\n    possible for rewritten abbreviated hashes to not be unique even if the\n    original abbreviated hashes were.\n  * filter-repo defaults to several forms of irreversible rewriting that\n    you may need to turn off (e.g. the last two bullet points above or\n    reencoding commit messages into UTF-8); it's possible that additional\n    forms of irreversible rewrites will be added in the future.\n  * I assume that people use filter-repo for one-shot conversions, not\n    ongoing data transfers.  I explicitly reserve the right to change any\n    API in filter-repo based on this presumption (and a comment to this\n    effect is found in multiple places in the code and examples).  You\n    have been warned.\n\nSEE ALSO\n--------\nlinkgit:git-rebase[1], linkgit:git-filter-branch[1]\n\nGIT\n---\nPart of the linkgit:git[1] suite\n"
  },
  {
    "path": "INSTALL.md",
    "content": "# Table of Contents\n\n  * [Pre-requisites](#pre-requisites)\n  * [Simple Installation](#simple-installation)\n  * [Installation via Package Manager](#installation-via-package-manager)\n  * [Detailed installation explanation for\n     packagers](#detailed-installation-explanation-for-packagers)\n  * [Installation as Python Package from\n     PyPI](#installation-as-python-package-from-pypi)\n  * [Installation via Makefile](#installation-via-makefile)\n  * [Notes for Windows Users](#notes-for-windows-users)\n\n# Pre-requisites\n\nInstructions on this page assume you have already installed both\n[Git](https://git-scm.com) and [Python](https://www.python.org/)\n(though the [Notes for Windows Users](#notes-for-windows-users) has\nsome tips on Python).\n\n# Simple Installation\n\nAll you need to do is download one file: the [git-filter-repo script\nin this repository](git-filter-repo) ([direct link to raw\nfile](https://raw.githubusercontent.com/newren/git-filter-repo/main/git-filter-repo)),\nmaking sure to preserve its name (`git-filter-repo`, with no\nextension).  **That's it**.  
You're done.\n\nThen you can run any command you want, such as\n\n    $ python3 git-filter-repo --analyze\n\nIf you place the git-filter-repo script in your $PATH, then you can\nshorten commands by replacing `python3 git-filter-repo` with `git\nfilter-repo`; the manual assumes this but you can use the longer form.\n\nOptionally, if you also want to use some of the contrib scripts, then\nyou need to make sure you have a `git_filter_repo.py` file which is\neither a link to or copy of `git-filter-repo`, and you need to place\nthat git_filter_repo.py file in $PYTHONPATH.\n\nIf you prefer an \"official\" installation over the manual installation\nexplained above, the other sections may have useful tips.\n\n# Installation via Package Manager\n\nIf you want to install via some [package\nmanager](https://alternativeto.net/software/yellowdog-updater-modified/?license=opensource),\nyou can run\n\n    $ PACKAGE_TOOL install git-filter-repo\n\nThe following package managers have packaged git-filter-repo:\n\n[![Packaging status](https://repology.org/badge/vertical-allrepos/git-filter-repo.svg)](https://repology.org/project/git-filter-repo/versions)\n\nThis list covers at least Windows (Scoop), Mac OS X (Homebrew), and\nLinux (most of the rest).  
Note that I do not curate this list (and have\nno interest in doing so); https://repology.org tracks who packages\nthese versions.\n\n# Detailed installation explanation for packagers\n\nfilter-repo only consists of a few files that need to be installed:\n\n  * git-filter-repo\n\n    This is the _only_ thing needed for basic use.\n\n    This can be installed in the directory pointed to by `git --exec-path`,\n    or placed anywhere in $PATH.\n\n    If your python3 executable is named \"python\" instead of \"python3\"\n    (this particularly appears to affect a number of Windows users),\n    then you'll also need to modify the first line of git-filter-repo\n    to replace \"python3\" with \"python\".\n\n  * git_filter_repo.py\n\n    This is needed if you want to make use of one of the scripts in\n    contrib/filter-repo-demos/, or want to write your own script making use\n    of filter-repo as a python library.\n\n    You can create this symlink to (or copy of) git-filter-repo named\n    git_filter_repo.py and place it in your python site packages; `python\n    -c \"import site; print(site.getsitepackages())\"` may help you find the\n    appropriate location for your system.  
Alternatively, you can place\n    this file anywhere within $PYTHONPATH.\n\n  * git-filter-repo.1\n\n    This is needed if you want `git filter-repo --help` to succeed in\n    displaying the manpage, when help.format is \"man\" (the default on Linux\n    and Mac).\n\n    This can be installed in the directory pointed to by `$(git\n    --man-path)/man1/`, or placed anywhere in $MANDIR/man1/ where $MANDIR\n    is some entry from $MANPATH.\n\n    Note that `git filter-repo -h` will show a more limited built-in set of\n    instructions regardless of whether the manpage is installed.\n\n  * git-filter-repo.html\n\n    This is needed if you want `git filter-repo --help` to succeed in\n    displaying the html version of the help, when help.format is set to\n    \"html\" (the default on Windows).\n\n    This can be installed in the directory pointed to by `git --html-path`.\n\n    Note that `git filter-repo -h` will show a more limited built-in set of\n    instructions regardless of whether the html version of help is\n    installed.\n\nSo, installation might look something like the following:\n\n1. If you don't have the necessary documentation files (because you\n   are installing from a clone of filter-repo instead of from a\n   tarball) then you can first run:\n\n   `make snag_docs`\n\n   (which just copies the generated documentation files from the\n   `docs` branch)\n\n2. 
Run the following\n\n   ```\n   cp -a git-filter-repo $(git --exec-path)\n   cp -a git-filter-repo.1 $(git --man-path)/man1 && mandb\n   cp -a git-filter-repo.html $(git --html-path)\n   ln -s $(git --exec-path)/git-filter-repo \\\n       $(python -c \"import site; print(site.getsitepackages()[-1])\")/git_filter_repo.py\n   ```\n\nor you can use the provided Makefile, as noted below.\n\n# Installation as Python Package from PyPI\n\n`git-filter-repo` is also available as\n[PyPI-package](https://pypi.org/project/git-filter-repo/).\n\nTherefore, it can be installed with [pipx](https://pypa.github.io/pipx/)\nor [uv tool](https://docs.astral.sh/uv/concepts/tools/).\nCommand example for pipx:\n\n`pipx install git-filter-repo`\n\n# Installation via Makefile\n\nInstalling should be doable by hand, but a Makefile is provided for those\nthat prefer it.  However, usage of the Makefile really requires overriding\nat least a couple of the directories with sane values, e.g.\n\n    $ make prefix=/usr pythondir=/usr/lib64/python3.8/site-packages install\n\nAlso, the Makefile will not edit the shebang line (the first line) of\ngit-filter-repo if your python executable is not named \"python3\";\nyou'll still need to do that yourself.\n\n# Notes for Windows Users\n\ngit-filter-repo can be installed with multiple tools, such as\n[pipx](https://pypa.github.io/pipx/) or a Windows-specific package manager\nlike Scoop (both of which were covered above).\n\nSadly, Windows sometimes makes things difficult.  Common and historical issues:\n\n  * **Non-functional Python stub**: Windows apparently ships with a\n    [non-functional\n    python](https://github.com/newren/git-filter-repo/issues/36#issuecomment-568933825).\n    This can even manifest as [the app\n    hanging](https://github.com/newren/git-filter-repo/issues/36) or\n    [the system appearing to\n    hang](https://github.com/newren/git-filter-repo/issues/312).  
Try\n    installing\n    [Python](https://docs.microsoft.com/en-us/windows/python/beginners)\n    from the [Microsoft\n    Store](https://apps.microsoft.com/store/search?publisher=Python%20Software%20Foundation)\n  * **Modifying PATH, making the script executable**: If modifying your PATH\n    and/or making scripts executable is difficult for you, you can skip that\n    step by just using `python3 git-filter-repo` instead of `git filter-repo`\n    in your commands.\n  * **Different python executable name**:  Some users don't have\n    a `python3` executable but one named something else like `python`\n    or `python3.8` or whatever.  You may need to edit the first line\n    of the git-filter-repo script to specify the appropriate path.  Or\n    just don't bother and instead use the long form for executing\n    filter-repo commands.  Namely, replace the `git filter-repo` part\n    of commands with `PYTHON_EXECUTABLE git-filter-repo`. (Where\n    `PYTHON_EXECUTABLE` is something like `python` or `python3.8` or\n    `C:\\PATH\\TO\\INSTALLATION\\OF\\python3.exe` or whatever).\n  * **Symlink issues**:  git_filter_repo.py is supposed to be a symlink to\n    git-filter-repo, so that it appears to have identical contents.\n    If your system messed up the symlink (usually meaning it looks like a\n    regular file with just one line), then delete git_filter_repo.py and\n    replace it with a copy of git-filter-repo.\n  * **Old GitBash limitations**: older versions of GitForWindows had an\n    unfortunate shebang length limitation (see [git-for-windows issue\n    #3165](https://github.com/git-for-windows/git/pull/3165)).  If\n    you're affected, just use the long form for invoking filter-repo\n    commands, i.e. 
replace the `git filter-repo` part of commands with\n    `python3 git-filter-repo`.\n\nFor additional historical context, see:\n  * [#371](https://github.com/newren/git-filter-repo/issues/371#issuecomment-1267116186)\n  * [#360](https://github.com/newren/git-filter-repo/issues/360#issuecomment-1276813596)\n  * [#312](https://github.com/newren/git-filter-repo/issues/312)\n  * [#307](https://github.com/newren/git-filter-repo/issues/307)\n  * [#225](https://github.com/newren/git-filter-repo/pull/225)\n  * [#231](https://github.com/newren/git-filter-repo/pull/231)\n  * [#124](https://github.com/newren/git-filter-repo/issues/124)\n  * [#36](https://github.com/newren/git-filter-repo/issues/36)\n  * [this git mailing list\n     thread](https://lore.kernel.org/git/nycvar.QRO.7.76.6.2004251610300.18039@tvgsbejvaqbjf.bet/)\n"
  },
  {
    "path": "Makefile",
    "content": "# A bunch of installation-related paths people can override on the command line\nDESTDIR = /\nINSTALL = install\nprefix = $(HOME)\nbindir = $(prefix)/libexec/git-core\nlocaledir = $(prefix)/share/locale\nmandir = $(prefix)/share/man\nhtmldir = $(prefix)/share/doc/git-doc\npythondir = $(prefix)/lib64/python3.6/site-packages\n\ndefault: build\n\nbuild:\n\t@echo Nothing to do: filter-repo is a script which needs no compilation.\n\ntest:\n\ttime t/run_coverage\n\n# fixup_locale might matter once we actually have translations, but right now\n# we don't.  It might not even matter then, because python has a fallback podir.\nfixup_locale:\n\tsed -ie s%@@LOCALEDIR@@%$(localedir)% git-filter-repo\n\n# People installing from tarball will already have man1/git-filter-repo.1 and\n# html/git-filter-repo.html.  But let's support people installing from a git\n# clone too; for them, just cheat and snag a copy of the built docs that I\n# record in a different branch.\nsnag_docs: Documentation/man1/git-filter-repo.1 Documentation/html/git-filter-repo.html\n\nDocumentation/man1/git-filter-repo.1:\n\tmkdir -p Documentation/man1\n\tgit show origin/docs:man1/git-filter-repo.1 >Documentation/man1/git-filter-repo.1\n\nDocumentation/html/git-filter-repo.html:\n\tmkdir -p Documentation/html\n\tgit show origin/docs:html/git-filter-repo.html >Documentation/html/git-filter-repo.html\n\ninstall: snag_docs #fixup_locale\n\t$(INSTALL) -Dm0755 git-filter-repo \"$(DESTDIR)/$(bindir)/git-filter-repo\"\n\t$(INSTALL) -dm0755 \"$(DESTDIR)/$(pythondir)\"\n\tln -sf \"$(bindir)/git-filter-repo\" \"$(DESTDIR)/$(pythondir)/git_filter_repo.py\"\n\t$(INSTALL) -Dm0644 Documentation/man1/git-filter-repo.1 \"$(DESTDIR)/$(mandir)/man1/git-filter-repo.1\"\n\t$(INSTALL) -Dm0644 Documentation/html/git-filter-repo.html \"$(DESTDIR)/$(htmldir)/git-filter-repo.html\"\n\tif which mandb > /dev/null; then mandb; fi\n\n\n#\n# The remainder of the targets are meant for tasks for the maintainer; if they\n# 
don't work for you, I don't care.  These tasks modify branches and upload\n# releases and whatnot, and presume a directory layout I have locally.\n#\nupdate_docs:\n\t# Set environment variables once\n\texport GIT_WORK_TREE=$(shell mktemp -d) \\\n\texport GIT_INDEX_FILE=$(shell mktemp) \\\n\tCOMMIT=$(shell git rev-parse HEAD) \\\n\t&& \\\n\t# Sanity check; we'll build docs in a clone of a git repo \\\n\ttest -d ../git && \\\n\t# Sanity check; docs == origin/docs \\\n\ttest -z \"$(git rev-parse docs origin/docs | uniq -u)\" && \\\n\t# Avoid spurious errors by forcing index to be well formatted, if empty \\\n\tgit read-tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 && # empty tree \\\n\t# Symlink git-filter-repo.txt documentation into git and build it \\\n\tln -sf ../../git-filter-repo/Documentation/git-filter-repo.txt ../git/Documentation/ && \\\n\tmake -C ../git/Documentation -j4 man html && \\\n\t# Take the built documentation and lay it out nicely \\\n\tmkdir $$GIT_WORK_TREE/html && \\\n\tmkdir $$GIT_WORK_TREE/man1 && \\\n\tcp -a ../git/Documentation/*.html $$GIT_WORK_TREE/html/ && \\\n\tcp -a ../git/Documentation/git-filter-repo.1 $$GIT_WORK_TREE/man1/ && \\\n\tdos2unix $$GIT_WORK_TREE/html/* && \\\n\t# Add new version of the documentation as a commit, if it differs \\\n\tgit --work-tree $$GIT_WORK_TREE add . 
&& \\\n\tgit diff --quiet docs || git write-tree \\\n\t\t| xargs git commit-tree -p docs -m \"Update docs to $$COMMIT\" \\\n\t\t| xargs git update-ref refs/heads/docs && \\\n\t# Remove temporary files \\\n\trm -rf $$GIT_WORK_TREE && \\\n\trm $$GIT_INDEX_FILE && \\\n\t# Push the new documentation upstream \\\n\tgit push origin docs && \\\n\t# Notify of completion \\\n\techo && \\\n\techo === filter-repo docs branch updated ===\n\n# Call like this:\n#   make GITHUB_COM_TOKEN=$KEY TAGNAME=v2.23.0 release\nrelease: github_release pypi_release\n\n# Call like this:\n#   make GITHUB_COM_TOKEN=$KEY TAGNAME=v2.23.0 github_release\ngithub_release: update_docs\n\tFILEBASE=git-filter-repo-$(shell echo $(TAGNAME) | tail -c +2) \\\n\tTMP_INDEX_FILE=$(shell mktemp) \\\n\tCOMMIT=$(shell git rev-parse HEAD) \\\n\t&& \\\n\ttest -n \"$(GITHUB_COM_TOKEN)\" && \\\n\ttest -n \"$(TAGNAME)\" && \\\n\ttest -n \"$$COMMIT\" && \\\n\t# Make sure we don't have any staged or unstaged changes \\\n\tgit diff --quiet --staged HEAD && git diff --quiet HEAD && \\\n\t# Make sure 'jq' is installed \\\n\ttype -p jq && \\\n\t# Tag the release, push it to GitHub \\\n\tgit tag -a -m \"filter-repo $(TAGNAME)\" $(TAGNAME) $$COMMIT && \\\n\tgit push origin $(TAGNAME) && \\\n\t# Create the tarball \\\n\tGIT_INDEX_FILE=$$TMP_INDEX_FILE git read-tree $$COMMIT && \\\n\tgit ls-tree -r docs | grep filter-repo    \\\n\t\t| sed -e 's%\\t%\\tDocumentation/%' \\\n\t\t| GIT_INDEX_FILE=$$TMP_INDEX_FILE git update-index --index-info && \\\n\tGIT_INDEX_FILE=$$TMP_INDEX_FILE git write-tree                                    \\\n\t\t| xargs git archive --prefix=$$FILEBASE/ \\\n\t\t| xz -c >$$FILEBASE.tar.xz && \\\n\trm $$TMP_INDEX_FILE && \\\n\t# Make GitHub mark our new tag as an official release \\\n\tcurl -s -H \"Authorization: token $(GITHUB_COM_TOKEN)\" -X POST \\\n\t\thttps://api.github.com/repos/newren/git-filter-repo/releases \\\n\t\t--data \"{                                  \\\n\t\t  \\\"tag_name\\\": 
\\\"$(TAGNAME)\\\",            \\\n\t\t  \\\"target_commitish\\\": \\\"$$COMMIT\\\",      \\\n\t\t  \\\"name\\\": \\\"$(TAGNAME)\\\",                \\\n\t\t  \\\"body\\\": \\\"filter-repo $(TAGNAME)\\\"     \\\n\t\t}\" | jq -r .id >asset_id && \\\n\t# Upload our tarball \\\n\tcat asset_id | xargs -I ASSET_ID curl -s -H \"Authorization: token $(GITHUB_COM_TOKEN)\" -H \"Content-Type: application/octet-stream\" --data-binary @$$FILEBASE.tar.xz https://uploads.github.com/repos/newren/git-filter-repo/releases/ASSET_ID/assets?name=$$FILEBASE.tar.xz && \\\n\t# Remove temporary file(s) \\\n\trm asset_id && \\\n\t# Notify of completion \\\n\techo && \\\n\techo === filter-repo $(TAGNAME) created and uploaded to GitHub ===\n\npypi_release: # Has an implicit dependency on github_release because...\n\t# Upload to PyPI, automatically picking tag created by github_release\n\tpython3 -m venv venv\n\tvenv/bin/pip install --upgrade pip\n\tvenv/bin/pip install build twine\n\tvenv/bin/pyproject-build\n\t# Note: Retrieve \"git-filter-repo releases\" token; username is 'newren'\n\tvenv/bin/twine upload dist/*\n\t# Remove temporary file(s)\n\trm -rf dist/ venv/ git_filter_repo.egg-info/\n\n# NOTE TO FUTURE SELF: If you accidentally push a bad release, you can remove\n# all but the git-filter-repo-$VERSION.tar.xz asset with\n#    git push --delete origin $TAGNAME\n# To remove the git-filter-repo-$VERSION.tar.xz asset as well:\n#    curl -s -H \"Authorization: token $GITHUB_COM_TOKEN\" -X GET \\\n#        https://api.github.com/repos/newren/git-filter-repo/releases\n# and look for the \"id\", then run\n#    curl -s -H \"Authorization: token $GITHUB_COM_TOKEN\" -X DELETE \\\n#        https://api.github.com/repos/newren/git-filter-repo/releases/$ID\n"
  },
  {
    "path": "README.md",
    "content": "git filter-repo is a versatile tool for rewriting history, which includes\n[capabilities I have not found anywhere\nelse](#design-rationale-behind-filter-repo).  It roughly falls into the\nsame space of tool as [git\nfilter-branch](https://git-scm.com/docs/git-filter-branch) but without the\ncapitulation-inducing poor\n[performance](https://public-inbox.org/git/CABPp-BGOz8nks0+Tdw5GyGqxeYR-3FF6FT5JcgVqZDYVRQ6qog@mail.gmail.com/),\nwith far more capabilities, and with a design that scales usability-wise\nbeyond trivial rewriting cases.  [git filter-repo is now recommended by the\ngit project](https://git-scm.com/docs/git-filter-branch#_warning) instead\nof git filter-branch.\n\nWhile most users will probably just use filter-repo as a simple command\nline tool (and likely only use a few of its flags), at its core filter-repo\ncontains a library for creating history rewriting tools.  As such, users\nwith specialized needs can leverage it to quickly create [entirely new\nhistory rewriting tools](contrib/filter-repo-demos).\n\n# Table of Contents\n\n  * [Prerequisites](#prerequisites)\n  * [How do I install it?](#how-do-i-install-it)\n  * [How do I use it?](#how-do-i-use-it)\n  * [Why filter-repo instead of other alternatives?](#why-filter-repo-instead-of-other-alternatives)\n    * [filter-branch](#filter-branch)\n    * [BFG Repo Cleaner](#bfg-repo-cleaner)\n  * [Simple example, with comparisons](#simple-example-with-comparisons)\n    * [Solving this with filter-repo](#solving-this-with-filter-repo)\n    * [Solving this with BFG Repo Cleaner](#solving-this-with-bfg-repo-cleaner)\n    * [Solving this with filter-branch](#solving-this-with-filter-branch)\n    * [Solving this with fast-export/fast-import](#solving-this-with-fast-exportfast-import)\n  * [Design rationale behind filter-repo](#design-rationale-behind-filter-repo)\n  * [How do I contribute?](#how-do-i-contribute)\n  * [Is there a Code of Conduct?](#is-there-a-code-of-conduct)\n  * [Upstream 
Improvements](#upstream-improvements)\n\n# Prerequisites\n\nfilter-repo requires:\n\n  * git >= 2.36.0\n  * python3 >= 3.6\n\n# How do I install it?\n\nWhile the `git-filter-repo` repository has many files, the main logic\nis all contained in a single-file python script named\n`git-filter-repo`, which was done to make installation for basic use\non many systems trivial: just place that one file into your $PATH.\n\nSee [INSTALL.md](INSTALL.md) for things beyond basic usage or special\ncases.  The more involved instructions are only needed if one of the\nfollowing apply:\n\n  * you do not find the above comment about trivial installation intuitively\n    obvious\n  * you are working with a python3 executable named something other than\n    \"python3\"\n  * you want to install documentation (beyond the builtin docs shown with -h)\n  * you want to run some of the [contrib](contrib/filter-repo-demos/) examples\n  * you want to create your own python filtering scripts using filter-repo as\n    a module/library\n\n# How do I use it?\n\nFor comprehensive documentation:\n  * see the [user manual](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html)\n  * alternative formatting of the user manual is available on various\n    external sites\n    ([example](https://www.mankier.com/1/git-filter-repo)), for those\n    that don't like the htmlpreview.github.io layout, though it may\n    only be up-to-date as of the latest release\n\nIf you prefer learning from examples:\n  * there is a [cheat sheet for converting filter-branch\n    commands](Documentation/converting-from-filter-branch.md#cheat-sheet-conversion-of-examples-from-the-filter-branch-manpage),\n    which covers every example from the filter-branch manual\n  * there is a [cheat sheet for converting BFG Repo Cleaner\n    commands](Documentation/converting-from-bfg-repo-cleaner.md#cheat-sheet-conversion-of-examples-from-bfg),\n    which covers every example from the 
BFG website\n  * the [simple example](#simple-example-with-comparisons) below may\n    be of interest\n  * the user manual has an extensive [examples\nsection](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#EXAMPLES)\n  * I have collected a set of [example filterings based on user-filed issues](Documentation/examples-from-user-filed-issues.md)\n\nIn either case, you may also find the [Frequently Answered Questions](Documentation/FAQ.md) useful.\n\n# Why filter-repo instead of other alternatives?\n\nThis was covered in more detail in a [Git Rev News article on\nfilter-repo](https://git.github.io/rev_news/2019/08/21/edition-54/#an-introduction-to-git-filter-repo--written-by-elijah-newren),\nbut some highlights for the main competitors:\n\n## filter-branch\n\n  * filter-branch is [extremely to unusably\n    slow](https://public-inbox.org/git/CABPp-BGOz8nks0+Tdw5GyGqxeYR-3FF6FT5JcgVqZDYVRQ6qog@mail.gmail.com/)\n    ([multiple orders of magnitude slower than it should\n    be](https://git-scm.com/docs/git-filter-branch#PERFORMANCE))\n    for non-trivial repositories.\n\n  * [filter-branch is riddled with\n    gotchas](https://git-scm.com/docs/git-filter-branch#SAFETY) that can\n    silently corrupt your rewrite or at least thwart your \"cleanup\"\n    efforts by giving you something more problematic and messy than what\n    you started with.\n\n  * filter-branch is [very onerous](#simple-example-with-comparisons)\n    [to\n    use](https://github.com/newren/git-filter-repo/blob/a6a6a1b0f62d365bbe2e76f823e1621857ec4dbd/contrib/filter-repo-demos/filter-lamely#L9-L61)\n    for any rewrite which is even slightly non-trivial.\n\n  * the git project has stated that the above issues with filter-branch\n    cannot be backward compatibly fixed; they recommend that you [stop\n    using\n    filter-branch](https://git-scm.com/docs/git-filter-branch#_warning)\n\n  * die-hard fans of filter-branch may be interested in\n   
 [filter-lamely](contrib/filter-repo-demos/filter-lamely)\n    (a.k.a. [filter-branch-ish](contrib/filter-repo-demos/filter-branch-ish)),\n    a reimplementation of filter-branch based on filter-repo which is\n    more performant (though not nearly as fast or safe as\n    filter-repo).\n\n  * a [cheat\n    sheet](Documentation/converting-from-filter-branch.md#cheat-sheet-conversion-of-examples-from-the-filter-branch-manpage)\n    is available showing how to convert example commands from the manual of\n    filter-branch into filter-repo commands.\n\n## BFG Repo Cleaner\n\n  * great tool for its time, but while it makes some things simple, it\n    is limited to a few kinds of rewrites.\n\n  * its architecture is not amenable to handling more types of\n    rewrites.\n\n  * its architecture presents some shortcomings and bugs even for its\n    intended usecase.\n\n  * fans of bfg may be interested in\n    [bfg-ish](contrib/filter-repo-demos/bfg-ish), a reimplementation of bfg\n    based on filter-repo which includes several new features and bugfixes\n    relative to bfg.\n\n  * a [cheat\n    sheet](Documentation/converting-from-bfg-repo-cleaner.md#cheat-sheet-conversion-of-examples-from-bfg)\n    is available showing how to convert example commands from the manual of\n    BFG Repo Cleaner into filter-repo commands.\n\n# Simple example, with comparisons\n\nLet's say that we want to extract a piece of a repository, with the intent\non merging just that piece into some other bigger repo.  For extraction, we\nwant to:\n\n  * extract the history of a single directory, src/.  This means that only\n    paths under src/ remain in the repo, and any commits that only touched\n    paths outside this directory will be removed.\n  * rename all files to have a new leading directory, my-module/ (e.g. 
so that\n    src/foo.c becomes my-module/src/foo.c)\n  * rename any tags in the extracted repository to have a 'my-module-'\n    prefix (to avoid any conflicts when we later merge this repo into\n    something else)\n\n## Solving this with filter-repo\n\nDoing this with filter-repo is as simple as the following command:\n```shell\n  git filter-repo --path src/ --to-subdirectory-filter my-module --tag-rename '':'my-module-'\n```\n(the single quotes are unnecessary, but make it clearer to a human that we\nare replacing the empty string as a prefix with `my-module-`)\n\n## Solving this with BFG Repo Cleaner\n\nBFG Repo Cleaner is not capable of this kind of rewrite; in fact, all\nthree types of wanted changes are outside of its capabilities.\n\n## Solving this with filter-branch\n\nfilter-branch comes with a pile of caveats (more on that below) even\nonce you figure out the necessary invocation(s):\n\n```shell\n  git filter-branch \\\n      --tree-filter 'mkdir -p my-module && \\\n                     git ls-files \\\n                         | grep -v ^src/ \\\n                         | xargs git rm -f -q && \\\n                     ls -d * \\\n                         | grep -v my-module \\\n                         | xargs -I files mv files my-module/' \\\n          --tag-name-filter 'echo \"my-module-$(cat)\"' \\\n\t  --prune-empty -- --all\n  git clone file://$(pwd) newcopy\n  cd newcopy\n  git for-each-ref --format=\"delete %(refname)\" refs/tags/ \\\n      | grep -v refs/tags/my-module- \\\n      | git update-ref --stdin\n  git gc --prune=now\n```\n\nSome might notice that the above filter-branch invocation will be really\nslow due to using --tree-filter; you could alternatively use the\n--index-filter option of filter-branch, changing the above commands to:\n\n```shell\n  git filter-branch \\\n      --index-filter 'git ls-files \\\n                          | grep -v ^src/ \\\n                          | xargs git rm -q --cached;\n                      git 
ls-files -s \\\n                          | sed \"s%$(printf \\\\t)%&my-module/%\" \\\n                          | git update-index --index-info;\n                      git ls-files \\\n                          | grep -v ^my-module/ \\\n                          | xargs git rm -q --cached' \\\n      --tag-name-filter 'echo \"my-module-$(cat)\"' \\\n      --prune-empty -- --all\n  git clone file://$(pwd) newcopy\n  cd newcopy\n  git for-each-ref --format=\"delete %(refname)\" refs/tags/ \\\n      | grep -v refs/tags/my-module- \\\n      | git update-ref --stdin\n  git gc --prune=now\n```\n\nHowever, for either filter-branch command there are a pile of caveats.\nFirst, some may be wondering why I list five commands here for\nfilter-branch.  Despite the use of --all and --tag-name-filter, and\nfilter-branch's manpage claiming that a clone is enough to get rid of\nold objects, the extra steps to delete the other tags and do another\ngc are still required to clean out the old objects and avoid mixing\nnew and old history before pushing somewhere.  Other caveats:\n  * Commit messages are not rewritten; so if some of your commit\n    messages refer to prior commits by (abbreviated) sha1, after the\n    rewrite those messages will now refer to commits that are no longer\n    part of the history.  It would be better to rewrite those\n    (abbreviated) sha1 references to refer to the new commit ids.\n  * The --prune-empty flag sometimes misses commits that should be\n    pruned, and it will also prune commits that *started* empty rather\n    than just ended empty due to filtering.  For repositories that\n    intentionally use empty commits for versioning and publishing\n    related purposes, this can be detrimental.\n  * The commands above are OS-specific.  GNU vs. 
BSD issues for sed,\n    xargs, and other commands often trip up users; I think I failed to\n    get most folks to use --index-filter since the only example in the\n    filter-branch manpage that both uses it and shows how to move\n    everything into a subdirectory is linux-specific, and it is not\n    obvious to the reader that it has a portability issue since it\n    silently misbehaves rather than failing loudly.\n  * The --index-filter version of the filter-branch command may be two to\n    three times faster than the --tree-filter version, but both\n    filter-branch commands are going to be multiple orders of magnitude\n    slower than filter-repo.\n  * Both commands assume all filenames are composed entirely of ascii\n    characters (even special ascii characters such as tabs or double\n    quotes will wreak havoc and likely result in missing files or\n    misnamed files)\n\n## Solving this with fast-export/fast-import\n\nOne can kind of hack this together with something like:\n\n```shell\n  git fast-export --no-data --reencode=yes --mark-tags --fake-missing-tagger \\\n      --signed-tags=strip --tag-of-filtered-object=rewrite --all \\\n      | grep -vP '^M [0-9]+ [0-9a-f]+ (?!src/)' \\\n      | grep -vP '^D (?!src/)' \\\n      | perl -pe 's%^(M [0-9]+ [0-9a-f]+ )(.*)$%\\1my-module/\\2%' \\\n      | perl -pe 's%^(D )(.*)$%\\1my-module/\\2%' \\\n      | perl -pe s%refs/tags/%refs/tags/my-module-% \\\n      | git -c core.ignorecase=false fast-import --date-format=raw-permissive \\\n            --force --quiet\n  git for-each-ref --format=\"delete %(refname)\" refs/tags/ \\\n      | grep -v refs/tags/my-module- \\\n      | git update-ref --stdin\n  git reset --hard\n  git reflog expire --expire=now --all\n  git gc --prune=now\n```\n\nBut this comes with some nasty caveats and limitations:\n  * The various greps and regex replacements operate on the entire\n    fast-export stream and thus might accidentally corrupt unintended\n    portions of it, such as commit 
messages.  If you needed to edit\n    file contents and thus dropped the --no-data flag, it could also\n    end up corrupting file contents.\n  * This command assumes all filenames in the repository are composed\n    entirely of ascii characters, and also exclude special characters\n    such as tabs or double quotes.  If such a special filename exists\n    within the old src/ directory, it will be pruned even though it\n    was intended to be kept.  (In slightly different repository\n    rewrites, this type of editing also risks corrupting filenames\n    with special characters by adding extra double quotes near the end\n    of the filename and in some leading directory name.)\n  * This command will leave behind huge numbers of useless empty\n    commits, and has no realistic way of pruning them.  (And if you\n    tried to combine this technique with another tool to prune the\n    empty commits, then you now have no way to distinguish between\n    commits which were made empty by the filtering that you want to\n    remove, and commits which were empty before the filtering process\n    and which you thus may want to keep.)\n  * Commit messages which reference other commits by hash will now\n    reference old commits that no longer exist.  Attempting to edit\n    the commit messages to update them is extraordinarily difficult to\n    add to this kind of direct rewrite.\n\n# Design rationale behind filter-repo\n\nNone of the existing repository filtering tools did what I wanted;\nthey all came up short for my needs. No tool provided any of the\nfirst eight traits below I wanted, and no tool provided more than\ntwo of the last four traits either:\n\n  1. [Starting report] Provide user an analysis of their repo to help\n     them get started on what to prune or rename, instead of expecting\n     them to guess or find other tools to figure it out.  (Triggered, e.g.\n     by running the first time with a special flag, such as --analyze.)\n\n  1. [Keep vs. 
remove] Instead of just providing a way for users to\n     easily remove selected paths, also provide flags for users to\n     only *keep* certain paths.  Sure, users could work around this by\n     specifying to remove all paths other than the ones they want to\n     keep, but the need to specify all paths that *ever* existed in\n     **any** version of the repository could sometimes be quite\n     painful.  For filter-branch, using pipelines like `git ls-files |\n     grep -v ... | xargs -r git rm` might be a reasonable workaround\n     but can get unwieldy and isn't as straightforward for users; plus\n     those commands are often operating-system specific (can you spot\n     the GNUism in the snippet I provided?).\n\n  1. [Renaming] It should be easy to rename paths.  For example, in\n     addition to allowing one to treat some subdirectory as the root\n     of the repository, also provide options for users to make the\n     root of the repository just become a subdirectory.  And more\n     generally allow files and directories to be easily renamed.\n     Provide sanity checks if renaming causes multiple files to exist\n     at the same path.  (And add special handling so that if a commit\n     merely copied oldname->newname without modification, then\n     filtering oldname->newname doesn't trigger the sanity check and\n     die on that commit.)\n\n  1. [More intelligent safety] Writing copies of the original refs to\n     a special namespace within the repo does not provide a\n     user-friendly recovery mechanism.  Many would struggle to recover\n     using that.  Almost everyone I've ever seen do a repository\n     filtering operation has done so with a fresh clone, because\n     wiping out the clone in case of error is a vastly easier recovery\n     mechanism.  
Strongly encourage that workflow by [detecting and\n     bailing if we're not in a fresh\n     clone](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#FRESHCLONE),\n     unless the user overrides with --force.\n\n  1. [Auto shrink] Automatically remove old cruft and repack the\n     repository for the user after filtering (unless overridden); this\n     simplifies things for the user, helps avoid mixing old and new\n     history together, and avoids problems where the multi-step\n     process for shrinking the repo documented in the manpage doesn't\n     actually work in some cases.  (I'm looking at you,\n     filter-branch.)\n\n  1. [Clean separation] Avoid confusing users (and prevent accidental\n     re-pushing of old stuff) due to mixing old repo and rewritten\n     repo together.  (This is particularly a problem with filter-branch\n     when using the --tag-name-filter option, and sometimes also an\n     issue when only filtering a subset of branches.)\n\n  1. [Versatility] Provide the user the ability to extend the tool or\n     even write new tools that leverage existing capabilities, and\n     provide this extensibility in a way that (a) avoids the need to\n     fork separate processes (which would destroy performance), (b)\n     avoids making the user specify OS-dependent shell commands (which\n     would prevent users from sharing commands with each other), (c)\n     takes advantage of rich data structures (because hashes, dicts,\n     lists, and arrays are prohibitively difficult in shell) and (d)\n     provides reasonable string manipulation capabilities (which are\n     sorely lacking in shell).\n\n  1. [Old commit references] Provide a way for users to use old commit\n     IDs with the new repository (in particular via mapping from old to\n     new hashes with refs/replace/ references).\n\n  1. [Commit message consistency] If commit messages refer to other\n     commits by ID (e.g. 
\"this reverts commit 01234567890abcdef\", \"In\n     commit 0013deadbeef9a...\"), those commit messages should be\n     rewritten to refer to the new commit IDs.\n\n  1. [Become-empty pruning] Commits which become empty due to filtering\n     should be pruned.  If the parent of a commit is pruned, the first\n     non-pruned ancestor needs to become the new parent.  If no\n     non-pruned ancestor exists and the commit was not a merge, then it\n     becomes a new root commit.  If no non-pruned ancestor exists and\n     the commit was a merge, then the merge will have one less parent\n     (and thus make it likely to become a non-merge commit which would\n     itself be pruned if it had no file changes of its own).  One\n     special thing to note here is that we prune commits which become\n     empty, NOT commits which start empty.  Some projects intentionally\n     create empty commits for versioning or publishing reasons, and\n     these should not be removed.  (As a special case, commits which\n     started empty but whose parent was pruned away will also be\n     considered to have \"become empty\".)\n\n  1. [Become-degenerate pruning] Pruning of commits which become empty\n     can potentially cause topology changes, and there are lots of\n     special cases.  Normally, merge commits are not removed since they\n     are needed to preserve the graph topology, but the pruning of\n     parents and other ancestors can ultimately result in the loss of\n     one or more parents.  A simple case was already noted above: if a\n     merge commit loses enough parents to become a non-merge commit and\n     it has no file changes, then it too can be pruned.  Merge commits\n     can also have a topology that becomes degenerate: it could end up\n     with the merge_base serving as both parents (if all intervening\n     commits from the original repo were pruned), or it could end up\n     with one parent which is an ancestor of its other parent.  
In such\n     cases, if the merge has no file changes of its own, then the merge\n     commit can also be pruned.  However, much as we do with empty\n     pruning we do not prune merge commits that started degenerate\n     (which indicates it may have been intentional, such as with --no-ff\n     merges) but only merge commits that become degenerate and have no\n     file changes of their own.\n\n  1. [Speed] Filtering should be reasonably fast\n\n# How do I contribute?\n\nSee the [contributing guidelines](Documentation/Contributing.md).\n\n# Is there a Code of Conduct?\n\nParticipants in the filter-repo community are expected to adhere to\nthe same standards as for the git project, so the [git Code of\nConduct](https://git.kernel.org/pub/scm/git/git.git/tree/CODE_OF_CONDUCT.md)\napplies.\n\n# Upstream Improvements\n\nWork on filter-repo and [its\npredecessor](https://public-inbox.org/git/51419b2c0904072035u1182b507o836a67ac308d32b9@mail.gmail.com/)\nhas also driven numerous improvements to fast-export and fast-import\n(and occasionally other commands) in core git, based on things\nfilter-repo needs to do its work:\n\n  * git-2.48.0\n    * [fast-import: disallow more path components](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=da91a90c2f)\n    * [fast-import: disallow \".\" and \"..\" path components](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=4a2790a257)\n    * [fast-import: avoid making replace refs point to themselves](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=5e904f1a4a)\n  * git-2.28.0\n    * [fast-import: add new --date-format=raw-permissive format](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=d42a2fb72f)\n  * git-2.24.0\n    * [fast-export: handle nested tags](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=941790d7de)\n    * [t9350: add tests for tags of things other than a commit](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=8d7d33c1ce)\n    * 
[fast-export: allow user to request tags be marked with --mark-tags](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=a1638cfe12)\n    * [fast-export: add support for --import-marks-if-exists](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=208d69246e)\n    * [fast-import: add support for new 'alias' command](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=b8f50e5b60)\n    * [fast-import: allow tags to be identified by mark labels](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=f73b2aba05)\n    * [fast-import: fix handling of deleted tags](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=3164e6bd24)\n    * [fast-export: fix exporting a tag and nothing else](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=af2abd870b)\n    * [git-fast-import.txt: clarify that multiple merge commits are allowed](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=d1387d3895)\n  * git-2.23.0\n    * [t9350: fix encoding test to actually test reencoding](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=32615ce762)\n    * [fast-import: support 'encoding' commit header](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=3edfcc65fd)\n    * [fast-export: avoid stripping encoding header if we cannot reencode](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=ccbfc96dc4)\n    * [fast-export: differentiate between explicitly UTF-8 and implicitly\n      UTF-8](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=57a8be2cb0)\n    * [fast-export: do automatic reencoding of commit messages only if\n      requested](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=e80001f8fd)\n  * git-2.22.0\n    * [log,diff-tree: add --combined-all-paths option](\n        https://git.kernel.org/pub/scm/git/git.git/commit/?id=d76ce4f734)\n    * [t9300: demonstrate bug with get-mark and empty orphan commits](\n      
https://git.kernel.org/pub/scm/git/git.git/commit/?id=62edbec7de)\n    * [git-fast-import.txt: fix wording about where ls command can appear](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=a63c54a019)\n    * [fast-import: check most prominent commands first](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=5056bb7646)\n    * [fast-import: only allow cat-blob requests where it makes sense](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=7ffde293f2)\n    * [fast-import: fix erroneous handling of get-mark with empty orphan\n      commits](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=cf7b857a77)\n    * [Honor core.precomposeUnicode in more places](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=8e712ef6fc)\n  * git-2.21.0\n    * [fast-export: convert sha1 to oid](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=843b9e6d48)\n    * [git-fast-import.txt: fix documentation for --quiet option](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=f55c979b14)\n    * [git-fast-export.txt: clarify misleading documentation about rev-list\n      args](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=4532be7cba)\n    * [fast-export: use value from correct enum](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=b93b81e799)\n    * [fast-export: avoid dying when filtering by paths and old tags exist](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=1f30c904b3)\n    * [fast-export: move commit rewriting logic into a function for reuse](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=f129c4275c)\n    * [fast-export: when using paths, avoid corrupt stream with non-existent\n      mark](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=cd13762d8f)\n    * [fast-export: ensure we export requested refs](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=fdf31b6369)\n    * [fast-export: add --reference-excluded-parents 
option](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=530ca19c02)\n    * [fast-import: remove unmaintained duplicate documentation](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=25dd3e4889)\n    * [fast-export: add a --show-original-ids option to show\n      original names](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=a965bb3116)\n    * [git-show-ref.txt: fix order of flags](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=bd8d6f0def)\n  * git-2.20.0\n    * [update-ref: fix type of update_flags variable to\n      match its usage](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=e4c34855a2)\n    * [update-ref: allow --no-deref with --stdin](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=d345e9fbe7)\n  * git-1.7.3\n    * [fast-export: Fix dropping of files with --import-marks and path\n      limiting](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=4087a02e45)\n    * [fast-export: Add a --full-tree option](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=7f40ab0916)\n    * [fast-export: Fix output order of D/F changes](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=060df62422)\n    * [fast-import: Improve robustness when D->F changes provided in wrong\n      order](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=253fb5f889)\n  * git-1.6.4:\n    * [fast-export: Set revs.topo_order before calling setup_revisions](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=668f3aa776)\n    * [fast-export: Omit tags that tag trees](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=02c48cd69b)\n    * [fast-export: Make sure we show actual ref names instead of \"(null)\"](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=2374502c6c)\n    * [fast-export: Do parent rewriting to avoid dropping relevant commits](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=32164131db)\n    * [fast-export: 
Add a --tag-of-filtered-object option for newly\n      dangling tags](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=2d8ad46919)\n    * [Add new fast-export testcases](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=25e0ca5dd6)\n    * [fast-export: Document the fact that git-rev-list arguments are\n      accepted](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=8af15d282e)\n  * git-1.6.3:\n    * [git-filter-branch: avoid collisions with variables in eval'ed\n      commands](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=d5b0c97d13)\n    * [Correct missing SP characters in grammar comment at top of\n      fast-import.c](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=98e1a4186a)\n    * [fast-export: Avoid dropping files from commits](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=ebeec7dbc5)\n  * git-1.6.1.4:\n    * [fast-export: ensure we traverse commits in topological order](\n      https://git.kernel.org/pub/scm/git/git.git/commit/?id=784f8affe4)\n"
  },
  {
    "path": "contrib/filter-repo-demos/README.md",
    "content": "## Background\n\nfilter-repo is not merely a history rewriting tool, it also contains a\nlibrary that can be used to write new history rewriting tools.  This\ndirectory contains several examples showing the breadth of different things\nthat could be done.\n\n## Quick overview\n\nCommand&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; |Description\n-------|-----------\nbarebones-example    |Simple example with no modifications to filter-repo behavior, just showing what to import and run.\ninsert-beginning     |Add a new file (e.g. LICENSE/COPYING) to the beginning of history.\nsigned-off-by        |Add a Signed-off-by tag to a range of commits\nlint-history         |Run some lint command on all non-binary files in history.\nclean-ignore         |Delete files from history which match current gitignore rules.\nfilter-lamely (or filter&#8209;branch&#8209;ish) |A nearly bug compatible re-implementation of filter-branch (the git testsuite passes using it instead of filter-branch), with some performance tricks to make it several times faster (though it's still glacially slow compared to filter-repo).\nbfg-ish              |A re-implementation of most of BFG Repo Cleaner, with new features and bug fixes.\nconvert-svnexternals |Insert Git submodules according to SVN externals.\n\n## Purpose\n\nPlease note that the point of these examples is not to provide new complete\ntools, but simply to demonstrate that extremely varied history rewriting\ntools can be created which automatically inherit lots of useful base\nfunctionality: rewriting hashes in commit messages, pruning commits that\nbecome empty, handling filenames with funny characters, non-standard\nencodings, handling of replace refs, etc.  (Additional examples of using\nfilter-repo as a library can also be found in [the\ntestsuite](../../t/t9391/).)  
My sincerest hope is that these examples\nprovide lots of useful functionality, but that each is missing at least one\ncritical piece for your use case.  Go forth and extend and improve.\n\n## Usage\n\nAll the examples require a symlink named git_filter_repo.py, pointing to\ngit-filter-repo, somewhere in your PYTHONPATH in order to run; also, all have\na --help flag to get a description of their usage and flags.\n"
  },
  {
    "path": "contrib/filter-repo-demos/barebones-example",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a simple program that behaves identically to git-filter-repo.  Its\nentire purpose is just to show what to import and run to get the normal\ngit-filter-repo behavior, to serve as a starting point for you to figure\nout what you want to modify.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport sys\n\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nargs = fr.FilteringOptions.parse_args(sys.argv[1:])\nif args.analyze:\n  fr.RepoAnalyze.run(args)\nelse:\n  filter = fr.RepoFilter(args)\n  filter.run()\n"
  },
  {
    "path": "contrib/filter-repo-demos/bfg-ish",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a re-implementation of BFG Repo Cleaner, with some changes...\n\nNew features:\n* pruning unwanted objects streamlined (automatic repack) and made more robust\n  (BFG makes user repack manually, and while it provides instructions on how\n   to do so, it won't successfully remove large objects in cases like unpacked\n   refs, loose objects, or use of --no-blob-protection; the robustness details\n   are bugfixes, so are covered below.)\n* pruning of commits which become empty (or become degenerate and empty)\n* creation of new replace refs so folks can access new commits using old\n  (unabbreviated) commit hashes\n* respects and uses grafts and replace refs in the rewrite to make them\n  permanent (this is half new feature, half bug fix; thus also mentioned\n  in bugfixes below)\n* auto-update of commit encoding to utf-8 (as per fast-export's default;\n  could pass --preserve-commit-encoding to FilteringOptions.parse_args() if\n  this isn't wanted...)\n\nBug fixes:\n* Works for both packfiles and loose objects\n  (With BFG, if you don't repack before running, large blobs may be retained.)\n  (With BFG, any files larger than core.bigFileThreshold are thus hard to\n   remove since they will not be packed by a gc or a repack.)\n* Works for both packed-refs and loose refs\n  (As per BFG issue #221, BFG fails to properly walk history unless packed.)\n* Works with replace refs\n  (BFG operates directly on packfiles and packed-refs, and does not\n   understand replace refs; see BFG issue #82)\n* Updates both index and working tree at end of rewrite\n  (With BFG and --no-blob-protection, these are still left out-of-date.  
This\n   is a double-whammy principle-of-least-astonishment violation: (1) users\n   are likely to accidentally commit the \"staged\" changes, re-introducing the\n   large blobs or removed passwords, (2) even if they don't commit the\n   changes the index holding them will prevent gc from shrinking the repo.\n   Fixing these two glaring problems not only makes --no-blob-protection\n   safe to recommend, it makes it safe to make it the default.)\n* Fixes the \"protection\" defaults\n  (With BFG, it won't rewrite the tree for HEAD; it can't reasonably switch\n   to doing so because of the bugs mentioned above with updating the index\n   and working tree.  However, this behavior comes with a surprise for users:\n   if HEAD is \"protected\" because users should manually update it first, why\n   isn't that also true of the other branches?  In my opinion, there's no\n   user-facing distinction that makes sense for such a difference in\n   handling.  \"Protecting\" HEAD can also be an error-prone requirement for\n   users -- why do they have to manually edit all files the same way\n   --replace-text is doing and why do they have to risk dirty diffs if they\n   get it slightly different (or a useless and ugly empty commit if they\n   manage to get it right)?  Finally, a third reason this was in my opinion a\n   bad default was that it works really poorly in conjunction with other\n   types of history rewrites, e.g. --subdirectory-filter,\n   --to-subdirectory-filter, --convert-to-git-lfs, --path-rename, etc.  For\n   all three of these reasons, and the fixes mentioned above to make it safe,\n   --no-blob-protection is made the default.)\n* Implements privacy improvements, defaulting to true\n  (As per BFG #139, one of the BFG maintainers notes problematic issues\n   with the \"privacy\" handling in BFG, suggesting options which could be\n   added to improve the story.  
I implemented those options, except that I\n   felt --private should be the default and made the various non-private\n   choices individual options; see the --use-* options.)\n\nOther changes:\n* Removed the --convert-to-git-lfs option\n  (As per BFG issues #116 and #215, and git-lfs issue #1589, handling LFS\n   conversion is poor in BFG and not recommended; other tools are suggested\n   even by the BFG authors.)\n* Removed the --strip-biggest-blobs option\n  (I philosophically disagree with offering such an option when no\n   mechanism is provided to see what the N biggest blobs are.  How is the\n   user supposed to select N?  Even if they know they have three files\n   which have been large, they may be unaware of others in history.  Even\n   if there aren't any other files in history and the user requests to\n   remove the largest three blobs, it might not be what they want: one of\n   the files might have had multiple versions, in which case their request\n   would only remove some versions of the largest file from history and\n   leave all versions of the second and third largest files as well as all\n   but three versions of the largest file.  Finally, on a more minor note,\n   what is done in the case of a tie -- remove more than N, less than N, or\n   just pick one of the objects tying for Nth largest at random?  It's\n   ill-defined.)\n\n...even with all these improvements, I think filter-repo is the better tool,\nand thus I suggest folks use it.  I have no plans to improve bfg-ish\nfurther.  
However, bfg-ish serves as a nice demonstration of the ability to\nuse filter-repo to write different filtering tools, which was its purpose.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport argparse\nimport fnmatch\nimport os\nimport re\nimport subprocess\nimport tempfile\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nsubproc = fr.subproc\n\ndef java_to_fnmatch_glob(extended_glob):\n  if not extended_glob:\n    return None\n  curly_re = re.compile(br'(.*){([^{}]*)}(.*)')\n  m = curly_re.match(extended_glob)\n  if not m:\n    return [extended_glob]\n\n  all_answers = [java_to_fnmatch_glob(m.group(1)+x+m.group(3))\n                 for x in m.group(2).split(b',')]\n  return [item for sublist in all_answers for item in sublist]\n\nclass BFG_ish:\n  def __init__(self):\n    self.blob_sizes = {}\n    self.filtered_blobs = {}\n    self.cat_file_proc = None\n    self.replacement_rules = None\n    self._hash_re = re.compile(br'(\\b[0-9a-f]{7,40}\\b)')\n    self.args = None\n\n  def parse_options(self):\n    usage = 'bfg-ish [options] [<repo>]'\n    parser = argparse.ArgumentParser(description=\"bfg-ish 1.13.0\", usage=usage)\n    parser.add_argument('--strip-blobs-bigger-than', '-b', metavar='<size>',\n            help=(\"strip blobs bigger than X (e.g. 
'128K', '1M', etc)\"))\n    #parser.add_argument('--strip-biggest-blobs', '-B', metavar='NUM',\n    #        help=(\"strip the top NUM biggest blobs\"))\n    parser.add_argument('--strip-blobs-with-ids', '-bi',\n                        metavar='<blob-ids-file>',\n            help=(\"strip blobs with the specified Git object ids\"))\n    parser.add_argument('--delete-files', '-D', metavar='<glob>',\n                        type=os.fsencode,\n            help=(\"delete files with the specified names (e.g. '*.class', '*.{txt,log}' - matches on file name, not path within repo)\"))\n    parser.add_argument('--delete-folders', metavar='<glob>',\n                        type=os.fsencode,\n            help=(\"delete folders with the specified names (e.g. '.svn', '*-tmp' - matches on folder name, not path within repo)\"))\n    parser.add_argument('--replace-text', '-rt', metavar='<expressions-file>',\n            help=(\"filter content of files, replacing matched text. Match expressions should be listed in the file, one expression per line - by default, each expression is treated as a literal, but 'regex:' & 'glob:' prefixes are supported, with '==>' to specify a replacement string other than the default of '***REMOVED***'.\"))\n    parser.add_argument('--filter-content-including', '-fi', metavar='<glob>',\n                        type=os.fsencode,\n            help=(\"do file-content filtering on files that match the specified expression (eg '*.{txt,properties}')\"))\n    parser.add_argument('--filter-content-excluding', '-fe', metavar='<glob>',\n                        type=os.fsencode,\n            help=(\"don't do file-content filtering on files that match the specified expression (eg '*.{xml,pdf}')\"))\n    parser.add_argument('--filter-content-size-threshold', '-fs',\n                        metavar='<size>', default=1048576, type=int,\n            help=(\"only do file-content filtering on files smaller than <size> (default is 1048576 bytes)\"))\n    
parser.add_argument('--preserve-ref-tips', '--protect-blobs-from', '-p',\n                        metavar='<refs>', nargs='+',\n            help=(\"Do not filter the trees for final commit of the specified refs, only in the history before those commits (by default, filtering options affect all commits, even those at ref tips).  This is not recommended.\"))\n    parser.add_argument('--no-blob-protection', action='store_true',\n            help=(\"allow the BFG to modify even your *latest* commit.  Not only is this highly recommended, it is the default.  As such, this option does not actually do anything and is provided solely for compatibility with BFG.  To undo this option, use --preserve-ref-tips and specify HEAD or the current branch name\"))\n    parser.add_argument('--use-formerly-log-text', action='store_true',\n            help=(\"when updating commit hashes in commit messages also add a [formerly OLDHASH] text, possibly violating commit message line length guidelines and providing an inferior way to lookup old hashes (replace references are much preferred as git itself will understand them)\"))\n    parser.add_argument('--use-formerly-commit-footer', action='store_true',\n            help=(\"append a `Former-commit-id:` footer to commit messages.  This is an inferior way to lookup old hashes (replace references are much preferred as git itself will understand them)\"))\n    parser.add_argument('--use-replace-blobs', action='store_true',\n            help=(\"replace any removed file by a `<filename>.REMOVED.git-id` file.  
Makes history ugly as it litters it with replacement files for each one you want removed, but has a small chance of being useful if you find you pruned something incorrectly.\"))\n    parser.add_argument('--private', action='store_true',\n            help=(\"this option does nothing and is provided solely for compatibility with bfg; to undo it, use the --use-* options\"))\n    parser.add_argument('--massive-non-file-objects-sized-up-to',\n                        metavar='<size>',\n            help=(\"this option does nothing and is provided solely for compatibility with bfg\"))\n    parser.add_argument('repo', type=os.fsencode,\n            help=(\"file path for Git repository to clean\"))\n\n    args = parser.parse_args()\n\n    # Sanity check on args.repo\n    if not os.path.isdir(args.repo):\n      raise SystemExit(\"Repo not found: {}\".format(os.fsdecode(args.repo)))\n    dirname, basename = os.path.split(args.repo)\n    if not basename:\n      dirname, basename = os.path.split(dirname)\n    if not dirname:\n      dirname = b'.'\n    if basename == b\".git\":\n      raise SystemExit(\"For non-bare repos, please specify the toplevel directory ({}) for repo\"\n                       .format(os.fsdecode(dirname)))\n\n    return args\n\n  def convert_replace_text(self, filename):\n    tmpfile, newname = tempfile.mkstemp()\n    os.close(tmpfile)\n    with open(newname, 'bw') as outfile:\n      with open(filename, 'br') as infile:\n        for line in infile:\n          if line.startswith(b'regex:'):\n            beg, end = line.split(b'==>')\n            end = re.sub(br'\\$([0-9])', br'\\\\\\1', end)\n            outfile.write(b'%s==>%s\\n' % (beg, end))\n          elif line.startswith(b'glob:'):\n            outfile.write(b'glob:' + java_to_fnmatch_glob(line[5:]))\n          else:\n            outfile.write(line)\n    return newname\n\n  def path_wanted(self, filename):\n    if not self.args.delete_files and not self.args.delete_folders:\n      return filename\n   
 paths = filename.split(b'/')\n    dirs = paths[0:-1]\n    basename = paths[-1]\n    if self.args.delete_files and any(fnmatch.fnmatch(basename, x)\n                                 for x in self.args.delete_files):\n      return False\n    if self.args.delete_folders and any(any(fnmatch.fnmatch(dirname, x)\n                                            for dirname in dirs)\n                                        for x in self.args.delete_folders):\n      return False\n    return True\n\n  def should_filter_path(self, filename):\n    def matches(basename, glob_list):\n      return any(fnmatch.fnmatch(basename, x) for x in glob_list)\n\n    basename = os.path.basename(filename)\n    if self.args.filter_content_including and \\\n       not matches(basename, self.args.filter_content_including):\n        return False\n\n    if self.args.filter_content_excluding and \\\n       matches(basename, self.args.filter_content_excluding):\n      return False\n\n    return True\n\n  def filter_relevant_blobs(self, commit):\n    for change in commit.file_changes:\n      if change.type == b'D':\n        continue # deleted files have no remaining content to filter\n\n      if change.mode in (b'120000', b'160000'):\n        continue # symlinks and submodules aren't text files we can filter\n\n      if change.blob_id in self.filtered_blobs:\n        change.blob_id = self.filtered_blobs[change.blob_id]\n        continue\n\n      if self.args.filter_content_size_threshold:\n        size = self.blob_sizes[change.blob_id]\n        if size >= self.args.filter_content_size_threshold:\n          continue\n\n      if not self.should_filter_path(change.filename):\n        continue\n\n      self.cat_file_proc.stdin.write(change.blob_id + b'\\n')\n      self.cat_file_proc.stdin.flush()\n      objhash, objtype, objsize = self.cat_file_proc.stdout.readline().split()\n      # FIXME: This next line assumes the file fits in memory; though the way\n      # fr.Blob works we kind of have that assumption 
baked in elsewhere too...\n      contents = self.cat_file_proc.stdout.read(int(objsize))\n      if not any(x == b\"0\" for x in contents[0:8192]): # not binaries\n        for literal, replacement in self.replacement_rules['literals']:\n          contents = contents.replace(literal, replacement)\n        for regex,   replacement in self.replacement_rules['regexes']:\n          contents = regex.sub(replacement, contents)\n      self.cat_file_proc.stdout.read(1) # Read trailing newline\n\n      blob = fr.Blob(contents)\n      self.filter.insert(blob)\n      self.filtered_blobs[change.blob_id] = blob.id\n      change.blob_id = blob.id\n\n  def munge_message(self, message, metadata):\n    def replace_hash(matchobj):\n      oldhash = matchobj.group(1)\n      newhash = metadata['commit_rename_func'](oldhash)\n      if newhash != oldhash and self.args.use_formerly_log_text:\n        newhash = b'%s [formerly %s]' % (newhash, oldhash)\n      return newhash\n\n    return self._hash_re.sub(replace_hash, message)\n\n  def commit_update(self, commit, metadata):\n    # Strip out unwanted files\n    new_file_changes = []\n    for change in commit.file_changes:\n      if not self.path_wanted(change.filename):\n        if not self.args.use_replace_blobs:\n          continue\n        blob = fr.Blob(change.blob_id)\n        self.filter.insert(blob)\n        change.blob_id = blob.id\n        change.filename += b'.REMOVED.git-id'\n      new_file_changes.append(change)\n    commit.file_changes = new_file_changes\n\n    # Filter text of relevant files\n    if self.replacement_rules:\n      self.filter_relevant_blobs(commit)\n\n    # Replace commit hashes in commit message with 'newhash [formerly oldhash]'\n    if self.args.use_formerly_log_text:\n      commit.message = self.munge_message(commit.message, metadata)\n\n    # Add a 'Former-commit-id:' footer\n    if self.args.use_formerly_commit_footer:\n      if not commit.message.endswith(b'\\n'):\n        commit.message += b'\\n'\n      
lastline = commit.message.splitlines()[-1]\n      if not re.match(b'\\n[A-Za-z0-9-_]*: ', lastline):\n        commit.message += b'\\n'\n      commit.message += b'Former-commit-id: %s' % commit.original_id\n\n  def get_preservation_info(self, ref_tips):\n    if not ref_tips:\n      return []\n    cmd = 'git rev-parse --symbolic-full-name'.split()\n    p = subproc.Popen(cmd + ref_tips,\n                      stdout = subprocess.PIPE,\n                      stderr = subprocess.STDOUT)\n    ret = p.wait()\n    output = p.stdout.read()\n    if ret != 0:\n      raise SystemExit(\"Failed to translate --preserve-ref-tips arguments into refs\\n\"+fr.decode(output))\n    refs = output.splitlines()\n    ref_trees = [b'%s^{tree}' % ref for ref in refs]\n    output = subproc.check_output(['git', 'rev-parse'] + ref_trees)\n    trees = output.splitlines()\n    return dict(zip(refs, trees))\n\n  def revert_tree_changes(self, preserve_refs):\n    # FIXME: Since this function essentially creates a new commit (with the\n    # original tree) to replace the commit at the ref tip (which has a\n    # filtered tree), I should update the created refs/replace/ object to\n    # point to the newest commit.  Also, the double reset (see comment near\n    # where revert_tree_changes is called) seems kinda lame.  
It'd be easy\n    # enough to fix these issues, but I'm very unmotivated since\n    # --preserve-ref-tips/--protect-blobs-from is a design mistake.\n    updates = {}\n    for ref, tree in preserve_refs.items():\n      output = subproc.check_output('git cat-file -p'.split()+[ref])\n      lines = output.splitlines()\n      if not lines[0].startswith(b'tree '):\n        raise SystemExit(\"Error: --preserve-ref-tips only works with commit refs\")\n      num = 1\n      parents = []\n      while lines[num].startswith(b'parent '):\n        parents.append(lines[num][7:])\n        num += 1\n      assert lines[num].startswith(b'author ')\n      author_info = [x.strip()\n                     for x in re.split(b'[<>]', lines[num][7:])]\n      aenv = 'GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL GIT_AUTHOR_DATE'.split()\n      assert lines[num+1].startswith(b'committer ')\n      committer_info = [x.strip()\n                        for x in re.split(b'[<>]', lines[num+1][10:])]\n      cenv = 'GIT_COMMITTER_NAME GIT_COMMITTER_EMAIL GIT_COMMITTER_DATE'.split()\n      new_env = {**os.environ.copy(),\n                 **dict(zip(aenv, author_info)),\n                 **dict(zip(cenv, committer_info))}\n      assert lines[num+2] == b''\n      commit_msg = b'\\n'.join(lines[num+3:])+b'\\n'\n      p_s = [val for pair in zip(['-p',]*len(parents), parents) for val in pair]\n      p = subproc.Popen('git commit-tree'.split() + p_s + [tree],\n                        stdin = subprocess.PIPE, stdout = subprocess.PIPE,\n                        env = new_env)\n      p.stdin.write(commit_msg)\n      p.stdin.close()\n      if p.wait() != 0:\n        raise SystemExit(\"Error: failed to write preserve commit for {} [{}]\"\n                         .format(ref, tree))\n      updates[ref] = p.stdout.read().strip()\n    p = subproc.Popen('git update-ref --stdin'.split(), stdin = subprocess.PIPE)\n    for ref, newvalue in updates.items():\n      p.stdin.write(b'update %s %s\\n' % (ref, newvalue))\n    
p.stdin.close()\n    if p.wait() != 0:\n      raise SystemExit(\"Error: failed to write preserve commits\")\n\n  def run(self):\n    bfg_args = self.parse_options()\n    preserve_refs = self.get_preservation_info(bfg_args.preserve_ref_tips)\n\n    work_dir = os.getcwd()\n    os.chdir(bfg_args.repo)\n    bfg_args.delete_files = java_to_fnmatch_glob(bfg_args.delete_files)\n    bfg_args.delete_folders = java_to_fnmatch_glob(bfg_args.delete_folders)\n    bfg_args.filter_content_including = \\\n             java_to_fnmatch_glob(bfg_args.filter_content_including)\n    bfg_args.filter_content_excluding = \\\n             java_to_fnmatch_glob(bfg_args.filter_content_excluding)\n    if bfg_args.replace_text and bfg_args.filter_content_size_threshold:\n      # FIXME (perf): It would be much more performant and probably make more\n      # sense to have a `git cat-file --batch-check` process running and query\n      # it for blob sizes, since we may only need a small subset of blob sizes\n      # rather than the sizes of all objects in the git database.\n      self.blob_sizes, packed_sizes = fr.GitUtils.get_blob_sizes()\n    extra_args = []\n    if bfg_args.strip_blobs_bigger_than:\n      extra_args = ['--strip-blobs-bigger-than',\n                    bfg_args.strip_blobs_bigger_than]\n    if bfg_args.strip_blobs_with_ids:\n      extra_args = ['--strip-blobs-with-ids',\n                    bfg_args.strip_blobs_with_ids]\n    if bfg_args.use_formerly_log_text:\n      extra_args += ['--preserve-commit-hashes']\n    new_replace_file = None\n    if bfg_args.replace_text:\n      if not os.path.isabs(bfg_args.replace_text):\n        bfg_args.replace_text = os.path.join(work_dir, bfg_args.replace_text)\n\n      new_replace_file = self.convert_replace_text(bfg_args.replace_text)\n      rules = fr.FilteringOptions.get_replace_text(new_replace_file)\n      self.replacement_rules = rules\n      self.cat_file_proc = subproc.Popen(['git', 'cat-file', '--batch'],\n                           
              stdin = subprocess.PIPE,\n                                         stdout = subprocess.PIPE)\n    self.args = bfg_args\n    # Setting partial prevents:\n    #   * remapping origin remote tracking branches to regular branches\n    #   * deletion of the origin remote\n    #   * nuking unused refs\n    #   * nuking reflogs\n    #   * repacking\n    # While these are arguably desirable things, BFG documentation assumes\n    # the first two aren't done, so for compatibility turn them all off.\n    # The third is irrelevant since BFG has no mechanism for renaming refs,\n    # and we'll manually add the fourth and fifth back in below by calling\n    # RepoFilter.cleanup().\n    fr_args = fr.FilteringOptions.parse_args(['--partial', '--force'] +\n                                             extra_args)\n    self.filter = fr.RepoFilter(fr_args, commit_callback=self.commit_update)\n    self.filter.run()\n    if new_replace_file:\n      os.remove(new_replace_file)\n      self.cat_file_proc.stdin.close()\n      self.cat_file_proc.wait()\n    need_another_reset = False\n    if preserve_refs:\n      self.revert_tree_changes(preserve_refs)\n      # If the repository is not bare, self.filter.run() already did a reset\n      # for us.  However, if we are preserving refs (and the repository isn't\n      # bare), we need another since we possibly updated HEAD after that\n      # reset (FIXME: two resets is kinda ugly; would be nice to just do\n      # one).\n      if not fr.GitUtils.is_repository_bare('.'):\n        need_another_reset = True\n\n    if not os.path.isabs(os.fsdecode(bfg_args.repo)):\n      bfg_args.repo = os.fsencode(os.path.join(work_dir, os.fsdecode(bfg_args.repo)))\n\n    self.filter.cleanup(bfg_args.repo, repack=True, reset=need_another_reset)\n\nif __name__ == '__main__':\n  bfg = BFG_ish()\n  bfg.run()\n  # Show the same message BFG does, even if we don't copy the rest of its\n  # progress output.  
Make this program feel slightly more authentically BFG.\n  # :-)\n  print('''\n\n--\nYou can rewrite history in Git - don't let Trump do it for real!\nTrump's administration has lied consistently, to make people give up on ever\nbeing told the truth. Don't give up: https://www.rescue.org/topic/refugees-america\n--\n''')\n"
  },
  {
    "path": "contrib/filter-repo-demos/clean-ignore",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a simple program that will delete files from history which match\ncurrent gitignore rules, while also:\n  1) pruning commits which become empty\n  2) pruning merge commits which become degenerate and have no changes\n     relative to its remaining relevant parent\n  3) rewriting commit hashes in commit messages to reference new commit IDs.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport argparse\nimport os\nimport subprocess\nimport sys\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nclass CheckIgnores:\n  def __init__(self):\n    self.ignored = set()\n    self.okay = set()\n\n    cmd = 'git check-ignore --stdin --verbose --non-matching --no-index -z'\n    self.check_ignore_process = subprocess.Popen(cmd.split(),\n                                                 stdin=subprocess.PIPE,\n                                                 stdout=subprocess.PIPE)\n\n  def __del__(self):\n    if self.check_ignore_process:\n      self.check_ignore_process.stdin.close()\n\n  def get_ignored(self, filenames):\n    ignored = set()\n    for name in filenames:\n      if name in self.ignored:\n        ignored.add(name)\n      elif name in self.okay:\n        continue\n      else:\n        self.check_ignore_process.stdin.write(name+b'\\0')\n        self.check_ignore_process.stdin.flush()\n        result = os.read(self.check_ignore_process.stdout.fileno(), 65535).rstrip(b'\\0')\n        (source, linenum, pattern, pathname) = result.split(b\"\\0\")\n        if name != pathname:\n          raise SystemExit(\"Error: Passed {} but got {}\".format(name, pathname))\n        if not source and not linenum and not pattern:\n          
self.okay.add(name)\n        else:\n          if pattern[0:1] == b\"!\":\n            self.okay.add(name)\n          else:\n            self.ignored.add(name)\n            ignored.add(name)\n\n    return ignored\n\n  def skip_ignores(self, commit, metadata):\n    filenames = [x.filename for x in commit.file_changes]\n    bad = self.get_ignored(filenames)\n    commit.file_changes = [x for x in commit.file_changes\n                           if x.filename not in bad]\n\n\ndef main():\n  checker = CheckIgnores()\n  args = fr.FilteringOptions.parse_args(sys.argv[1:], error_on_empty = False)\n  filter = fr.RepoFilter(args, commit_callback=checker.skip_ignores)\n  filter.run()\n  \n\nif __name__ == '__main__':\n  main()\n\n"
  },
  {
    "path": "contrib/filter-repo-demos/convert-svnexternals",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a program that will insert Git submodules according to SVN externals\ndefinitions (svn:externals properties) from the original Subversion repository\nthroughout the history.\n\nInformation about the externals is obtained from the \".gitsvnextmodules\" file\ncreated during SVN-to-Git conversion by SubGit (https://subgit.com/). Its\nconfig option \"translate.externals=true\" had to be used therefore.\n\nActual modifications:\n- Insert gitlinks (mode 160000) into the tree.\n- Add .gitmodules file with relevant sections.\n- Remove sections converted to submodules from .gitsvnextmodules file\n  and delete it if empty.\n\n.gitsvnextmodules example:\n[submodule \"somedir/extdir\"]\n\tpath = somedir/extdir\n\towner = somedir\n\turl = https://svn.example.com/somesvnrepo/trunk\n\trevision = 1234\n\tbranch = /\n\tfetch = :refs/remotes/git-svn\n\tremote = svn\n\ttype = dir\n\nResulting addition in \"somedir\" tree (cat-file pretty-print format):\n160000 commit 1234123412341234123412341234123412341234\textdir\n\nResulting .gitmodules entry:\n[submodule \"somedir/extdir\"]\n\tpath = somedir/extdir\n\turl = https://git.example.com/somegitrepo.git\n\nSVN-to-Git mapping file:\nCan be created from SubGit's \"refs/svn/map\".\nOne line per mapping in following format:\n<svn url> TAB <svn rev> TAB <git url> TAB <git commit> TAB <state>\n- Leading '#' can be used for comments.\n- <svn url> must not contain a trailing slash.\n- <state> has to be \"commit\" to be usable, but can be \"missing\" if <git commit>\n  does not exist in the repository anymore. 
Adopted from git-cat-file output.\nExample:\nhttps://svn.example.com/somesvnrepo/trunk\t1234\thttps://git.example.com/somegitrepo.git\t1234123412341234123412341234123412341234\tcommit\n\nFeatures:\n- Repeatedly added/removed externals will be handled properly.\n- Externals replaced by directly added files and vice versa will be handled\n  properly.\n\nCaveats:\n- This script must NOT be run repeatedly. A second invocation would lead to a\n  different result in case the externals could only be converted partially.\n- Inconsistent SVN repositories (with failing checkout) not handled, i.e.\n  - normal directory and external with the same path\n  - external path not existing for the given revision\n- No attention was paid to non-ASCII and special characters in gitlink paths,\n  might cause problems.\n- There is no error handling for mandatory options missing in .gitsvnextmodules\n  file. The script would crash in case of such buggy files, but that shouldn't\n  happen in practice.\n\nTODO:\n- Add external files directly.\n- Alternatively add external directories directly instead of using a submodule.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport argparse\nimport os\nimport sys\nimport shutil\nimport subprocess\nimport configparser\nfrom urllib.parse import urlsplit\n\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nsvn_root_url = \"\"\nsvn_git_mappings = []\n\ndef parse_args():\n  \"\"\"\n  Parse and return arguments for this script.\n\n  Also do some argument sanity checks and adaptions.\n  \"\"\"\n  parser = argparse.ArgumentParser(\n      description=\"Add Git submodules according to svn:externals from .gitsvnextmodules. 
\"\n                  \"As preparation for this conversion process, an analysis can be performed.\")\n\n  parser.add_argument('--force', '-f', action='store_true',\n      help=\"Rewrite repository history even if the current repo does not \"\n           \"look like a fresh clone.\")\n  parser.add_argument('--refs', nargs='+',\n      help=\"Limit history rewriting to the specified refs. Option is directly \"\n           \"forwarded to git-filter-repo, see there for details and caveats. \"\n           \"Use for debugging purposes only!\")\n  parser.add_argument('--svn-root-url',\n      help=\"Root URL of the corresponding SVN repository, \"\n           \"needed for conversion of relative to absolute external URLs.\")\n\n  analysis = parser.add_argument_group(title=\"Analysis\")\n  analysis.add_argument('--analyze', action='store_true',\n      help=\"Analyze repository history and create auxiliary files for conversion process.\")\n  analysis.add_argument('--report-dir', type=os.fsencode,\n      help=\"Directory to write report, defaults to GIT_DIR/filter-repo/svnexternals, \"\n           \"refuses to run if exists, --force delete existing dir first.\")\n\n  conversion = parser.add_argument_group(title=\"Conversion\")\n  conversion.add_argument('--svn-git-mapfiles', type=os.fsencode, nargs='+', metavar='MAPFILE',\n      help=\"Files with SVN-to-Git revision mappings for SVN externals conversion.\")\n\n  args = parser.parse_args()\n\n  if args.analyze and args.svn_git_mapfiles:\n    raise SystemExit(\"Error: --svn-git-mapfiles makes no sense with --analyze.\")\n\n  if not args.analyze and not args.svn_git_mapfiles:\n    raise SystemExit(\"Error: --svn-git-mapfiles is required for the conversion process.\")\n\n  return args\n\ndef read_mappings(mapfiles):\n  \"\"\"\n  Read files with SVN-to-Git mappings and return a list of mappings from it.\n  \"\"\"\n  mappings = []\n  for mapfile in mapfiles:\n    with open(mapfile, \"rb\") as f:\n      for line in f:\n        line = 
line.rstrip(b'\\r\\n')\n\n        # Skip blank and comment lines\n        if not line or line.startswith(b'#'):\n          continue\n\n        # Convert to string for use with configparser later\n        line = line.decode()\n\n        # Parse the line\n        fields = line.split('\\t', 4)\n        mapping = {'svn_url': fields[0],\n                   'svn_rev': int(fields[1]),\n                   'git_url': fields[2],\n                   'git_commit': fields[3],\n                   'state': fields[4]}\n\n        mappings.append(mapping)\n  return mappings\n\ncat_file_process = None\ndef parse_config(blob_id):\n  \"\"\"\n  Create a configparser object for a .gitsvnextmodules/.gitmodules file from\n  its blob ID.\n  \"\"\"\n  parsed_config = configparser.ConfigParser()\n\n  if blob_id is not None:\n    # Get the blob contents\n    cat_file_process.stdin.write(blob_id + b'\\n')\n    cat_file_process.stdin.flush()\n    objhash, objtype, objsize = cat_file_process.stdout.readline().split()\n    contents_plus_newline = cat_file_process.stdout.read(int(objsize)+1)\n\n    # Parse it\n    parsed_config.read_string(contents_plus_newline.decode())\n\n  return parsed_config\n\ndef create_blob(parsed_config):\n  \"\"\"\n  Create a filter-repo blob object from a .gitsvnextmodules/.gitmodules\n  configparser object according to Git config style.\n  \"\"\"\n  lines = []\n  for sec in parsed_config.sections():\n    lines.append(\"[\" + sec + \"]\\n\")\n    for opt in parsed_config.options(sec):\n      lines.append(\"\\t\" + opt + \" = \" + parsed_config[sec][opt] + \"\\n\")\n\n  return fr.Blob(''.join(lines).encode())\n\ndef get_git_url(svn_url):\n  \"\"\"\n  Get the Git URL for a corresponding SVN URL.\n  \"\"\"\n  for entry in svn_git_mappings:\n    if entry['svn_url'] == svn_url:\n      return entry['git_url']\n  else:\n    return None\n\ndef get_git_commit_hash(svn_url, svn_rev):\n  \"\"\"\n  Get the Git commit hash for its corresponding SVN URL+revision.\n\n  The mapping is 
not restricted to the exact revision, but also uses the next\n  lower revision found. Needed when the revision was set to that of the root\n  URL instead of to that of the specific subdirectory (e.g. trunk). TortoiseSVN\n  behaves so when setting the external to HEAD.\n  \"\"\"\n  ent = None\n  rev = 0\n\n  for entry in svn_git_mappings:\n    if (entry['svn_url'] == svn_url\n          and entry['svn_rev'] <= svn_rev\n          and entry['svn_rev'] > rev):\n      ent = entry\n      rev = entry['svn_rev']\n\n  if ent is not None and ent['state'] == \"commit\":\n    return ent['git_commit']\n  else:\n    return None\n\ndef get_absolute_svn_url(svnext_url, svn_root_url):\n  \"\"\"\n  Convert a relative svn:externals URL to an absolute one.\n\n  If the format is unsupported, return the URL unchanged with success=False.\n  If no root URL is given or the URL is absolute already, return it unchanged.\n\n  In all cases, even if returned \"unchanged\", trailing slashes are removed.\n  \"\"\"\n  # Remove trailing slash(es)\n  svnext_url = svnext_url.rstrip(\"/\")\n  svn_root_url = svn_root_url.rstrip(\"/\")\n\n  # Normalize URLs in relative format\n  svn_root_parsed = urlsplit(svn_root_url)\n  if svnext_url.startswith((\"../\", \"^/../\")): # unsupported\n    return (False, svnext_url)\n  elif not svn_root_url:\n    pass # unchanged\n  elif svnext_url.startswith(\"^/\"):\n    svnext_url = svn_root_url + svnext_url[1:]\n  elif svnext_url.startswith(\"//\"):\n    svnext_url = svn_root_parsed.scheme + \":\" + svnext_url\n  elif svnext_url.startswith(\"/\"):\n    svnext_url = svn_root_parsed.scheme + \"://\" + svn_root_parsed.netloc + svnext_url\n\n  return True, svnext_url\n\ndef parse_revision_value(value):\n  \"\"\"\n  Parse the value of key 'revision' from a .gitsvnextmodules file and return it\n  as integer.\n\n  Used to handle non-numeric values like 1k, 2k, 3k etc. 
added by SubGit\n  instead of 1024, 2048, 3072 etc., likewise 1m, 2m, ..., 1g, ...\n  \"\"\"\n  suffix = value[-1]\n  if suffix in \"kmg\":\n    mult = {\"k\": 1024, \"m\": 1024**2, \"g\": 1024**3}\n    return int(value[0:-1]) * mult[suffix]\n  else:\n    return int(value)\n\ndef add_submodule_tree_entry(commit, parsed_config, section):\n  \"\"\"\n  Add a submodule entry to the tree of a Git commit.\n\n  SVN externals information obtained from parsed .gitsvnextmodules file.\n  \"\"\"\n  # Skip type=file (SVN file external), not possible as submodule\n  if parsed_config[section]['type'] != 'dir':\n    return False\n\n  success, svn_url = get_absolute_svn_url(parsed_config[section]['url'], svn_root_url)\n  # Skip unsupported URL format\n  if not success:\n    return False\n\n  # Get SVN revision\n  if parsed_config.has_option(section, 'revision'):\n    svn_rev = parse_revision_value(parsed_config[section]['revision'])\n  else:\n    # TODO: revision has to be guessed according to commit timestamp, skip for now\n    return False\n\n  # SVN url+revision mapping to Git commit\n  git_hash = get_git_commit_hash(svn_url, svn_rev)\n  # Skip missing or unusable mapping\n  if git_hash is None:\n    return False\n  git_hash = git_hash.encode()\n\n  dirname = parsed_config[section]['path'].encode()\n\n  # Add gitlink to tree\n  commit.file_changes.append(fr.FileChange(b'M', dirname, git_hash, b'160000'))\n\n  return True\n\ndef get_commit_map_path():\n  \"\"\"\n  Return path to commit-map file.\n  \"\"\"\n  git_dir = fr.GitUtils.determine_git_dir(b'.')\n  return os.path.join(git_dir, b'filter-repo', b'commit-map')\n\ndef parse_commit_map(commit_map_file):\n  \"\"\"\n  Parse commit-map file and return a dictionary.\n  \"\"\"\n  parsed_map = {}\n  with open(commit_map_file, \"rb\") as f:\n    for line in f:\n      line = line.rstrip(b'\\r\\n')\n\n      # Skip blank lines\n      if not line:\n        continue\n\n      # Store old/new commits, also the \"old\"/\"new\" header in the 
first line\n      old, new = line.split()\n      parsed_map[old] = new\n  return parsed_map\n\ndef merge_commit_maps(old_commit_map, new_commit_map):\n  \"\"\"\n  Merge old and new commit-map by omitting intermediate commits.\n\n  Return the merged dictionary.\n  \"\"\"\n  merged_map = {}\n  for (key, old_val) in old_commit_map.items():\n    new_val = new_commit_map[old_val] if old_val in new_commit_map else old_val\n    merged_map[key] = new_val\n  return merged_map\n\ndef write_commit_map(commit_map, commit_map_file):\n  \"\"\"\n  Write commit-map dictionary to file.\n  \"\"\"\n  with open(commit_map_file, 'wb') as f:\n    for (old, new) in commit_map.items():\n      f.write(b'%-40s %s\\n' % (old, new))\n\ndef create_report_dir(args):\n  \"\"\"\n  Create the directory for analysis report.\n  \"\"\"\n  if args.report_dir:\n    reportdir = args.report_dir\n  else:\n    git_dir = fr.GitUtils.determine_git_dir(b'.')\n\n  # Create the report directory as necessary\n    results_tmp_dir = os.path.join(git_dir, b'filter-repo')\n    if not os.path.isdir(results_tmp_dir):\n      os.mkdir(results_tmp_dir)\n    reportdir = os.path.join(results_tmp_dir, b'svnexternals')\n\n  if os.path.isdir(reportdir):\n    if args.force:\n      sys.stdout.write(\"Warning: Removing recursively: \\\"%s\\\"\" % fr.decode(reportdir))\n      shutil.rmtree(reportdir)\n    else:\n      sys.stdout.write(\"Error: dir already exists (use --force to delete): \\\"%s\\\"\\n\" % fr.decode(reportdir))\n      sys.exit(1)\n\n  os.mkdir(reportdir)\n\n  return reportdir\n\nanalysis = {'dir_ext_orig': [],\n            'dir_ext_abs': [],\n            'file_ext_orig': [],\n            'file_ext_abs': []}\ndef write_analysis(reportdir):\n  \"\"\"\n  Prepare analysis and write it to files in report directory.\n  \"\"\"\n  analysis['dir_ext_orig'].sort()\n  analysis['dir_ext_abs'].sort()\n  analysis['file_ext_orig'].sort()\n  analysis['file_ext_abs'].sort()\n\n  sys.stdout.write(\"Writing reports to %s...\" % 
fr.decode(reportdir))\n  sys.stdout.flush()\n\n  with open(os.path.join(reportdir, b\"dir-externals-original.txt\"), 'wb') as f:\n    for url in analysis['dir_ext_orig']:\n      f.write((\"%s\\n\" % url).encode())\n\n  with open(os.path.join(reportdir, b\"dir-externals-absolute.txt\"), 'wb') as f:\n    for url in analysis['dir_ext_abs']:\n      f.write((\"%s\\n\" % url).encode())\n\n  with open(os.path.join(reportdir, b\"file-externals-original.txt\"), 'wb') as f:\n    for url in analysis['file_ext_orig']:\n      f.write((\"%s\\n\" % url).encode())\n\n  with open(os.path.join(reportdir, b\"file-externals-absolute.txt\"), 'wb') as f:\n    for url in analysis['file_ext_abs']:\n      f.write((\"%s\\n\" % url).encode())\n\n  sys.stdout.write(\"done.\\n\")\n\ndef analyze_externals(commit, metadata):\n  \"\"\"\n  Generate/extend analysis of SVN externals for a Git commit.\n\n  Used as filter-repo commit callback.\n  \"\"\"\n  for change in commit.file_changes:\n    if change.filename == b'.gitsvnextmodules' and change.type == b'M':\n      gitsvnextmodules = parse_config(change.blob_id)\n\n      for sec in gitsvnextmodules.sections():\n        url = gitsvnextmodules[sec]['url']\n        success, abs_url = get_absolute_svn_url(url, svn_root_url)\n\n        # List of svn:externals URLs, also add the URL to the absolute list if\n        # conversion was not successful\n        if gitsvnextmodules[sec]['type'] == 'dir':\n          if url not in analysis['dir_ext_orig']:\n            analysis['dir_ext_orig'].append(url)\n          if abs_url not in analysis['dir_ext_abs']:\n            analysis['dir_ext_abs'].append(abs_url)\n        else:\n          if url not in analysis['file_ext_orig']:\n            analysis['file_ext_orig'].append(url)\n          if abs_url not in analysis['file_ext_abs']:\n            analysis['file_ext_abs'].append(abs_url)\n\ndef insert_submodules(commit, metadata):\n  \"\"\"\n  Insert submodules for a Git commit.\n\n  Used as filter-repo commit 
callback.\n\n  Since .gitsvnextmodules just contains the svn:externals state for the given\n  commit, we cannot derive specific changes from that file.\n  So we can only add/modify the gitlinks according to .gitsvnextmodules\n  (without knowing whether adding a new or modifying an existing or even\n  \"modifying\" an unchanged submodule, but none of that really matters).\n  We do not have information about deleted externals, those will be handled in\n  a separate filter run afterwards.\n\n  The .gitmodules file however will already be correct in this function because\n  we don't need to know about specific changes to add, modify or delete it.\n  \"\"\"\n  for change in commit.file_changes:\n    if change.filename == b'.gitsvnextmodules' and change.type in (b'M', b'D'):\n      gitsvnextmodules = parse_config(change.blob_id)\n      gitmodules = configparser.ConfigParser()\n\n      # Add gitlinks to the tree and prepare .gitmodules file content\n      for sec in gitsvnextmodules.sections():\n        if add_submodule_tree_entry(commit, gitsvnextmodules, sec):\n          # Gitlink added\n          # -> Add this entry to .gitmodules as well\n\n          # Create the section name string manually, do not rely on\n          # .gitsvnextmodules to always use the proper section name.\n          sec_name = 'submodule \"' + gitsvnextmodules[sec]['path'] + '\"'\n          gitmodules[sec_name] = {}\n\n          # submodule.<name>.path\n          gitmodules[sec_name]['path'] = gitsvnextmodules[sec]['path']\n\n          # submodule.<name>.url\n          success, svn_url = get_absolute_svn_url(gitsvnextmodules[sec]['url'], svn_root_url)\n          git_url = get_git_url(svn_url)\n          if git_url is not None:\n            gitmodules[sec_name]['url'] = git_url\n          else:\n            # Abort, but this will not happen in practice, catched in\n            # add_submodule_tree_entry() via get_git_commit_hash() already.\n            raise SystemExit(\"Error: No Git URL found in 
mapping although a commit hash could be found.\")\n\n      # Write blob and adapt tree for .gitmodules\n      if gitmodules.sections():\n        # Create a blob object from the content and add it to the tree.\n        blob = create_blob(gitmodules)\n        filter.insert(blob)\n        commit.file_changes.append(fr.FileChange(b'M', b'.gitmodules', blob.id, b'100644'))\n      else:\n        # Delete the file, even if a \"git rm\" of all submodules keeps it empty.\n        commit.file_changes.append(fr.FileChange(b'D', b'.gitmodules'))\n\ndef delete_submodules(commit, metadata):\n  \"\"\"\n  Delete submodules from a Git commit.\n\n  Used as filter-repo commit callback.\n\n  Delete all submodules (inserted in the previous filter run) without an entry\n  in .gitsvnextmodules, these were real deletions of externals, which couldn't\n  be detected before.\n  Only the tree entries have to be removed because the .gitmodules file is\n  already in correct state from previous filter run.\n  \"\"\"\n  for change in commit.file_changes:\n    if change.filename == b'.gitsvnextmodules' and change.type in (b'M', b'D'):\n      gitsvnextmodules = parse_config(change.blob_id)\n\n      # Search for all submodules in the tree\n      output = subprocess.check_output('git ls-tree -d -r -z'.split() + [commit.original_id])\n      for line in output.split(b'\\x00'):\n        if not line:\n          continue\n        mode_objtype_objid, dirname = line.split(b'\\t', 1)\n        mode, objtype, objid = mode_objtype_objid.split(b' ')\n        if mode == b'160000' and objtype == b'commit':\n          # Submodule found\n          # -> Delete it if there is no corresponding entry in\n          #    .gitsvnextmodules, keep/reinsert it otherwise\n          for sec in gitsvnextmodules.sections():\n            if gitsvnextmodules[sec]['path'].encode() == dirname:\n              # Reinsert it, might have been deleted in previous commits\n              if add_submodule_tree_entry(commit, gitsvnextmodules, 
sec):\n                # And remove the config section because this external has been\n                # converted\n                gitsvnextmodules.remove_section(sec)\n                break\n          else:\n            # Delete it\n            commit.file_changes.append(fr.FileChange(b'D', dirname))\n\n      # Rewrite .gitsvnextmodules to contain the unhandled externals only,\n      # delete it if empty (all externals converted).\n      if gitsvnextmodules.sections():\n        # Create a blob object from the content and replace the original one.\n        blob = create_blob(gitsvnextmodules)\n        filter.insert(blob)\n        change.blob_id = blob.id\n      else:\n        if change.type == b'M':\n          # File became empty, delete it\n          commit.file_changes.append(fr.FileChange(b'D', b'.gitsvnextmodules'))\n          break # avoid endless for loop\n        #else:\n          # File was empty already, delete command already present in stream\n\nmy_args = parse_args()\n\n# Use passed URL without trailing slash(es)\nif my_args.svn_root_url:\n  svn_root_url = my_args.svn_root_url.rstrip(\"/\")\n\n# Arguments forwarded to filter-repo\nextra_args = []\nif my_args.force:\n  extra_args = ['--force']\nif my_args.refs:\n  extra_args += ['--refs'] + my_args.refs\n\ncat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],\n                                    stdin = subprocess.PIPE,\n                                    stdout = subprocess.PIPE)\nif my_args.analyze:\n  # Analysis\n  reportdir = create_report_dir(my_args)\n\n  fr_args = fr.FilteringOptions.parse_args(['--dry-run']\n                                           + extra_args)\n  filter = fr.RepoFilter(fr_args, commit_callback=analyze_externals)\n  filter.run()\n\n  write_analysis(reportdir)\nelse:\n  # Conversion\n  svn_git_mappings = read_mappings(my_args.svn_git_mapfiles)\n\n  # There are no references to commit hashes in commit messages because this\n  # script runs on a Git repository 
converted from a Subversion repository.\n  fr_args = fr.FilteringOptions.parse_args(['--preserve-commit-hashes',\n                                            '--preserve-commit-encoding',\n                                            '--replace-refs', 'update-no-add']\n                                           + extra_args)\n  filter = fr.RepoFilter(fr_args, commit_callback=insert_submodules)\n  filter.run()\n\n  # Store commit-map after first run\n  first_commit_map = parse_commit_map(get_commit_map_path())\n\n  filter = fr.RepoFilter(fr_args, commit_callback=delete_submodules)\n  filter.run()\n\n  # Update commit-map after second run, based on original IDs\n  second_commit_map = parse_commit_map(get_commit_map_path())\n  merged_commit_map = merge_commit_maps(first_commit_map, second_commit_map)\n  write_commit_map(merged_commit_map, get_commit_map_path())\n\ncat_file_process.stdin.close()\ncat_file_process.wait()\n"
  },
  {
    "path": "contrib/filter-repo-demos/filter-lamely",
    "content": "#!/usr/bin/env python3\n\n\"\"\"This is a bug compatible-ish[1] reimplementation of filter-branch, which\nhappens to be faster.  The goal is _only_ to show filter-repo's flexibility\nin re-implementing other types of history rewriting commands.  It is not\nmeant for actual end-user use, because filter-branch (and thus\nfilter-lamely) is an abomination of user interfaces:\n\n  * it is difficult to learn, except for a few exceedingly trivial rewrites\n  * it is difficult to use; even for expert users like me I often have to\n    spend significant time to craft the filters to do what is needed\n  * it is painfully slow to use: the slow execution (even if filter-lamely\n    is several times faster than filter-branch it will still be far slower\n    than filter-repo) is doubly problematic because users have to retry\n    their commands often to see if they've crafted the right filters, so\n    the real execution time is much worse than what benchmarks typically\n    show.  (Benchmarks don't include how long it took to come up with the\n    right command.)\n  * it provides really bad feedback: broken filters often modify history\n    incorrectly rather than providing errors; even when errors are printed,\n    it takes forever before the errors are shown, the errors are lost in\n    a sea of output, and no context about which commits were involved are\n    saved.\n  * users cannot share commands they come up with very well, because BSD vs.\n    GNU userland differences will result in errors -- causing the above\n    problems to be repeated and/or resulting in silent corruption of repos\n  * the usability defaults are atrocious...\n    * partial history rewrites\n    * backup to refs/original/\n    * no automatic post-run cleanup\n    * not pruning empty commits\n    * not rewriting commit hashes in commit messages\n  * ...and the atrocious defaults combine for even worse effects:\n    * users mix up old and new history, push both, things get merged, and\n  
    then they have even more of a mess with banned objects still floating\n      around\n    * since users can run arbitrary commands in the filters, relying on\n      the local repo to keep a backup of itself seems suspect\n    * refs/original/ doesn't correctly back up tags (it dereferences them),\n      so it isn't a safe mechanism for recovery even if all goes well\n    * even if the backups in refs/original/ were good, many users don't know\n      how to restore using that mechanism.  So they clone before filtering\n      and just nuke the clone if the filtering goes poorly.\n    * --tag-name-filter writes out new tags but leaves the old ones around,\n      making claims like \"just clone the repo to get rid of the old\n      history\" a farce.  It also makes it hard to extricate old vs. new\n      bits of history, as if the default to partial history rewrites wasn't\n      bad enough\n    * since filtering can result in lots of empty commits, filter-branch at\n      least provides an option to nuke all empty commits, but naturally\n      that includes the empty commits that were intentionally added to the\n      original repository as opposed to just commits that become empty\n      due to filtering.  And, for good measure, filter-branch's --prune-empty\n      actually still misses some commits that become empty.\n    * it's extremely difficult in filter-branch to rewrite commit hashes in\n      commit messages sanely.  It requires using undocumented capabilities\n      and even then is going to be extremely painful and slow.  As long as\n      --commit-filter isn't used, I could do it in filter-lamely with just\n      a one-line change, but the point was demonstrating compatibility with\n      a horrible tool, not showing how we can make it ever so slightly less\n      awful.\n\n[1] Replacing git-filter-branch with this script will still pass all the\n    git-v2.22.0 regression tests.  
However, I know those tests aren't\n    thorough enough and that I did break backward compatibility in some\n    cases.  But, assuming people are crazy enough to want filter-branch to\n    continue to exist, I assert that filter-lamely would be a better\n    filter-branch due to its improved speed.  I won't maintain or improve\n    filter-lamely though, because the only proper thing to do with\n    filter-branch is attempt to rewrite our collective history so that\n    people are unaware of its existence.  People should use filter-repo\n    instead.\n\nIntentional differences from git-filter-branch:\n  * (Perf) --tree-filter and --index-filter only operate on files that have\n    changed since the previous commit, which significantly reduces the amount\n    of work needed.  This requires special efforts to correctly handle deletes\n    when the filters attempt to rename files, but provides significant perf\n    improvements.  There is a vanishingly small chance that someone out there\n    is depending on rewriting all files in every commit and does so\n    differently depending on topology of commits instead of contents of files\n    and is thus adversely affected by this change.  I doubt it, though.\n  * I vastly simplified the map() function to just ignore writing out the\n    mapping; I've never seen anyone explicitly use it, and filter-repo\n    handles remapping to ancestors without it.  I dare you to find anyone\n    that was reading the $workdir/../map/ directory and using it in their\n    filtering.\n  * When git-replace was introduced, --parent-filter became obsolete and\n    deprecated IMO.  As such, I didn't bother reimplementing.  
If I were\n    to reimplement it, I'd just do an extra loop over commits and invoke\n    git-replace based on the --parent-filter output or something similar\n    to that.\n  * I took a bit of liberty in the implementation of --state-branch; I\n    still pass the regression tests, but I kind of violated the spirit of\n    the option.  I may actually circle back and fix this, if I add such\n    a similarly named option to filter-repo.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport argparse\nimport datetime\nimport os\nimport shutil\nimport subprocess\nimport sys\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nsubproc = fr.subproc\n\nclass UserInterfaceNightmare:\n  def __init__(self):\n    args = UserInterfaceNightmare.parse_args()\n\n    # Fix up args.refs\n    if not args.refs:\n      args.refs = [\"HEAD\"]\n    elif args.refs[0] == '--':\n      args.refs = args.refs[1:]\n\n    # Make sure args.d is an absolute path\n    if not args.d.startswith(b'/'):\n      args.d = os.path.abspath(args.d)\n\n    # Save the args\n    self.args = args\n\n    self._orig_refs = {}\n    self._special_delete_mode = b'deadbeefdeadbeefdeadbeefdeadbeefdeadbeef'\n    self._commit_filter_functions = b'''\n      EMPTY_TREE=$(git hash-object -t tree /dev/null)\n\n      # if you run 'skip_commit \"$@\"' in a commit filter, it will print\n      # the (mapped) parents, effectively skipping the commit.\n      skip_commit()\n      {\n        shift;\n        while [ -n \"$1\" ];\n        do\n          shift;\n          echo \"$1\";\n          shift;\n        done;\n      }\n\n      # map is lame; just fake it.\n      map()\n      {\n        echo \"$1\"\n      }\n\n      # if you run 
'git_commit_non_empty_tree \"$@\"' in a commit filter,\n      # it will skip commits that leave the tree untouched, commit the other.\n      git_commit_non_empty_tree()\n      {\n        if test $# = 3 && test \"$1\" = $(git rev-parse \"$3^{tree}\"); then\n          echo \"$3\"\n        elif test $# = 1 && test \"$1\" = $EMPTY_TREE; then\n          :\n        else\n          git commit-tree \"$@\"\n        fi\n      }\n    '''\n\n  @staticmethod\n  def parse_args():\n    parser = argparse.ArgumentParser(\n          description='Mimic filter-branch functionality, for those who '\n                      'lamely have not upgraded their scripts to filter-repo')\n    parser.add_argument('--setup', metavar='<command>',\n          help=(\"Common commands to be included before every other filter\"))\n    parser.add_argument('--subdirectory-filter', metavar='<command>',\n          help=(\"Only include paths under the given directory and rewrite \"\n                \"that directory to be the new project root.\"))\n    parser.add_argument('--env-filter', metavar='<command>',\n          help=(\"Modify the name/email/date of either author or committer\"))\n    parser.add_argument('--tree-filter', metavar='<command>',\n          help=(\"Command to rewrite the tree and its contents.  The working \"\n                \"directory will be set to the root of the checked out tree.  \"\n                \"New files are auto-added, disappeared, etc.\"))\n    parser.add_argument('--index-filter', metavar='<command>',\n          help=(\"Command to rewrite the index.  Similar to the tree filter, \"\n                \"but there are no working tree files which makes it \"\n                \"faster.  
Commonly used with `git rm --cached \"\n                \"--ignore-unmatch` and `git update-index --index-info`\"))\n    parser.add_argument('--parent-filter', metavar='<command>',\n          help=(\"Bail with an error; deprecated years ago\"))\n    parser.add_argument('--remap-to-ancestor', action='store_true',\n                        # Does nothing, this option is always on.  Only exists\n                        # because filter-branch once allowed it to be off and\n                        # so some tests pass this option.\n          help=argparse.SUPPRESS)\n    parser.add_argument('--msg-filter', metavar='<command>',\n          help=(\"Command to run for modifying commit and tag messages which \"\n                \"are received on standard input; standard output will be used \"\n                \"as the new message.\"))\n    parser.add_argument('--commit-filter', metavar='<command>',\n          help=(\"A command to perform the commit.  It will be called with \"\n                \"arguments of the form \\\"<TREE_ID> [(-p <PARENT_COMMIT_ID>)...]\"\n                \"\\\" and the log message on stdin.  The commit id is expected \"\n                \"on stdout.  The simplest commit filter would be 'git \"\n                \"commit-tree $@'\"))\n    parser.add_argument('--tag-name-filter', metavar='<command>',\n          help=(\"This filter is rewriting tag names.  
It will be called \"\n                \"with tag names on stdin and expect a new tag name on stdout.\"))\n    parser.add_argument('--prune-empty', action='store_true',\n          help=(\"Prune empty commits, even commits that were intentionally \"\n                \"added as empty commits in the original repository and really \"\n                \"shouldn't be removed.\"))\n    parser.add_argument('--original', metavar='<namespace>', type=os.fsencode,\n                        default=b'refs/original/',\n          help=(\"Alter misguided backup strategy to store refs under \"\n                \"<namespace> instead of refs/original/\"))\n    parser.add_argument('-d', metavar='<directory>', default='.git-rewrite',\n                        type=os.fsencode,\n          help=(\"Alter the temporary directory used for rewriting\"))\n    parser.add_argument('--force', '-f', action='store_true',\n          help=(\"Run even if there is an existing temporary directory or \"\n                \"an existing backup (e.g. under refs/original/)\"))\n    parser.add_argument('--state-branch', metavar='<branch>',\n          help=(\"Do nothing; filter-lamely is enough faster than \"\n                \"filter-branch that it doesn't need incrementalism.\"))\n    parser.add_argument('refs', metavar='rev-list options',\n                        nargs=argparse.REMAINDER,\n          help=(\"Arguments for git rev-list.  All positive refs included by \"\n                \"these options are rewritten.  
Sane people specify things like \"\n                \"--all, though that annoyingly requires prefacing with --\"))\n\n    args = parser.parse_args()\n\n    # Make setup apply to all the other shell filters\n    if args.setup:\n      if args.env_filter:\n        args.env_filter = args.setup + \"\\n\" + args.env_filter\n      if args.tree_filter:\n        args.tree_filter = args.setup + \"\\n\" + args.tree_filter\n      if args.index_filter:\n        args.index_filter = args.setup + \"\\n\" + args.index_filter\n      if args.msg_filter:\n        args.msg_filter = args.setup + \"\\n\" + args.msg_filter\n      if args.commit_filter:\n        args.commit_filter = args.setup + \"\\n\" + args.commit_filter\n      if args.tag_name_filter:\n        args.tag_name_filter = args.setup + \"\\n\" + args.tag_name_filter\n    return args\n\n  @staticmethod\n  def _get_dereferenced_refs():\n    # [BUG-COMPAT] We could leave out the --dereference and the '^{}' handling\n    # and fix a nasty bug from filter-branch.  
But, as stated elsewhere, the\n    # goal is not to provide sane behavior, but to match what filter-branch\n    # does.\n    cur_refs = {}\n    cmd = 'git show-ref --head --dereference'\n    output = subproc.check_output(cmd.split())\n    for line in output.splitlines():\n      objhash, refname = line.split()\n      if refname.endswith(b'^{}'):\n        refname = refname[0:-3]\n      cur_refs[refname] = objhash\n    return cur_refs\n\n  def _get_and_check_orig_refs(self):\n    self._orig_refs = self._get_dereferenced_refs()\n    if any(ref.startswith(self.args.original) for ref in self._orig_refs):\n      if self.args.force:\n        cmds = b''.join([b\"delete %s\\n\" % r\n                         for r in sorted(self._orig_refs)\n                         if r.startswith(self.args.original)])\n        subproc.check_output('git update-ref --no-deref --stdin'.split(),\n                             input = cmds)\n      else:\n        raise SystemExit(\"Error: {} already exists.  Force overwriting with -f\"\n                         .format(fr.decode(self.args.original)))\n\n  def _write_original_refs(self):\n    new_refs = self._get_dereferenced_refs()\n\n    exported_refs, imported_refs = self.filter.get_exported_and_imported_refs()\n    overwritten = imported_refs & exported_refs\n\n    cmds = b''.join([b\"update %s%s %s\\n\" % (self.args.original, r,\n                                            self._orig_refs[r])\n                     for r in sorted(overwritten)\n                     if r not in new_refs or self._orig_refs[r] != new_refs[r]])\n    subproc.check_output('git update-ref --no-deref --stdin'.split(),\n                         input = cmds)\n\n  def _setup(self):\n    if self.args.force and os.path.exists(self.args.d):\n      shutil.rmtree(self.args.d)\n    if os.path.exists(self.args.d):\n      raise SystemExit(\"Error: {} already exists; use --force to bypass.\"\n                       .format(self.args.d))\n\n    self._get_and_check_orig_refs()\n\n  
  os.makedirs(self.args.d)\n    self.index_file = os.path.join(self.args.d, b'temp_index')\n    self.tmp_tree = os.path.join(self.args.d, b't')\n    os.makedirs(self.tmp_tree)\n\n    # Hack (stupid regression tests depending on implementation details\n    # instead of verifying user-visible and intended functionality...)\n    if self.args.d.endswith(b'/dfoo'):\n      with open(os.path.join(self.args.d, b'backup-refs'), 'w') as f:\n        f.write('drepo\\n')\n    # End hack\n\n  def _cleanup(self):\n    shutil.rmtree(self.args.d)\n\n  def _check_for_unsupported_args(self):\n    if self.args.parent_filter:\n      raise SystemExit(\"Error: --parent-filter was deprecated years ago with git-replace(1).  Use it instead.\")\n\n  def get_extended_refs(self):\n    if not self.args.tag_name_filter:\n      return self.args.refs\n    if '--all' in self.args.refs or '--tags' in self.args.refs:\n      # No need to follow tags pointing at refs we are exporting if we are\n      # already exporting all tags; besides, if we do so fast export will\n      # buggily export such tags multiple times, and fast-import will scream\n      # \"error: multiple updates for ref 'refs/tags/$WHATEVER' not allowed\"\n      return self.args.refs\n\n    # filter-branch treats --tag-name-filter as an implicit \"follow-tags\"-ish\n    # behavior.  So, we need to determine which tags point to commits we are\n    # rewriting.\n    output = subproc.check_output(['git', 'rev-list'] + self.args.refs)\n    all_commits = set(output.splitlines())\n\n    cmd = 'git show-ref --tags --dereference'.split()\n    output = subproc.check_output(cmd)\n\n    # In ideal world, follow_tags would be a list of tags which point at one\n    # of the commits in all_commits.  But since filter-branch is insane and\n    # we need to match its insanity, we instead store the tags as the values\n    # of a dict, with the keys being the new name for the given tags.  
The\n    # reason for this is due to problems with multiple tags mapping to the\n    # same name and filter-branch not wanting to error out on this obviously\n    # broken condition, as noted below.\n    follow_tags = {}\n    for line in output.splitlines():\n      objhash, refname = line.split()\n      if refname.endswith(b'^{}'):\n        refname = refname[0:-3]\n      refname = fr.decode(refname)\n      if refname in self.args.refs:\n        # Don't specify the same tag multiple times, or fast export will\n        # buggily export it multiple times, and fast-import will scream that\n        # \"error: multiple updates for ref 'refs/tags/$WHATEVER' not allowed\"\n        continue\n      if objhash in all_commits:\n        newname = self.tag_rename(refname.encode())\n        # [BUG-COMPAT] What if multiple tags map to the same newname, you ask?\n        # Well, a sane program would detect that and give the user an error.\n        # fast-import does precisely that.  We could do it too, but providing\n        # sane behavior goes against the core principle of filter-lamely:\n        #\n        #    dispense with sane behavior; do what filter-branch does instead\n        #\n        # And filter-branch has a testcase that relies on no error being\n        # shown to the user with only an update corresponding to the tag\n        # which was originally alphabetically last being performed.  We rely\n        # on show-ref printing tags in alphabetical order to match that lame\n        # functionality from filter-branch.\n        follow_tags[newname] = refname\n    return self.args.refs + list(follow_tags.values())\n\n  def _populate_full_index(self, commit):\n    subproc.check_call(['git', 'read-tree', commit])\n\n  def _populate_index(self, file_changes):\n    subproc.check_call('git read-tree --empty'.split())\n    # [BUG-COMPAT??] 
filter-branch tests are weird, and filter-branch itself\n    # manually sets GIT_ALLOW_NULL_SHA1, so to pass the same tests we need to\n    # as well.\n    os.environ['GIT_ALLOW_NULL_SHA1'] = '1'\n    p = subproc.Popen('git update-index -z --index-info'.split(),\n                      stdin = subprocess.PIPE)\n    for change in file_changes:\n      if change.type == b'D':\n        # We need to write something out to the index for the delete in\n        # case they are renaming all files (e.g. moving into a subdirectory);\n        # they need to be able to rename what is deleted so it actually deletes\n        # the right thing.\n        p.stdin.write(b'160000 %s\\t%s\\x00'\n                      % (self._special_delete_mode, change.filename))\n      else:\n        p.stdin.write(b'%s %s\\t%s\\x00' %\n                      (change.mode, change.blob_id, change.filename))\n    p.stdin.close()\n    if p.wait() != 0:\n      raise SystemExit(\"Failed to setup index for tree or index filter\")\n    del os.environ['GIT_ALLOW_NULL_SHA1']\n\n  def _update_file_changes_from_index(self, commit):\n    new_changes = {}\n    output = subproc.check_output('git ls-files -sz'.split())\n    for line in output.split(b'\\x00'):\n      if not line:\n        continue\n      mode_thru_stage, filename = line.split(b'\\t', 1)\n      mode, objid, stage = mode_thru_stage.split(b' ')\n      if mode == b'160000' and objid == self._special_delete_mode:\n        new_changes[filename] = fr.FileChange(b'D', filename)\n      elif set(objid) == set(b'0'):\n        # [BUG-COMPAT??] Despite filter-branch setting GIT_ALLOW_NULL_SHA1\n        # before calling read-tree, it expects errors to be thrown if any null\n        # shas remain.  
Crazy filter-branch.\n        raise SystemExit(\"Error: file {} has broken id {}\"\n                         .format(fr.decode(filename), fr.decode(objid)))\n      else:\n        new_changes[filename] = fr.FileChange(b'M', filename, objid, mode)\n    commit.file_changes = list(new_changes.values())\n\n  def _env_variables(self, commit):\n    # Define GIT_COMMIT and GIT_{AUTHOR,COMMITTER}_{NAME,EMAIL,DATE}\n    envvars = b''\n    envvars += b'export GIT_COMMIT=\"%s\"\\n' % commit.original_id\n    envvars += b'export GIT_AUTHOR_NAME=\"%s\"\\n' % commit.author_name\n    envvars += b'export GIT_AUTHOR_EMAIL=\"%s\"\\n' % commit.author_email\n    envvars += b'export GIT_AUTHOR_DATE=\"@%s\"\\n' % commit.author_date\n    envvars += b'export GIT_COMMITTER_NAME=\"%s\"\\n' % commit.committer_name\n    envvars += b'export GIT_COMMITTER_EMAIL=\"%s\"\\n' % commit.committer_email\n    envvars += b'export GIT_COMMITTER_DATE=\"@%s\"\\n' % commit.committer_date\n    return envvars\n\n  def fixup_commit(self, commit, metadata):\n    if self.args.msg_filter:\n      commit.message = subproc.check_output(self.args.msg_filter, shell=True,\n                                            input = commit.message)\n\n    if self.args.env_filter and not self.args.commit_filter:\n      envvars = self._env_variables(commit)\n      echo_results = b'''\n        echo \"${GIT_AUTHOR_NAME}\"\n        echo \"${GIT_AUTHOR_EMAIL}\"\n        echo \"${GIT_AUTHOR_DATE}\"\n        echo \"${GIT_COMMITTER_NAME}\"\n        echo \"${GIT_COMMITTER_EMAIL}\"\n        echo \"${GIT_COMMITTER_DATE}\"\n        '''\n      shell_snippet = envvars + self.args.env_filter.encode() + echo_results\n      output = subproc.check_output(['/bin/sh', '-c', shell_snippet]).strip()\n      last = output.splitlines()[-6:]\n      commit.author_name     = last[0]\n      commit.author_email    = last[1]\n      assert(last[2][0:1] == b'@')\n      commit.author_date     = last[2][1:]\n      commit.committer_name  = last[3]\n      
commit.committer_email = last[4]\n      assert(last[5][0:1] == b'@')\n      commit.committer_date  = last[5][1:]\n\n    if not (self.args.tree_filter or self.args.index_filter or\n            self.args.commit_filter):\n      return\n\n    # os.environ needs its arguments to be strings because it will call\n    # .encode on them.  So lame when we already know the necessary bytes,\n    # but whatever...just call fr.decode() and be done with it.\n    os.environ['GIT_INDEX_FILE'] = fr.decode(self.index_file)\n    os.environ['GIT_WORK_TREE'] = fr.decode(self.tmp_tree)\n    if self.args.tree_filter or self.args.index_filter:\n      full_tree = False\n      deletion_changes = [x for x in commit.file_changes if x.type == b'D']\n      if len(commit.parents) >= 1 and not isinstance(commit.parents[0], int):\n        # When a commit's parent is a commit hash rather than an integer,\n        # it means that we are doing a partial history rewrite with an\n        # excluded revision range.  In such a case, the first non-excluded\n        # commit (i.e. 
this commit) won't be building on a bunch of history\n        # that was filtered, so we filter the entire tree for that commit\n        # rather than just the files it modified relative to its parent.\n        full_tree = True\n        self._populate_full_index(commit.parents[0])\n      else:\n        self._populate_index(commit.file_changes)\n      if self.args.tree_filter:\n        # Make sure self.tmp_tree is a new clean directory and we're in it\n        if os.path.exists(self.tmp_tree):\n          shutil.rmtree(self.tmp_tree)\n        os.makedirs(self.tmp_tree)\n        # Put the files there\n        subproc.check_call('git checkout-index --all'.split())\n        # Call the tree filter\n        subproc.call(self.args.tree_filter, shell=True, cwd=self.tmp_tree)\n        # Add the files, then move out of the directory\n        subproc.check_call('git add -A'.split())\n      if self.args.index_filter:\n        subproc.call(self.args.index_filter, shell=True, cwd=self.tmp_tree)\n      self._update_file_changes_from_index(commit)\n      if full_tree:\n        commit.file_changes.insert(0, fr.FileChange(b'DELETEALL'))\n      elif deletion_changes and self.args.tree_filter:\n        orig_deletions = set(x.filename for x in deletion_changes)\n        # Populate tmp_tree with all the deleted files, each containing its\n        # original name\n        shutil.rmtree(self.tmp_tree)\n        os.makedirs(self.tmp_tree)\n        for change in deletion_changes:\n          dirname, basename = os.path.split(change.filename)\n          realdir = os.path.join(self.tmp_tree, dirname)\n          if not os.path.exists(realdir):\n            os.makedirs(realdir)\n          with open(os.path.join(realdir, basename), 'bw') as f:\n            f.write(change.filename)\n        # Call the tree filter\n        subproc.call(self.args.tree_filter, shell=True, cwd=self.tmp_tree)\n        # Get the updated file deletions\n        updated_deletion_paths = set()\n        for dirname, subdirs, 
files in os.walk(self.tmp_tree):\n          for basename in files:\n            filename = os.path.join(dirname, basename)\n            with open(filename, 'br') as f:\n              orig_name = f.read()\n            if orig_name in orig_deletions:\n              updated_deletion_paths.add(filename[len(self.tmp_tree)+1:])\n        # ...and finally add them to the list\n        commit.file_changes += [fr.FileChange(b'D', filename)\n                                for filename in updated_deletion_paths]\n\n    if self.args.commit_filter:\n      # Define author and committer info for commit_filter\n      envvars = self._env_variables(commit)\n      if self.args.env_filter:\n        envvars += self.args.env_filter.encode() + b'\\n'\n\n      # Get tree and parents we need to pass\n      cmd = b'git rev-parse %s^{tree}' % commit.original_id\n      tree = subproc.check_output(cmd.split()).strip()\n      parent_pairs = zip(['-p']*len(commit.parents), commit.parents)\n\n      # Define the command to run\n      combined_shell_snippet = (self._commit_filter_functions + envvars +\n                                self.args.commit_filter.encode())\n      cmd = ['/bin/sh', '-c', combined_shell_snippet, \"git commit-tree\", tree]\n      cmd += [item for pair in parent_pairs for item in pair]\n\n      # Run it and get the new commit\n      new_commit = subproc.check_output(cmd, input = commit.message).strip()\n      commit.skip(new_commit)\n\n      reset = fr.Reset(commit.branch, new_commit)\n      self.filter.insert(reset)\n    del os.environ['GIT_WORK_TREE']\n    del os.environ['GIT_INDEX_FILE']\n\n  def tag_rename(self, refname):\n    if not self.args.tag_name_filter or not refname.startswith(b'refs/tags/'):\n      return refname\n\n    newname = subproc.check_output(self.args.tag_name_filter, shell=True,\n                                   input=refname[10:]).strip()\n    return b'refs/tags/' + newname\n\n  def deref_tags(self, tag, metadata):\n    '''[BUG-COMPAT] fast-export 
and fast-import nicely and naturally handle tag\n       objects.  Trying to break this and destroy the correct handling of tags\n       requires extra work.  In particular, De-referencing tags and thus\n       forcing all tags to be lightweight is something that would only be done\n       by someone who was insane, or someone who was trying to mimic\n       filter-branch's functionality.  But then, perhaps I repeat myself.\n       Anyway, let's mimic yet another insanity of filter-branch here...\n    '''\n\n    if self.args.tag_name_filter:\n      return\n\n    tag.skip()\n    reset = fr.Reset(tag.ref, tag.from_ref)\n    self.filter.insert(reset, direct_insertion = False)\n\n  def muck_stuff_up(self):\n    self._check_for_unsupported_args()\n    self._setup()\n    extra_args = []\n    if self.args.subdirectory_filter:\n      extra_args = ['--subdirectory-filter', self.args.subdirectory_filter]\n      self.args.prune_empty = True\n    fr_args = fr.FilteringOptions.parse_args(['--preserve-commit-hashes',\n                                              '--preserve-commit-encoding',\n                                              '--partial',\n                                              '--force'] + extra_args)\n    fr_args.prune_empty = 'always' if self.args.prune_empty else 'never'\n    fr_args.refs = self.get_extended_refs()\n    self.filter = fr.RepoFilter(fr_args,\n                                commit_callback=self.fixup_commit,\n                                refname_callback=self.tag_rename,\n                                tag_callback=self.deref_tags)\n    self.filter.run()\n    self._write_original_refs()\n    self._cleanup()\n\noverrides = ('GIT_TEST_DISALLOW_ABBREVIATED_OPTIONS',\n             'I_PROMISE_TO_UPGRADE_TO_FILTER_REPO')\nif not any(x in os.environ for x in overrides) and sys.argv[1:] != ['--help']:\n  print(\"\"\"\nWARNING: While filter-lamely is a better filter-branch than filter-branch,\n         it is vastly inferior to filter-repo.  
Please use filter-repo\n         instead. (You can squelch this warning and five second pause with\n           export {}=1 )\"\"\".format(overrides[-1]))\n  import time\n  time.sleep(5)\nfilter_branch = UserInterfaceNightmare()\nfilter_branch.muck_stuff_up()\n"
  },
  {
    "path": "contrib/filter-repo-demos/insert-beginning",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a simple program that will insert some regular file into the root\ncommit(s) of history, e.g. adding a file named LICENSE or COPYING to the\nfirst commit.  It also rewrites commit hashes in commit messages to update\nthem based on these changes.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\n# Technically, this program could be replaced by a one-liner:\n#    git filter-repo --commit-callback \"if not commit.parents: commit.file_changes.append(FileChange(b'M', $RELATIVE_TO_PROJECT_ROOT_PATHNAME, b'$(git hash-object -w $FILENAME)', b'100644'))\"\n# but let's do it as a full-fledged program that imports git_filter_repo\n# anyway...\n\nimport argparse\nimport os\nimport subprocess\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nparser = argparse.ArgumentParser(\n          description='Add a file to the root commit(s) of history')\nparser.add_argument('--file', type=os.fsencode,\n        help=(\"Relative-path to file whose contents should be added to root commit(s)\"))\nargs = parser.parse_args()\nif not args.file:\n  raise SystemExit(\"Error: Need to specify the --file option\")\nif any([x == b\".\" or x== b\"..\" for x in args.file.split(b\"/\")]):\n  raise SystemExit(f\"Error: Invalid path components in {fr.decode(args.file)}\")\nif not os.path.isfile(args.file):\n  raise SystemExit(f\"Error: {fr.decode(args.file)} not found\")\n\nfhash = subprocess.check_output(['git', 'hash-object', '-w', args.file]).strip()\nfmode = b'100755' if os.access(args.file, os.X_OK) else b'100644'\n# FIXME: I've assumed the file wasn't a directory or symlink...\n\ndef fixup_commits(commit, metadata):\n  if len(commit.parents) == 0:\n    
commit.file_changes.append(fr.FileChange(b'M', args.file, fhash, fmode))\n  # FIXME: What if the history already had a file matching the given name,\n  # but which didn't exist until later in history?  Is the intent for the\n  # user to keep the other version that existed when it existed, or to\n  # overwrite the version for all of history with the specified file?  I\n  # don't know, but if it's the latter, we'd need to add an 'else' clause\n  # like the following:\n  #else:\n  #  commit.file_changes = [x for x in commit.file_changes\n  #                         if x.filename != args.file]\n\nfr_args = fr.FilteringOptions.parse_args(['--preserve-commit-encoding',\n                                          '--force',\n                                          '--replace-refs', 'update-no-add'])\nfilter = fr.RepoFilter(fr_args, commit_callback=fixup_commits)\nfilter.run()\n"
  },
  {
    "path": "contrib/filter-repo-demos/lint-history",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a simple program that will run a linting program on all non-binary\nfiles in history.  It also rewrites commit hashes in commit messages to\nrefer to the new commits with the rewritten files.  You call it like this:\n   lint-history my-lint-command --arg whatever --another-arg\nand it will repeatedly call\n   my-lint-command --arg whatever --another-arg $TEMPORARY_FILE\nwith $TEMPORARY_FILE having contents of some file from history.\n\nNOTE: Several people have taken and modified this script for a variety\nof special cases (linting python files, linting jupyter notebooks, just\nlinting java files, etc.) and posted their modifications at\n  https://github.com/newren/git-filter-repo/issues/45\nFeel free to take a look and adopt some of their ideas.  Most of these\nmodifications are probably strictly unnecessary since you could just make\na lint-script that takes the filename, checks that it matches what you\nwant, and then calls the real linter.  But I guess folks don't like making\nan intermediate script.  
So I eventually added the --relevant flag for\npicking out certain files providing yet another way to handle it.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\n# Technically, if you are only running on all non-binary files and don't care\n# about filenames, then this program could be replaced by a \"one-liner\"; e.g.\n#    git filter-repo --blob-callback '\n#      if not b\"\\0\" in blob.data[0:8192]:\n#        filename = \".git/info/tmpfile\"\n#        with open(filename, \"wb\") as f:\n#          f.write(blob.data)\n#        subprocess.check_call([\"lint_program\", \"--some\", \"arg\", filename])\n#        with open(filename, \"rb\") as f:\n#          blob.data = f.read()\n#        os.remove(filename)\n#      '\n# but let's do it as a full-fledged program that imports git_filter_repo\n# and show how to also do it with filename handling...\n\nimport argparse\nimport os\nimport subprocess\nimport tempfile\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  
Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nexample_text = '''CALLBACK\n\n    When you pass --relevant 'BODY', the following style of function\n    will be compiled and called:\n\n        def is_relevant(filename):\n            BODY\n\n    Where filename is the full relative path from the toplevel of the\n    repository.\n\n    Thus, to only run on files with a \".txt\" extension you would run\n        lint-history --relevant 'return filename.endswith(b\".txt\")' ...\n\nEXAMPLES\n\n    To run dos2unix on all non-binary files in history:\n        lint-history dos2unix\n\n    To run eslint --fix on all .js files in history:\n        lint-history --relevant 'return filename.endswith(b\".js\")' eslint --fix\n\nINTERNALS\n\n    Linting of files in history will be done by writing the files to a\n    temporary directory before running the linting program; the\n    location of this temporary directory can be controlled via the\n    TMPDIR environment variable as per\n    https://docs.python.org/3/library/tempfile.html#tempfile.mkdtemp.\n    '''\n\nparser = argparse.ArgumentParser(description='Run a program (e.g. code formatter or linter) on files in history',\n                                 epilog = example_text,\n                                 formatter_class=argparse.RawDescriptionHelpFormatter)\n\nparser.add_argument('--relevant', metavar=\"FUNCTION_BODY\",\n        help=(\"Python code for determining whether to apply linter to a \"\n              \"given filename.  Implies --filenames-important.  See CALLBACK \"\n              \"below.\"))\nparser.add_argument('--filenames-important', action='store_true',\n        help=(\"By default, contents are written to a temporary file with a \"\n              \"random name.  If the linting program needs to know the file \"\n              \"basename to operate correctly (e.g. 
because it needs to know \"\n              \"the file's extension), then pass this argument\"))\nparser.add_argument('--refs', nargs='+',\n                    help=(\"Limit history rewriting to the specified refs. \"\n                          \"Implies --partial of git-filter-repo (and all its \"\n                          \"implications).\"))\nparser.add_argument('command', nargs=argparse.REMAINDER,\n        help=(\"Lint command to run, other than the filename at the end\"))\nlint_args = parser.parse_args()\nif not lint_args.command:\n  raise SystemExit(\"Error: Need to specify a lint command\")\nif len(lint_args.command) > 1 and lint_args.command[0] == '--':\n  lint_args.command.pop(0)\n\ntmpdir = None\nblobs_handled = {}\ncat_file_process = None\ndef lint_with_real_filenames(commit, metadata):\n  for change in commit.file_changes:\n    if change.blob_id in blobs_handled:\n      change.blob_id = blobs_handled[change.blob_id]\n    elif change.type == b'D':\n      continue\n    elif not is_relevant(change.filename):\n      continue\n    else:\n      # Get the old blob contents\n      cat_file_process.stdin.write(change.blob_id + b'\\n')\n      cat_file_process.stdin.flush()\n      objhash, objtype, objsize = cat_file_process.stdout.readline().split()\n      contents_plus_newline = cat_file_process.stdout.read(int(objsize)+1)\n\n      # Write it out to a file with the same basename\n      filename = os.path.join(tmpdir, os.path.basename(change.filename))\n      with open(filename, \"wb\") as f:\n        f.write(contents_plus_newline[:-1])\n\n      # Lint the file\n      subprocess.check_call(lint_args.command + [filename.decode('utf-8')])\n\n      # Get the new contents\n      with open(filename, \"rb\") as f:\n        blob = fr.Blob(f.read())\n\n      # Insert the new file into the filter's stream, and remove the tempfile\n      filter.insert(blob)\n      os.remove(filename)\n\n      # Record our handling of the blob and use it for this change\n      
blobs_handled[change.blob_id] = blob.id\n      change.blob_id = blob.id\n\ndef lint_non_binary_blobs(blob, metadata):\n  if not b\"\\0\" in blob.data[0:8192]:\n    filename = '.git/info/tmpfile'\n    with open(filename, \"wb\") as f:\n      f.write(blob.data)\n    subprocess.check_call(lint_args.command + [filename])\n    with open(filename, \"rb\") as f:\n      blob.data = f.read()\n    os.remove(filename)\n\nif lint_args.filenames_important and not lint_args.relevant:\n  lint_args.relevant = 'return True'\nif lint_args.relevant:\n  body = lint_args.relevant\n  exec('def is_relevant(filename):\\n  '+'\\n  '.join(body.splitlines()),\n       globals())\n  lint_args.filenames_important = True\ninput_args = []\nif lint_args.refs:\n  input_args = [\"--refs\",] + lint_args.refs\nargs = fr.FilteringOptions.parse_args(input_args, error_on_empty = False)\nargs.force = True\nif lint_args.filenames_important:\n  tmpdir = tempfile.mkdtemp().encode()\n  cat_file_process = subprocess.Popen(['git', 'cat-file', '--batch'],\n                                      stdin = subprocess.PIPE,\n                                      stdout = subprocess.PIPE)\n  filter = fr.RepoFilter(args, commit_callback=lint_with_real_filenames)\n  filter.run()\n  cat_file_process.stdin.close()\n  cat_file_process.wait()\nelse:\n  if not os.path.exists('.git/info'):\n    os.makedirs('.git/info')\n  filter = fr.RepoFilter(args, blob_callback=lint_non_binary_blobs)\n  filter.run()\n"
  },
  {
    "path": "contrib/filter-repo-demos/signed-off-by",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nThis is a simple program that will add Signed-off-by: tags to a range of\ncommits.  Example usage, to add a signed-off-by trailer to every commit that\nis not in next but is in any of master, develop, or maint:\n  signed-off-by master develop maint ^next\nMore likely called as:\n  signed-off-by master~4..master\nThere's no real reason to use this script since `rebase --signoff` exists;\nit's mostly just a demonstration of what could be done.\n\"\"\"\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport argparse\nimport re\nimport subprocess\ntry:\n  import git_filter_repo as fr\nexcept ImportError:\n  raise SystemExit(\"Error: Couldn't find git_filter_repo.py.  Did you forget to make a symlink to git-filter-repo named git_filter_repo.py or did you forget to put the latter in your PYTHONPATH?\")\n\nparser = argparse.ArgumentParser(\n          description=\"Add 'Signed-off-by:' tags to a range of commits\")\nparser.add_argument('rev_list_args', metavar='rev-list args',\n                    nargs=argparse.REMAINDER,\n        help=(\"Range of commits (need to include ref tips) to work on\"))\nmyargs = parser.parse_args()\n\nuser_name = subprocess.check_output('git config user.name'.split()).rstrip()\nuser_email = subprocess.check_output('git config user.email'.split()).rstrip()\ntrailer = b'Signed-off-by: %s <%s>' % (user_name, user_email)\n\ndef add_signed_off_by_trailer(commit, metadata):\n  if trailer in commit.message:\n    return\n\n  # We want to add the trailer, but we want it to be separated from any\n  # existing paragraphs by a blank line.  
However, if the commit message\n  # already ends with trailers, then we want all trailers to be on adjacent\n  # lines.\n  if not commit.message.endswith(b'\\n'):\n    commit.message += b'\\n'\n  lastline = commit.message.splitlines()[-1]\n  if not re.match(b'[A-Za-z0-9-_]*: ', lastline):\n    commit.message += b'\\n'\n  commit.message += trailer\n\n# Setting source and target to anything prevents:\n#   * remapping origin remote tracking branches to regular branches\n#   * deletion of the origin remote\n#   * nuking unused refs\n#   * nuking reflogs\n#   * repacking\n# so we cheat and set source and target both to '.'\nargs = fr.FilteringOptions.parse_args(['--force',\n                                       '--refs'] + myargs.rev_list_args)\nargs.refs = myargs.rev_list_args\nfilter = fr.RepoFilter(args, commit_callback=add_signed_off_by_trailer)\nfilter.run()\n"
  },
  {
    "path": "git-filter-repo",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\ngit-filter-repo filters git repositories, similar to git filter-branch, BFG\nrepo cleaner, and others.  The basic idea is that it works by running\n   git fast-export <options> | filter | git fast-import <options>\nwhere this program not only launches the whole pipeline but also serves as\nthe 'filter' in the middle.  It does a few additional things on top as well\nin order to make it into a well-rounded filtering tool.\n\ngit-filter-repo can also be used as a library for more involved filtering\noperations; however:\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\n  Programs using git-filter-repo as a library can reach pretty far into its\n  internals, but I am not prepared to guarantee backward compatibility of\n  all APIs.  I suspect changes will be rare, but I reserve the right to\n  change any API.  Since it is assumed that repository filtering is\n  something one would do very rarely, and in particular that it's a\n  one-shot operation, this should not be a problem in practice for anyone.\n  However, if you want to re-use a program you have written that uses\n  git-filter-repo as a library (or makes use of one of its --*-callback\n  arguments), you should either make sure you are using the same version of\n  git and git-filter-repo, or make sure to re-test it.\n\n  If there are particular pieces of the API you are concerned about, and\n  there is not already a testcase for it in t9391-lib-usage.sh or\n  t9392-python-callback.sh, please contribute a testcase.  
That will not\n  prevent me from changing the API, but it will allow you to look at the\n  history of a testcase to see whether and how the API changed.\n  ***** END API BACKWARD COMPATIBILITY CAVEAT *****\n\"\"\"\n\nimport argparse\nimport collections\nimport fnmatch\nimport gettext\nimport io\nimport os\nimport platform\nimport re\nimport shutil\nimport subprocess\nimport sys\nimport time\nimport textwrap\n\nfrom datetime import tzinfo, timedelta, datetime\n\n__all__ = [\"Blob\", \"Reset\", \"FileChange\", \"Commit\", \"Tag\", \"Progress\",\n           \"Checkpoint\", \"FastExportParser\", \"ProgressWriter\",\n           \"string_to_date\", \"date_to_string\",\n           \"record_id_rename\", \"GitUtils\", \"FilteringOptions\", \"RepoFilter\"]\n\n# The globals to make visible to callbacks. They will see all our imports for\n# free, as well as our public API.\npublic_globals = [\"__builtins__\", \"argparse\", \"collections\", \"fnmatch\",\n                  \"gettext\", \"io\", \"os\", \"platform\", \"re\", \"shutil\",\n                  \"subprocess\", \"sys\", \"time\", \"textwrap\", \"tzinfo\",\n                  \"timedelta\", \"datetime\"] + __all__\n\ndeleted_hash = b'0'*40\nwrite_marks = True\ndate_format_permissive = True\n\ndef gettext_poison(msg):\n  if \"GIT_TEST_GETTEXT_POISON\" in os.environ: # pragma: no cover\n    return \"# GETTEXT POISON #\"\n  return gettext.gettext(msg)\n\n_ = gettext_poison\n\ndef setup_gettext():\n  TEXTDOMAIN=\"git-filter-repo\"\n  podir = os.environ.get(\"GIT_TEXTDOMAINDIR\") or \"@@LOCALEDIR@@\"\n  if not os.path.isdir(podir): # pragma: no cover\n    podir = None  # Python has its own fallback; use that\n\n  ## This looks like the most straightforward translation of the relevant\n  ## code in git.git:gettext.c and git.git:perl/Git/I18n.pm:\n  #import locale\n  #locale.setlocale(locale.LC_MESSAGES, \"\");\n  #locale.setlocale(locale.LC_TIME, \"\");\n  #locale.textdomain(TEXTDOMAIN);\n  #locale.bindtextdomain(TEXTDOMAIN, 
podir);\n  ## but the python docs suggest using the gettext module (which doesn't\n  ## have setlocale()) instead, so:\n  gettext.textdomain(TEXTDOMAIN);\n  gettext.bindtextdomain(TEXTDOMAIN, podir);\n\ndef _timedelta_to_seconds(delta):\n  \"\"\"\n  Converts timedelta to seconds\n  \"\"\"\n  offset = delta.days*86400 + delta.seconds + (delta.microseconds+0.0)/1000000\n  return round(offset)\n\nclass FixedTimeZone(tzinfo):\n  \"\"\"\n  Fixed offset in minutes east from UTC.\n  \"\"\"\n\n  tz_re = re.compile(br'^([-+]?)(\\d\\d)(\\d\\d)$')\n\n  def __init__(self, offset_string):\n    tzinfo.__init__(self)\n    sign, hh, mm = FixedTimeZone.tz_re.match(offset_string).groups()\n    factor = -1 if (sign and sign == b'-') else 1\n    self._offset = timedelta(minutes = factor*(60*int(hh) + int(mm)))\n    self._offset_string = offset_string\n\n  def utcoffset(self, dt):\n    return self._offset\n\n  def tzname(self, dt):\n    return self._offset_string\n\n  def dst(self, dt):\n    return timedelta(0)\n\ndef string_to_date(datestring):\n  (unix_timestamp, tz_offset) = datestring.split()\n  return datetime.fromtimestamp(int(unix_timestamp),\n                                FixedTimeZone(tz_offset))\n\ndef date_to_string(dateobj):\n  epoch = datetime.fromtimestamp(0, dateobj.tzinfo)\n  return(b'%d %s' % (int(_timedelta_to_seconds(dateobj - epoch)),\n                     dateobj.tzinfo.tzname(0)))\n\ndef decode(bytestr):\n  'Try to convert bytestr to utf-8 for outputting as an error message.'\n  return bytestr.decode('utf-8', 'backslashreplace')\n\ndef glob_to_regex(glob_bytestr):\n  'Translate glob_bytestr into a regex on bytestrings'\n\n  # fnmatch.translate is idiotic and won't accept bytestrings\n  if (decode(glob_bytestr).encode() != glob_bytestr): # pragma: no cover\n    raise SystemExit(_(\"Error: Cannot handle glob %s\").format(glob_bytestr))\n\n  # Create regex operating on string\n  regex = fnmatch.translate(decode(glob_bytestr))\n\n  # FIXME: This is an ugly hack...\n 
 # fnmatch.translate tries to do multi-line matching and wants the glob to\n  # match up to the end of the input, which isn't relevant for us, so we\n  # have to modify the regex.  fnmatch.translate has used different regex\n  # constructs to achieve this with different python versions, so we have\n  # to check for each of them and then fix it up.  It would be much better\n  # if fnmatch.translate could just take some flags to allow us to specify\n  # what we want rather than employing this hackery, but since it\n  # doesn't...\n  if regex.endswith(r'\\Z(?ms)'): # pragma: no cover\n    regex = regex[0:-7]\n  elif regex.startswith(r'(?s:') and regex.endswith(r')\\Z'): # pragma: no cover\n    regex = regex[4:-3]\n  elif regex.startswith(r'(?s:') and regex.endswith(r')\\z'): # pragma: no cover\n    # Yaay, python3.14 for senselessly duplicating \\Z as \\z...\n    regex = regex[4:-3]\n\n  # Finally, convert back to regex operating on bytestr\n  return regex.encode()\n\nclass PathQuoting:\n  _unescape = {b'a': b'\\a',\n               b'b': b'\\b',\n               b'f': b'\\f',\n               b'n': b'\\n',\n               b'r': b'\\r',\n               b't': b'\\t',\n               b'v': b'\\v',\n               b'\"': b'\"',\n               b'\\\\':b'\\\\'}\n  _unescape_re = re.compile(br'\\\\([a-z\"\\\\]|[0-9]{3})')\n  _escape = [bytes([x]) for x in range(127)]+[\n             b'\\\\'+bytes(ord(c) for c in oct(x)[2:]) for x in range(127,256)]\n  _reverse = dict(map(reversed, _unescape.items()))\n  for x in _reverse:\n    _escape[ord(x)] = b'\\\\'+_reverse[x]\n  _special_chars = [len(x) > 1 for x in _escape]\n\n  @staticmethod\n  def unescape_sequence(orig):\n    seq = orig.group(1)\n    return PathQuoting._unescape[seq] if len(seq) == 1 else bytes([int(seq, 8)])\n\n  @staticmethod\n  def dequote(quoted_string):\n    if quoted_string.startswith(b'\"'):\n      assert quoted_string.endswith(b'\"')\n      return PathQuoting._unescape_re.sub(PathQuoting.unescape_sequence,\n  
                                        quoted_string[1:-1])\n    return quoted_string\n\n  @staticmethod\n  def enquote(unquoted_string):\n    # Option 1: Quoting when fast-export would:\n    #    pqsc = PathQuoting._special_chars\n    #    if any(pqsc[x] for x in set(unquoted_string)):\n    # Option 2, perf hack: do minimal amount of quoting required by fast-import\n    if unquoted_string.startswith(b'\"') or b'\\n' in unquoted_string:\n      pqe = PathQuoting._escape\n      return b'\"' + b''.join(pqe[x] for x in unquoted_string) + b'\"'\n    return unquoted_string\n\nclass AncestryGraph(object):\n  \"\"\"\n  A class that maintains a directed acyclic graph of commits for the purpose of\n  determining if one commit is the ancestor of another.\n\n  A note about identifiers in Commit objects:\n    * Commit objects have 2 identifiers: commit.old_id and commit.id, because:\n    * The original fast-export stream identified commits by an identifier.\n      This is often an integer, but is sometimes a hash (particularly when\n      --reference-excluded-parents is provided)\n    * The new fast-import stream we use may not use the same identifiers.\n      If new blobs or commits are inserted (such as lint-history does), then\n      the integer (or hash) is no longer valid.\n\n  A note about identifiers in AncestryGraph objects, of which there are three:\n    * A given AncestryGraph is based on either commit.old_id or commit.id, but\n      not both.  These are the keys for self.value.\n    * Using full hashes (occasionally) for children in self.graph felt\n      wasteful, so we use our own internal integer within self.graph.\n      self.value maps from commit {old_}id to our internal integer id.\n    * When working with commit.old_id, it is also sometimes useful to be able\n      to map these to the original hash, i.e. commit.original_id.  
So, we\n      also have self.git_hash for mapping from commit.old_id to git's commit\n      hash.\n  \"\"\"\n\n  def __init__(self):\n    # The next internal identifier we will use; increments with every commit\n    # added to the AncestryGraph\n    self.cur_value = 0\n\n    # A mapping from the external identifiers given to us to the simple integers\n    # we use in self.graph\n    self.value = {}\n\n    # A tuple of (depth, list-of-ancestors).  Values and keys in this graph are\n    # all integers from the (values of the) self.value dict.  The depth of a\n    # commit is one more than the max depth of any of its ancestors.\n    self.graph = {}\n\n    # A mapping from external identifier (i.e. from the keys of self.value) to\n    # the hash of the given commit.  Only populated for graphs based on\n    # commit.old_id, since we won't know until later what the git_hash is for\n    # graphs based on commit.id (since we have to wait for fast-import to\n    # create the commit and notify us of its hash; see _pending_renames\n    # elsewhere).\n    self.git_hash = {}\n\n    # Reverse maps; only populated if needed.  
Caller responsible to check\n    # and ensure they are populated\n    self._reverse_value = {}\n    self._hash_to_id = {}\n\n    # Cached results from previous calls to is_ancestor().\n    self._cached_is_ancestor = {}\n\n  def record_external_commits(self, external_commits):\n    \"\"\"\n    Record in graph that each commit in external_commits exists, and is\n    treated as a root commit with no parents.\n    \"\"\"\n    for c in external_commits:\n      if c not in self.value:\n        self.cur_value += 1\n        self.value[c] = self.cur_value\n        self.graph[self.cur_value] = (1, [])\n        self.git_hash[c] = c\n\n  def add_commit_and_parents(self, commit, parents, githash = None):\n    \"\"\"\n    Record in graph that commit has the given parents (all identified by\n    fast export stream identifiers, usually integers but sometimes hashes).\n    parents _MUST_ have been first recorded.  commit _MUST_ not have been\n    recorded yet.  Also, record the mapping between commit and githash, if\n    githash is given.\n    \"\"\"\n    assert all(p in self.value for p in parents)\n    assert commit not in self.value\n\n    # Get values for commit and parents\n    self.cur_value += 1\n    self.value[commit] = self.cur_value\n    if githash:\n      self.git_hash[commit] = githash\n    graph_parents = [self.value[x] for x in parents]\n\n    # Determine depth for commit, then insert the info into the graph\n    depth = 1\n    if parents:\n      depth += max(self.graph[p][0] for p in graph_parents)\n    self.graph[self.cur_value] = (depth, graph_parents)\n\n  def record_hash(self, commit_id, githash):\n    '''\n    If a githash was not recorded for commit_id, when add_commit_and_parents\n    was called, add it now.\n    '''\n    assert commit_id in self.value\n    assert commit_id not in self.git_hash\n    self.git_hash[commit_id] = githash\n\n  def _ensure_reverse_maps_populated(self):\n    if not self._hash_to_id:\n      assert not self._reverse_value\n      
self._hash_to_id = {v: k for k, v in self.git_hash.items()}\n      self._reverse_value = {v: k for k, v in self.value.items()}\n\n  def get_parent_hashes(self, commit_hash):\n    '''\n    Given a commit_hash, return its parents hashes\n    '''\n    #\n    # We have to map:\n    #    commit hash -> fast export stream id -> graph id\n    # then lookup\n    #    parent graph ids for given graph id\n    # then we need to map\n    #    parent graph ids -> parent fast export ids -> parent commit hashes\n    #\n    self._ensure_reverse_maps_populated()\n    commit_fast_export_id = self._hash_to_id[commit_hash]\n    commit_graph_id = self.value[commit_fast_export_id]\n    parent_graph_ids = self.graph[commit_graph_id][1]\n    parent_fast_export_ids = [self._reverse_value[x] for x in parent_graph_ids]\n    parent_hashes = [self.git_hash[x] for x in parent_fast_export_ids]\n    return parent_hashes\n\n  def map_to_hash(self, commit_id):\n    '''\n    Given a commit (by fast export stream id), return its hash\n    '''\n    return self.git_hash.get(commit_id, None)\n\n  def is_ancestor(self, possible_ancestor, check):\n    \"\"\"\n    Return whether possible_ancestor is an ancestor of check\n    \"\"\"\n    a, b = self.value[possible_ancestor], self.value[check]\n    original_pair = (a,b)\n    a_depth = self.graph[a][0]\n    ancestors = [b]\n    visited = set()\n    while ancestors:\n      ancestor = ancestors.pop()\n      prev_pair = (a, ancestor)\n      if prev_pair in self._cached_is_ancestor:\n        if not self._cached_is_ancestor[prev_pair]:\n          continue\n        self._cached_is_ancestor[original_pair] = True\n        return True\n      if ancestor in visited:\n        continue\n      visited.add(ancestor)\n      depth, more_ancestors = self.graph[ancestor]\n      if ancestor == a:\n        self._cached_is_ancestor[original_pair] = True\n        return True\n      elif depth <= a_depth:\n        continue\n      ancestors.extend(more_ancestors)\n    
self._cached_is_ancestor[original_pair] = False\n    return False\n\nclass MailmapInfo(object):\n  def __init__(self, filename):\n    self.changes = {}\n    self._parse_file(filename)\n\n  def _parse_file(self, filename):\n    name_and_email_re = re.compile(br'(.*?)\\s*<([^>]*)>\\s*')\n    comment_re = re.compile(br'\\s*#.*')\n    if not os.access(filename, os.R_OK):\n      raise SystemExit(_(\"Cannot read %s\") % decode(filename))\n    with open(filename, 'br') as f:\n      count = 0\n      for line in f:\n        count += 1\n        err = \"Unparseable mailmap file: line #{} is bad: {}\".format(count, line)\n        # Remove comments\n        line = comment_re.sub(b'', line)\n        # Remove leading and trailing whitespace\n        line = line.strip()\n        if not line:\n          continue\n\n        m = name_and_email_re.match(line)\n        if not m:\n          raise SystemExit(err)\n        proper_name, proper_email = m.groups()\n        if len(line) == m.end():\n          self.changes[(None, proper_email)] = (proper_name, proper_email)\n          continue\n        rest = line[m.end():]\n        m = name_and_email_re.match(rest)\n        if m:\n          commit_name, commit_email = m.groups()\n          if len(rest) != m.end():\n            raise SystemExit(err)\n        else:\n          commit_name, commit_email = rest, None\n        self.changes[(commit_name, commit_email)] = (proper_name, proper_email)\n\n  def translate(self, name, email):\n    ''' Given a name and email, return the expected new name and email from the\n        mailmap if there is a translation rule for it, otherwise just return\n        the given name and email.'''\n    for old, new in self.changes.items():\n      old_name, old_email = old\n      new_name, new_email = new\n      if (old_email is None or email.lower() == old_email.lower()) and (\n          name == old_name or not old_name):\n        return (new_name or name, new_email or email)\n    return (name, email)\n\nclass 
ProgressWriter(object):\n  def __init__(self):\n    self._last_progress_update = time.time()\n    self._last_message = None\n\n  def show(self, msg):\n    self._last_message = msg\n    now = time.time()\n    if now - self._last_progress_update > .1:\n      self._last_progress_update = now\n      sys.stdout.write(\"\\r{}\".format(msg))\n      sys.stdout.flush()\n\n  def finish(self):\n    self._last_progress_update = 0\n    if self._last_message:\n      self.show(self._last_message)\n    sys.stdout.write(\"\\n\")\n\nclass _IDs(object):\n  \"\"\"\n  A class that maintains the 'name domain' of all the 'marks' (short int\n  id for a blob/commit git object). There are two reasons this mechanism\n  is necessary:\n    (1) the output text of fast-export may refer to an object using a different\n        mark than the mark that was assigned to that object using IDS.new().\n        (This class allows you to translate the fast-export marks, \"old\" to\n         the marks assigned from IDS.new(), \"new\").\n    (2) when we prune a commit, its \"old\" id becomes invalid.  
Any commits\n        which had that commit as a parent need to use the nearest unpruned\n        ancestor as its parent instead.\n\n  Note that for purpose (1) above, this typically comes about because the user\n  manually creates Blob or Commit objects (for insertion into the stream).\n  It could also come about if we attempt to read the data from two different\n  repositories and try to combine the data (git fast-export will number ids\n  from 1...n, and having two 1's, two 2's, two 3's, causes issues; granted,\n  this scheme doesn't handle the two streams perfectly either, but if the first\n  fast export stream is entirely processed and handled before the second stream\n  is started, this mechanism may be sufficient to handle it).\n  \"\"\"\n\n  def __init__(self):\n    \"\"\"\n    Init\n    \"\"\"\n    # The id for the next created blob/commit object\n    self._next_id = 1\n\n    # A map of old-ids to new-ids (1:1 map)\n    self._translation = {}\n\n    # A map of new-ids to every old-id that points to the new-id (1:N map)\n    self._reverse_translation = {}\n\n  def has_renames(self):\n    \"\"\"\n    Return whether there have been ids remapped to new values\n    \"\"\"\n    return bool(self._translation)\n\n  def new(self):\n    \"\"\"\n    Should be called whenever a new blob or commit object is created. 
The\n    returned value should be used as the id/mark for that object.\n    \"\"\"\n    rv = self._next_id\n    self._next_id += 1\n    return rv\n\n  def record_rename(self, old_id, new_id, handle_transitivity = False):\n    \"\"\"\n    Record that old_id is being renamed to new_id.\n    \"\"\"\n    if old_id != new_id or old_id in self._translation:\n      # old_id -> new_id\n      self._translation[old_id] = new_id\n\n      # Transitivity will be needed if new commits are being inserted mid-way\n      # through a branch.\n      if handle_transitivity:\n        # Anything that points to old_id should point to new_id\n        if old_id in self._reverse_translation:\n          for id_ in self._reverse_translation[old_id]:\n            self._translation[id_] = new_id\n\n      # Record that new_id is pointed to by old_id\n      if new_id not in self._reverse_translation:\n        self._reverse_translation[new_id] = []\n      self._reverse_translation[new_id].append(old_id)\n\n  def translate(self, old_id):\n    \"\"\"\n    If old_id has been mapped to an alternate id, return the alternate id.\n    \"\"\"\n    if old_id in self._translation:\n      return self._translation[old_id]\n    else:\n      return old_id\n\n  def __str__(self):\n    \"\"\"\n    Convert IDs to string; used for debugging\n    \"\"\"\n    rv = \"Current count: %d\\nTranslation:\\n\" % self._next_id\n    for k in sorted(self._translation):\n      rv += \"  %d -> %s\\n\" % (k, self._translation[k])\n\n    rv += \"Reverse translation:\\n\"\n    reverse_keys = list(self._reverse_translation.keys())\n    if None in reverse_keys: # pragma: no cover\n      reverse_keys.remove(None)\n      reverse_keys = sorted(reverse_keys)\n      reverse_keys.append(None)\n    for k in reverse_keys:\n      rv += \"  \" + str(k) + \" -> \" + str(self._reverse_translation[k]) + \"\\n\"\n\n    return rv\n\nclass _GitElement(object):\n  \"\"\"\n  The base class for all git elements that we create.\n  \"\"\"\n\n  def 
__init__(self):\n    # A string that describes what type of Git element this is\n    self.type = None\n\n    # A flag telling us if this Git element has been dumped\n    # (i.e. printed) or skipped.  Typically elements that have been\n    # dumped or skipped will not be dumped again.\n    self.dumped = 0\n\n  def dump(self, file_):\n    \"\"\"\n    This version should never be called. Derived classes need to\n    override! We should note that subclasses should implement this\n    method such that the output would match the format produced by\n    fast-export.\n    \"\"\"\n    raise SystemExit(_(\"Unimplemented function: %s\") % type(self).__name__\n                     +\".dump()\") # pragma: no cover\n\n  def __bytes__(self):\n    \"\"\"\n    Convert GitElement to bytestring; used for debugging\n    \"\"\"\n    old_dumped = self.dumped\n    writeme = io.BytesIO()\n    self.dump(writeme)\n    output_lines = writeme.getvalue().splitlines()\n    writeme.close()\n    self.dumped = old_dumped\n    return b\"%s:\\n  %s\" % (type(self).__name__.encode(),\n                           b\"\\n  \".join(output_lines))\n\n  def skip(self, new_id=None):\n    \"\"\"\n    Ensures this element will not be written to output\n    \"\"\"\n    self.dumped = 2\n\nclass _GitElementWithId(_GitElement):\n  \"\"\"\n  The base class for Git elements that have IDs (commits and blobs)\n  \"\"\"\n\n  def __init__(self):\n    _GitElement.__init__(self)\n\n    # The mark (short, portable id) for this element\n    self.id = _IDS.new()\n\n    # The previous mark for this element\n    self.old_id = None\n\n  def skip(self, new_id=None):\n    \"\"\"\n    This element will no longer be automatically written to output. 
When a\n    commit gets skipped, its ID will need to be translated to that of its\n    parent.\n    \"\"\"\n    self.dumped = 2\n\n    _IDS.record_rename(self.old_id or self.id, new_id)\n\nclass Blob(_GitElementWithId):\n  \"\"\"\n  This class defines our representation of git blob elements (i.e. our\n  way of representing file contents).\n  \"\"\"\n\n  def __init__(self, data, original_id = None):\n    _GitElementWithId.__init__(self)\n\n    # Denote that this is a blob\n    self.type = 'blob'\n\n    # Record original id\n    self.original_id = original_id\n\n    # Stores the blob's data\n    assert(type(data) == bytes)\n    self.data = data\n\n  def dump(self, file_):\n    \"\"\"\n    Write this blob element to a file.\n    \"\"\"\n    self.dumped = 1\n    BLOB_HASH_TO_NEW_ID[self.original_id] = self.id\n    BLOB_NEW_ID_TO_HASH[self.id] = self.original_id\n\n    file_.write(b'blob\\n')\n    file_.write(b'mark :%d\\n' % self.id)\n    file_.write(b'data %d\\n%s' % (len(self.data), self.data))\n    file_.write(b'\\n')\n\n\nclass Reset(_GitElement):\n  \"\"\"\n  This class defines our representation of git reset elements.  
A reset\n  event is the creation (or recreation) of a named branch, optionally\n  starting from a specific revision).\n  \"\"\"\n\n  def __init__(self, ref, from_ref = None):\n    _GitElement.__init__(self)\n\n    # Denote that this is a reset\n    self.type = 'reset'\n\n    # The name of the branch being (re)created\n    self.ref = ref\n\n    # Some reference to the branch/commit we are resetting from\n    self.from_ref = from_ref\n\n  def dump(self, file_):\n    \"\"\"\n    Write this reset element to a file\n    \"\"\"\n    self.dumped = 1\n\n    file_.write(b'reset %s\\n' % self.ref)\n    if self.from_ref:\n      if isinstance(self.from_ref, int):\n        file_.write(b'from :%d\\n' % self.from_ref)\n      else:\n        file_.write(b'from %s\\n' % self.from_ref)\n      file_.write(b'\\n')\n\nclass FileChange(_GitElement):\n  \"\"\"\n  This class defines our representation of file change elements. File change\n  elements are components within a Commit element.\n  \"\"\"\n\n  def __init__(self, type_, filename = None, id_ = None, mode = None):\n    _GitElement.__init__(self)\n\n    # Denote the type of file-change (b'M' for modify, b'D' for delete, etc)\n    # We could\n    #   assert(type(type_) == bytes)\n    # here but I don't just due to worries about performance overhead...\n    self.type = type_\n\n    # Record the name of the file being changed\n    self.filename = filename\n\n    # Record the mode (mode describes type of file entry (non-executable,\n    # executable, or symlink)).\n    self.mode = mode\n\n    # blob_id is the id (mark) of the affected blob\n    self.blob_id = id_\n\n    if type_ == b'DELETEALL':\n      assert filename is None and id_ is None and mode is None\n      self.filename = b'' # Just so PathQuoting.enquote doesn't die\n    else:\n      assert filename is not None\n\n    if type_ == b'M':\n      assert id_ is not None and mode is not None\n    elif type_ == b'D':\n      assert id_ is None and mode is None\n    elif type_ == b'R':  
# pragma: no cover (now avoid fast-export renames)\n      assert mode is None\n      if id_ is None:\n        raise SystemExit(_(\"new name needed for rename of %s\") % filename)\n      self.filename = (self.filename, id_)\n      self.blob_id = None\n\n  def dump(self, file_):\n    \"\"\"\n    Write this file-change element to a file\n    \"\"\"\n    skipped_blob = (self.type == b'M' and self.blob_id is None)\n    if skipped_blob: return\n    self.dumped = 1\n\n    quoted_filename = PathQuoting.enquote(self.filename)\n    if self.type == b'M' and isinstance(self.blob_id, int):\n      file_.write(b'M %s :%d %s\\n' % (self.mode, self.blob_id, quoted_filename))\n    elif self.type == b'M':\n      file_.write(b'M %s %s %s\\n' % (self.mode, self.blob_id, quoted_filename))\n    elif self.type == b'D':\n      file_.write(b'D %s\\n' % quoted_filename)\n    elif self.type == b'DELETEALL':\n      file_.write(b'deleteall\\n')\n    else:\n      raise SystemExit(_(\"Unhandled filechange type: %s\") % self.type) # pragma: no cover\n\nclass Commit(_GitElementWithId):\n  \"\"\"\n  This class defines our representation of commit elements. 
Commit elements\n  contain all the information associated with a commit.\n  \"\"\"\n\n  def __init__(self, branch,\n               author_name,    author_email,    author_date,\n               committer_name, committer_email, committer_date,\n               message,\n               file_changes,\n               parents,\n               original_id = None,\n               encoding = None, # encoding for message; None implies UTF-8\n               **kwargs):\n    _GitElementWithId.__init__(self)\n    self.old_id = self.id\n\n    # Denote that this is a commit element\n    self.type = 'commit'\n\n    # Record the affected branch\n    self.branch = branch\n\n    # Record original id\n    self.original_id = original_id\n\n    # Record author's name\n    self.author_name  = author_name\n\n    # Record author's email\n    self.author_email = author_email\n\n    # Record date of authoring\n    self.author_date  = author_date\n\n    # Record committer's name\n    self.committer_name  = committer_name\n\n    # Record committer's email\n    self.committer_email = committer_email\n\n    # Record date the commit was made\n    self.committer_date  = committer_date\n\n    # Record commit message and its encoding\n    self.encoding = encoding\n    self.message = message\n\n    # List of file-changes associated with this commit. 
Note that file-changes\n    # are also represented as git elements\n    self.file_changes = file_changes\n\n    self.parents = parents\n\n  def dump(self, file_):\n    \"\"\"\n    Write this commit element to a file.\n    \"\"\"\n    self.dumped = 1\n\n    # Make output to fast-import slightly easier for humans to read if the\n    # message has no trailing newline of its own; cosmetic, but a nice touch...\n    extra_newline = b'\\n'\n    if self.message.endswith(b'\\n') or not (self.parents or self.file_changes):\n      extra_newline = b''\n\n    if not self.parents:\n      file_.write(b'reset %s\\n' % self.branch)\n    file_.write((b'commit %s\\n'\n                 b'mark :%d\\n'\n                 b'author %s <%s> %s\\n'\n                 b'committer %s <%s> %s\\n'\n                ) % (\n                  self.branch, self.id,\n                  self.author_name, self.author_email, self.author_date,\n                  self.committer_name, self.committer_email, self.committer_date\n               ))\n    if self.encoding:\n      file_.write(b'encoding %s\\n' % self.encoding)\n    file_.write(b'data %d\\n%s%s' %\n                (len(self.message), self.message, extra_newline))\n    for i, parent in enumerate(self.parents):\n      file_.write(b'from ' if i==0 else b'merge ')\n      if isinstance(parent, int):\n        file_.write(b':%d\\n' % parent)\n      else:\n        file_.write(b'%s\\n' % parent)\n    for change in self.file_changes:\n      change.dump(file_)\n    if not self.parents and not self.file_changes:\n      # Workaround a bug in pre-git-2.22 versions of fast-import with\n      # the get-mark directive.\n      file_.write(b'\\n')\n    file_.write(b'\\n')\n\n  def first_parent(self):\n    \"\"\"\n    Return first parent commit\n    \"\"\"\n    if self.parents:\n      return self.parents[0]\n    return None\n\n  def skip(self, new_id=None):\n    _SKIPPED_COMMITS.add(self.old_id or self.id)\n    _GitElementWithId.skip(self, new_id)\n\nclass 
Tag(_GitElementWithId):\n  \"\"\"\n  This class defines our representation of annotated tag elements.\n  \"\"\"\n\n  def __init__(self, ref, from_ref,\n               tagger_name, tagger_email, tagger_date, tag_msg,\n               original_id = None):\n    _GitElementWithId.__init__(self)\n    self.old_id = self.id\n\n    # Denote that this is a tag element\n    self.type = 'tag'\n\n    # Store the name of the tag\n    self.ref = ref\n\n    # Store the entity being tagged (this should be a commit)\n    self.from_ref = from_ref\n\n    # Record original id\n    self.original_id = original_id\n\n    # Store the name of the tagger\n    self.tagger_name  = tagger_name\n\n    # Store the email of the tagger\n    self.tagger_email = tagger_email\n\n    # Store the date\n    self.tagger_date  = tagger_date\n\n    # Store the tag message\n    self.message = tag_msg\n\n  def dump(self, file_):\n    \"\"\"\n    Write this tag element to a file\n    \"\"\"\n\n    self.dumped = 1\n\n    file_.write(b'tag %s\\n' % self.ref)\n    if (write_marks and self.id):\n      file_.write(b'mark :%d\\n' % self.id)\n    markfmt = b'from :%d\\n' if isinstance(self.from_ref, int) else b'from %s\\n'\n    file_.write(markfmt % self.from_ref)\n    if self.tagger_name:\n      file_.write(b'tagger %s <%s> ' % (self.tagger_name, self.tagger_email))\n      file_.write(self.tagger_date)\n      file_.write(b'\\n')\n    file_.write(b'data %d\\n%s' % (len(self.message), self.message))\n    file_.write(b'\\n')\n\nclass Progress(_GitElement):\n  \"\"\"\n  This class defines our representation of progress elements. 
The progress\n  element only contains a progress message, which is printed by fast-import\n  when it processes the progress output.\n  \"\"\"\n\n  def __init__(self, message):\n    _GitElement.__init__(self)\n\n    # Denote that this is a progress element\n    self.type = 'progress'\n\n    # Store the progress message\n    self.message = message\n\n  def dump(self, file_):\n    \"\"\"\n    Write this progress element to a file\n    \"\"\"\n    self.dumped = 1\n\n    file_.write(b'progress %s\\n' % self.message)\n    file_.write(b'\\n')\n\nclass Checkpoint(_GitElement):\n  \"\"\"\n  This class defines our representation of checkpoint elements.  These\n  elements represent events which force fast-import to close the current\n  packfile, start a new one, and to save out all current branch refs, tags\n  and marks.\n  \"\"\"\n\n  def __init__(self):\n    _GitElement.__init__(self)\n\n    # Denote that this is a checkpoint element\n    self.type = 'checkpoint'\n\n  def dump(self, file_):\n    \"\"\"\n    Write this checkpoint element to a file\n    \"\"\"\n    self.dumped = 1\n\n    file_.write(b'checkpoint\\n')\n    file_.write(b'\\n')\n\nclass LiteralCommand(_GitElement):\n  \"\"\"\n  This class defines our representation of commands. The literal command\n  includes only a single line, and is not processed in any special way.\n  \"\"\"\n\n  def __init__(self, line):\n    _GitElement.__init__(self)\n\n    # Denote that this is a literal element\n    self.type = 'literal'\n\n    # Store the command\n    self.line = line\n\n  def dump(self, file_):\n    \"\"\"\n    Write this literal command to a file\n    \"\"\"\n    self.dumped = 1\n\n    file_.write(self.line)\n\nclass Alias(_GitElement):\n  \"\"\"\n  This class defines our representation of fast-import alias elements.  
An\n  alias element is the setting of one mark to the same sha1sum as another,\n  usually because the newer mark corresponded to a pruned commit.\n  \"\"\"\n\n  def __init__(self, ref, to_ref):\n    _GitElement.__init__(self)\n    # Denote that this is an alias\n    self.type = 'alias'\n\n    self.ref = ref\n    self.to_ref = to_ref\n\n  def dump(self, file_):\n    \"\"\"\n    Write this alias element to a file\n    \"\"\"\n    self.dumped = 1\n\n    file_.write(b'alias\\nmark :%d\\nto :%d\\n\\n' % (self.ref, self.to_ref))\n\nclass FastExportParser(object):\n  \"\"\"\n  A class for parsing and handling the output from fast-export. This\n  class allows the user to register callbacks when various types of\n  data are encountered in the fast-export output. The basic idea is that,\n  FastExportParser takes fast-export output, creates the various objects\n  as it encounters them, the user gets to use/modify these objects via\n  callbacks, and finally FastExportParser outputs the modified objects\n  in fast-import format (presumably so they can be used to create a new\n  repo).\n  \"\"\"\n\n  def __init__(self,\n               tag_callback = None,   commit_callback = None,\n               blob_callback = None,  progress_callback = None,\n               reset_callback = None, checkpoint_callback = None,\n               done_callback = None):\n    # Members below simply store callback functions for the various git\n    # elements\n    self._tag_callback        = tag_callback\n    self._blob_callback       = blob_callback\n    self._reset_callback      = reset_callback\n    self._commit_callback     = commit_callback\n    self._progress_callback   = progress_callback\n    self._checkpoint_callback = checkpoint_callback\n    self._done_callback       = done_callback\n\n    # Keep track of which refs appear from the export, and which make it to\n    # the import (pruning of empty commits, renaming of refs, and creating\n    # new manual objects and inserting them can cause 
these to differ).\n    self._exported_refs = set()\n    self._imported_refs = set()\n\n    # A list of the branches we've seen, plus the last known commit they\n    # pointed to.  An entry in latest_*commit will be deleted if we get a\n    # reset for that branch.  These are used because of fast-import's weird\n    # decision to allow having an implicit parent via naming the branch\n    # instead of requiring branches to be specified via 'from' directives.\n    self._latest_commit = {}\n    self._latest_orig_commit = {}\n\n    # A handle to the input source for the fast-export data\n    self._input = None\n\n    # A handle to the output file for the output we generate (we call dump\n    # on many of the git elements we create).\n    self._output = None\n\n    # Stores the contents of the current line of input being parsed\n    self._currentline = ''\n\n    # Tracks LFS objects we have found\n    self._lfs_object_tracker = None\n\n    # Compile some regexes and cache those\n    self._mark_re = re.compile(br'mark :(\\d+)\\n$')\n    self._parent_regexes = {}\n    parent_regex_rules = (br' :(\\d+)\\n$', br' ([0-9a-f]{40})\\n')\n    for parent_refname in (b'from', b'merge'):\n      ans = [re.compile(parent_refname+x) for x in parent_regex_rules]\n      self._parent_regexes[parent_refname] = ans\n    self._quoted_string_re = re.compile(br'\"(?:[^\"\\\\]|\\\\.)*\"')\n    self._refline_regexes = {}\n    for refline_name in (b'reset', b'commit', b'tag', b'progress'):\n      self._refline_regexes[refline_name] = re.compile(refline_name+b' (.*)\\n$')\n    self._user_regexes = {}\n    for user in (b'author', b'committer', b'tagger'):\n      self._user_regexes[user] = re.compile(user + b' (.*?) 
<(.*?)> (.*)\\n$')\n\n  def _advance_currentline(self):\n    \"\"\"\n    Grab the next line of input\n    \"\"\"\n    self._currentline = self._input.readline()\n\n  def _parse_optional_mark(self):\n    \"\"\"\n    If the current line contains a mark, parse it and advance to the\n    next line; return None otherwise\n    \"\"\"\n    mark = None\n    matches = self._mark_re.match(self._currentline)\n    if matches:\n      mark = int(matches.group(1))\n      self._advance_currentline()\n    return mark\n\n  def _parse_optional_parent_ref(self, refname):\n    \"\"\"\n    If the current line contains a reference to a parent commit, then\n    parse it and advance the current line; otherwise return None. Note\n    that the name of the reference ('from', 'merge') must match the\n    refname arg.\n    \"\"\"\n    orig_baseref, baseref = None, None\n    rule, altrule = self._parent_regexes[refname]\n    matches = rule.match(self._currentline)\n    if matches:\n      orig_baseref = int(matches.group(1))\n      # We translate the parent commit mark to what it needs to be in\n      # our mark namespace\n      baseref = _IDS.translate(orig_baseref)\n      self._advance_currentline()\n    else:\n      matches = altrule.match(self._currentline)\n      if matches:\n        orig_baseref = matches.group(1)\n        baseref = orig_baseref\n        self._advance_currentline()\n    return orig_baseref, baseref\n\n  def _parse_optional_filechange(self):\n    \"\"\"\n    If the current line contains a file-change object, then parse it\n    and advance the current line; otherwise return None. 
We only care\n    about file changes of type b'M' and b'D' (these are the only types\n    of file-changes that fast-export will provide).\n    \"\"\"\n    filechange = None\n    changetype = self._currentline[0:1]\n    if changetype == b'M':\n      (changetype, mode, idnum, path) = self._currentline.split(None, 3)\n      if idnum[0:1] == b':':\n        idnum = idnum[1:]\n      path = path.rstrip(b'\\n')\n      # Check for LFS objects from sources before we might toss this filechange\n      if mode != b'160000' and self._lfs_object_tracker:\n        value = int(idnum) if len(idnum) != 40 else idnum\n        self._lfs_object_tracker.check_file_change_data(value, True)\n      # We translate the idnum to our id system\n      if len(idnum) != 40:\n        idnum = _IDS.translate( int(idnum) )\n      if idnum is not None:\n        if path.startswith(b'\"'):\n          path = PathQuoting.dequote(path)\n        filechange = FileChange(b'M', path, idnum, mode)\n      else:\n        filechange = b'skipped'\n      self._advance_currentline()\n    elif changetype == b'D':\n      (changetype, path) = self._currentline.split(None, 1)\n      path = path.rstrip(b'\\n')\n      if path.startswith(b'\"'):\n        path = PathQuoting.dequote(path)\n      filechange = FileChange(b'D', path)\n      self._advance_currentline()\n    elif changetype == b'R':  # pragma: no cover (now avoid fast-export renames)\n      rest = self._currentline[2:-1]\n      if rest.startswith(b'\"'):\n        m = self._quoted_string_re.match(rest)\n        if not m:\n          raise SystemExit(_(\"Couldn't parse rename source\"))\n        orig = PathQuoting.dequote(m.group(0))\n        new = rest[m.end()+1:]\n      else:\n        orig, new = rest.split(b' ', 1)\n      if new.startswith(b'\"'):\n        new = PathQuoting.dequote(new)\n      filechange = FileChange(b'R', orig, new)\n      self._advance_currentline()\n    return filechange\n\n  def _parse_original_id(self):\n    original_id = 
self._currentline[len(b'original-oid '):].rstrip()\n    self._advance_currentline()\n    return original_id\n\n  def _parse_encoding(self):\n    encoding = self._currentline[len(b'encoding '):].rstrip()\n    self._advance_currentline()\n    return encoding\n\n  def _parse_ref_line(self, refname):\n    \"\"\"\n    Parses string data (often a branch name) from current-line. The name of\n    the string data must match the refname arg. The program will crash if\n    current-line does not match, so current-line will always be advanced if\n    this method returns.\n    \"\"\"\n    matches = self._refline_regexes[refname].match(self._currentline)\n    if not matches:\n      raise SystemExit(_(\"Malformed %(refname)s line: '%(line)s'\") %\n                       ({'refname': refname, 'line':self._currentline})\n                       ) # pragma: no cover\n    ref = matches.group(1)\n    self._advance_currentline()\n    return ref\n\n  def _parse_user(self, usertype):\n    \"\"\"\n    Get user name, email, datestamp from current-line. Current-line will\n    be advanced.\n    \"\"\"\n    user_regex = self._user_regexes[usertype]\n    (name, email, when) = user_regex.match(self._currentline).groups()\n\n    self._advance_currentline()\n    return (name, email, when)\n\n  def _parse_data(self):\n    \"\"\"\n    Reads data from _input. Current-line will be advanced until it is beyond\n    the data.\n    \"\"\"\n    fields = self._currentline.split()\n    assert fields[0] == b'data'\n    size = int(fields[1])\n    data = self._input.read(size)\n    self._advance_currentline()\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n    return data\n\n  def _parse_blob(self):\n    \"\"\"\n    Parse input data into a Blob object. Once the Blob has been created, it\n    will be handed off to the appropriate callbacks. Current-line will be\n    advanced until it is beyond this blob's data. 
The Blob will be dumped\n    to _output once everything else is done (unless it has been skipped by\n    the callback).\n    \"\"\"\n    # Parse the Blob\n    self._advance_currentline()\n    id_ = self._parse_optional_mark()\n\n    original_id = None\n    if self._currentline.startswith(b'original-oid'):\n      original_id = self._parse_original_id();\n\n    data = self._parse_data()\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # Create the blob\n    blob = Blob(data, original_id)\n\n    # If fast-export text had a mark for this blob, need to make sure this\n    # mark translates to the blob's true id.\n    if id_:\n      blob.old_id = id_\n      _IDS.record_rename(id_, blob.id)\n\n    # Check for LFS objects\n    if self._lfs_object_tracker:\n      self._lfs_object_tracker.check_blob_data(data, blob.old_id, True)\n\n    # Call any user callback to allow them to use/modify the blob\n    if self._blob_callback:\n      self._blob_callback(blob)\n\n    # Now print the resulting blob\n    if not blob.dumped:\n      blob.dump(self._output)\n\n  def _parse_reset(self):\n    \"\"\"\n    Parse input data into a Reset object. Once the Reset has been created,\n    it will be handed off to the appropriate callbacks. Current-line will\n    be advanced until it is beyond the reset data. The Reset will be dumped\n    to _output once everything else is done (unless it has been skipped by\n    the callback).\n    \"\"\"\n    # Parse the Reset\n    ref = self._parse_ref_line(b'reset')\n    self._exported_refs.add(ref)\n    ignoreme, from_ref = self._parse_optional_parent_ref(b'from')\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # fast-export likes to print extraneous resets that serve no purpose.\n    # While we could continue processing such resets, that is a waste of\n    # resources.  
Also, we want to avoid recording that this ref was\n    # seen in such cases, since this ref could be rewritten to nothing.\n    if not from_ref:\n      self._latest_commit.pop(ref, None)\n      self._latest_orig_commit.pop(ref, None)\n      return\n\n    # Create the reset\n    reset = Reset(ref, from_ref)\n\n    # Call any user callback to allow them to modify the reset\n    if self._reset_callback:\n      self._reset_callback(reset)\n\n    # Update metadata\n    self._latest_commit[reset.ref] = reset.from_ref\n    self._latest_orig_commit[reset.ref] = reset.from_ref\n\n    # Now print the resulting reset\n    if not reset.dumped:\n      self._imported_refs.add(reset.ref)\n      reset.dump(self._output)\n\n  def _parse_commit(self):\n    \"\"\"\n    Parse input data into a Commit object. Once the Commit has been created,\n    it will be handed off to the appropriate callbacks. Current-line will\n    be advanced until it is beyond the commit data. The Commit will be dumped\n    to _output once everything else is done (unless it has been skipped by\n    the callback OR the callback has removed all file-changes from the commit).\n    \"\"\"\n    # Parse the Commit. 
This may look involved, but it's pretty simple; it only\n    # looks bad because a commit object contains many pieces of data.\n    branch = self._parse_ref_line(b'commit')\n    self._exported_refs.add(branch)\n    id_ = self._parse_optional_mark()\n\n    original_id = None\n    if self._currentline.startswith(b'original-oid'):\n      original_id = self._parse_original_id();\n\n    author_name = None\n    author_email = None\n    if self._currentline.startswith(b'author'):\n      (author_name, author_email, author_date) = self._parse_user(b'author')\n\n    (committer_name, committer_email, committer_date) = \\\n      self._parse_user(b'committer')\n\n    if not author_name and not author_email:\n      (author_name, author_email, author_date) = \\\n        (committer_name, committer_email, committer_date)\n\n    encoding = None\n    if self._currentline.startswith(b'encoding '):\n      encoding = self._parse_encoding()\n\n    commit_msg = self._parse_data()\n\n    pinfo = [self._parse_optional_parent_ref(b'from')]\n    # Due to empty pruning, we can have real 'from' and 'merge' lines that\n    # due to commit rewriting map to a parent of None.  We need to record\n    # 'from' if its non-None, and we need to parse all 'merge' lines.\n    while self._currentline.startswith(b'merge '):\n      pinfo.append(self._parse_optional_parent_ref(b'merge'))\n    orig_parents, parents = [list(tmp) for tmp in zip(*pinfo)]\n\n    # No parents is oddly represented as [None] instead of [], due to the\n    # special 'from' handling.  Convert it here to a more canonical form.\n    if parents == [None]:\n      parents = []\n    if orig_parents == [None]:\n      orig_parents = []\n\n    # fast-import format is kinda stupid in that it allows implicit parents\n    # based on the branch name instead of requiring them to be specified by\n    # 'from' directives.  
The only way to get no parent is by using a reset\n    # directive first, which clears the latest_commit_for_this_branch tracking.\n    if not orig_parents and self._latest_commit.get(branch):\n      parents = [self._latest_commit[branch]]\n    if not orig_parents and self._latest_orig_commit.get(branch):\n      orig_parents = [self._latest_orig_commit[branch]]\n\n    # Get the list of file changes\n    file_changes = []\n    file_change = self._parse_optional_filechange()\n    had_file_changes = file_change is not None\n    while file_change:\n      if not (type(file_change) == bytes and file_change == b'skipped'):\n        file_changes.append(file_change)\n      file_change = self._parse_optional_filechange()\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # Okay, now we can finally create the Commit object\n    commit = Commit(branch,\n                    author_name,    author_email,    author_date,\n                    committer_name, committer_email, committer_date,\n                    commit_msg, file_changes, parents, original_id, encoding)\n\n    # If fast-export text had a mark for this commit, need to make sure this\n    # mark translates to the commit's true id.\n    if id_:\n      commit.old_id = id_\n      _IDS.record_rename(id_, commit.id)\n\n    # refs/notes/ put commit-message-related material in blobs, and name their\n    # files according to the hash of other commits.  That totally messes with\n    # all normal callbacks; fast-export should really export these as different\n    # kinds of objects.  
Until then, let's just pass these commits through as-is\n    # and hope the blob callbacks don't mess things up.\n    if commit.branch.startswith(b'refs/notes/'):\n      self._imported_refs.add(commit.branch)\n      commit.dump(self._output)\n      return\n\n    # Call any user callback to allow them to modify the commit\n    aux_info = {'orig_parents': orig_parents,\n                'had_file_changes': had_file_changes}\n    if self._commit_callback:\n      self._commit_callback(commit, aux_info)\n\n    # Now print the resulting commit, or if prunable skip it\n    self._latest_orig_commit[branch] = commit.id\n    if not (commit.old_id or commit.id) in _SKIPPED_COMMITS:\n      self._latest_commit[branch] = commit.id\n    if not commit.dumped:\n      self._imported_refs.add(commit.branch)\n      commit.dump(self._output)\n\n  def _parse_tag(self):\n    \"\"\"\n    Parse input data into a Tag object. Once the Tag has been created,\n    it will be handed off to the appropriate callbacks. Current-line will\n    be advanced until it is beyond the tag data. 
The Tag will be dumped\n    to _output once everything else is done (unless it has been skipped by\n    the callback).\n    \"\"\"\n    # Parse the Tag\n    tag = self._parse_ref_line(b'tag')\n    self._exported_refs.add(b'refs/tags/'+tag)\n    id_ = self._parse_optional_mark()\n    ignoreme, from_ref = self._parse_optional_parent_ref(b'from')\n\n    original_id = None\n    if self._currentline.startswith(b'original-oid'):\n      original_id = self._parse_original_id();\n\n    tagger_name, tagger_email, tagger_date = None, None, None\n    if self._currentline.startswith(b'tagger'):\n      (tagger_name, tagger_email, tagger_date) = self._parse_user(b'tagger')\n    tag_msg = self._parse_data()\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # Create the tag\n    tag = Tag(tag, from_ref,\n              tagger_name, tagger_email, tagger_date, tag_msg,\n              original_id)\n\n    # If fast-export text had a mark for this tag, need to make sure this\n    # mark translates to the tag's true id.\n    if id_:\n      tag.old_id = id_\n      _IDS.record_rename(id_, tag.id)\n\n    # Call any user callback to allow them to modify the tag\n    if self._tag_callback:\n      self._tag_callback(tag)\n\n    # The tag might not point at anything that still exists (self.from_ref\n    # will be None if the commit it pointed to and all its ancestors were\n    # pruned due to being empty)\n    if tag.from_ref:\n      # Print out this tag's information\n      if not tag.dumped:\n        self._imported_refs.add(b'refs/tags/'+tag.ref)\n        tag.dump(self._output)\n    else:\n      tag.skip()\n\n  def _parse_progress(self):\n    \"\"\"\n    Parse input data into a Progress object. Once the Progress has\n    been created, it will be handed off to the appropriate\n    callbacks. Current-line will be advanced until it is beyond the\n    progress data. 
The Progress will be dumped to _output once\n    everything else is done (unless it has been skipped by the callback).\n    \"\"\"\n    # Parse the Progress\n    message = self._parse_ref_line(b'progress')\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # Create the progress message\n    progress = Progress(message)\n\n    # Call any user callback to allow them to modify the progress message\n    if self._progress_callback:\n      self._progress_callback(progress)\n\n    # NOTE: By default, we do NOT print the progress message; git\n    # fast-import would write it to fast_import_pipes which could mess with\n    # our parsing of output from the 'ls' and 'get-mark' directives we send\n    # to fast-import.  If users want these messages, they need to process\n    # and handle them in the appropriate callback above.\n\n  def _parse_checkpoint(self):\n    \"\"\"\n    Parse input data into a Checkpoint object. Once the Checkpoint has\n    been created, it will be handed off to the appropriate\n    callbacks. Current-line will be advanced until it is beyond the\n    checkpoint data. The Checkpoint will be dumped to _output once\n    everything else is done (unless it has been skipped by the callback).\n    \"\"\"\n    # Parse the Checkpoint\n    self._advance_currentline()\n    if self._currentline == b'\\n':\n      self._advance_currentline()\n\n    # Create the checkpoint\n    checkpoint = Checkpoint()\n\n    # Call any user callback to allow them to drop the checkpoint\n    if self._checkpoint_callback:\n      self._checkpoint_callback(checkpoint)\n\n    # NOTE: By default, we do NOT print the checkpoint message; although\n    # we would only realistically get them with --stdin, the fact that we\n    # are filtering makes me think the checkpointing is less likely to be\n    # reasonable.  In fact, I don't think it's necessary in general.  
If\n    # users do want it, they should process it in the checkpoint_callback.\n\n  def _parse_literal_command(self):\n    \"\"\"\n    Parse literal command.  Then just dump the line as is.\n    \"\"\"\n    # Create the literal command object\n    command = LiteralCommand(self._currentline)\n    self._advance_currentline()\n\n    # Now print the resulting literal command\n    if not command.dumped:\n      command.dump(self._output)\n\n  def insert(self, obj):\n    assert not obj.dumped\n    obj.dump(self._output)\n    if type(obj) == Commit:\n      self._imported_refs.add(obj.branch)\n    elif type(obj) in (Reset, Tag):\n      self._imported_refs.add(obj.ref)\n\n  def run(self, input, output):\n    \"\"\"\n    This method filters fast export output.\n    \"\"\"\n    # Set input. If no args provided, use stdin.\n    self._input = input\n    self._output = output\n\n    # Run over the input and do the filtering\n    self._advance_currentline()\n    while self._currentline:\n      if   self._currentline.startswith(b'blob'):\n        self._parse_blob()\n      elif self._currentline.startswith(b'reset'):\n        self._parse_reset()\n      elif self._currentline.startswith(b'commit'):\n        self._parse_commit()\n      elif self._currentline.startswith(b'tag'):\n        self._parse_tag()\n      elif self._currentline.startswith(b'progress'):\n        self._parse_progress()\n      elif self._currentline.startswith(b'checkpoint'):\n        self._parse_checkpoint()\n      elif self._currentline.startswith(b'feature'):\n        self._parse_literal_command()\n      elif self._currentline.startswith(b'option'):\n        self._parse_literal_command()\n      elif self._currentline.startswith(b'done'):\n        if self._done_callback:\n          self._done_callback()\n        self._parse_literal_command()\n        # Prevent confusion from others writing additional stuff that'll just\n        # be ignored\n        self._output.close()\n      elif 
self._currentline.startswith(b'#'):\n        self._parse_literal_command()\n      elif self._currentline.startswith(b'get-mark') or \\\n           self._currentline.startswith(b'cat-blob') or \\\n           self._currentline.startswith(b'ls'):\n        raise SystemExit(_(\"Unsupported command: '%s'\") % self._currentline)\n      else:\n        raise SystemExit(_(\"Could not parse line: '%s'\") % self._currentline)\n\n  def get_exported_and_imported_refs(self):\n    return self._exported_refs, self._imported_refs\n\ndef record_id_rename(old_id, new_id):\n  \"\"\"\n  Register a new translation\n  \"\"\"\n  handle_transitivity = True\n  _IDS.record_rename(old_id, new_id, handle_transitivity)\n\n# Internal globals\n_IDS = _IDs()\n_SKIPPED_COMMITS = set()\nBLOB_HASH_TO_NEW_ID = {}\nBLOB_NEW_ID_TO_HASH = {}\nsdr_next_steps = _(\"\"\"\nNEXT STEPS FOR YOUR SENSITIVE DATA REMOVAL:\n  * If you are doing your rewrite in multiple steps, ignore these next steps\n    until you have completed all your invocations of git-filter-repo.\n  * See the \"Sensitive Data Removal\" subsection of the \"DISCUSSION\" section\n    of the manual for more details about any of the steps below.\n  * Inspect this repository and verify that the sensitive data is indeed\n    completely removed from all commits.\n  * Force push the rewritten history to the server:\n      %s\n  * Contact the server admins for additional steps they need to take; the\n    First Changed Commit(s)%s may come in handy here.\n  * Have other colleagues with a clone either discard their clone and reclone\n    OR follow the detailed steps in the manual to repeatedly rebase and\n    purge the sensitive data from their copy.  
Again, the First Changed\n    Commit(s)%s may come in handy.\n  * See the \"Prevent repeats and avoid future sensitive data spills\" section\n    of the manual.\n\"\"\"[1:])\n\nclass SubprocessWrapper(object):\n  @staticmethod\n  def decodify(args):\n    if type(args) == str:\n      return args\n    else:\n      assert type(args) == list\n      return [decode(x) if type(x)==bytes else x for x in args]\n\n  @staticmethod\n  def call(*args, **kwargs):\n    if 'cwd' in kwargs:\n      kwargs['cwd'] = decode(kwargs['cwd'])\n    return subprocess.call(SubprocessWrapper.decodify(*args), **kwargs)\n\n  @staticmethod\n  def check_output(*args, **kwargs):\n    if 'cwd' in kwargs:\n      kwargs['cwd'] = decode(kwargs['cwd'])\n    return subprocess.check_output(SubprocessWrapper.decodify(*args), **kwargs)\n\n  @staticmethod\n  def check_call(*args, **kwargs): # pragma: no cover  # used by filter-lamely\n    if 'cwd' in kwargs:\n      kwargs['cwd'] = decode(kwargs['cwd'])\n    return subprocess.check_call(SubprocessWrapper.decodify(*args), **kwargs)\n\n  @staticmethod\n  def Popen(*args, **kwargs):\n    if 'cwd' in kwargs:\n      kwargs['cwd'] = decode(kwargs['cwd'])\n    return subprocess.Popen(SubprocessWrapper.decodify(*args), **kwargs)\n\nsubproc = subprocess\nif platform.system() == 'Windows' or 'PRETEND_UNICODE_ARGS' in os.environ:\n  subproc = SubprocessWrapper\n\nclass GitUtils(object):\n  @staticmethod\n  def get_commit_count(repo, *args):\n    \"\"\"\n    Return the number of commits that have been made on repo.\n    \"\"\"\n    if not args:\n      args = ['--all']\n    if len(args) == 1 and isinstance(args[0], list):\n      args = args[0]\n    p = subproc.Popen([\"git\", \"rev-list\", \"--count\"] + args,\n                      stdout=subprocess.PIPE, stderr=subprocess.PIPE,\n                      cwd=repo)\n    if p.wait() != 0:\n      raise SystemExit(_(\"%s does not appear to be a valid git repository\")\n                       % decode(repo))\n    return 
int(p.stdout.read())\n\n  @staticmethod\n  def get_total_objects(repo):\n    \"\"\"\n    Return the number of objects (both packed and unpacked)\n    \"\"\"\n    p1 = subproc.Popen([\"git\", \"count-objects\", \"-v\"],\n                          stdout=subprocess.PIPE, cwd=repo)\n    lines = p1.stdout.read().splitlines()\n    # Return unpacked objects + packed-objects\n    return int(lines[0].split()[1]) + int(lines[2].split()[1])\n\n  @staticmethod\n  def is_repository_bare(repo_working_dir):\n    out = subproc.check_output('git rev-parse --is-bare-repository'.split(),\n                               cwd=repo_working_dir)\n    return (out.strip() == b'true')\n\n  @staticmethod\n  def determine_git_dir(repo_working_dir):\n    d = subproc.check_output('git rev-parse --git-dir'.split(),\n                             cwd=repo_working_dir).strip()\n    if repo_working_dir==b'.' or d.startswith(b'/'):\n      return d\n    return os.path.join(repo_working_dir, d)\n\n  @staticmethod\n  def get_refs(repo_working_dir):\n    try:\n      output = subproc.check_output('git show-ref'.split(),\n                                    cwd=repo_working_dir)\n    except subprocess.CalledProcessError as e:\n      # If error code is 1, there just aren't any refs; i.e. new repo.\n      # If error code is other than 1, some other error (e.g. 
not a git repo)\n      if e.returncode != 1:\n        raise SystemExit('fatal: {}'.format(e))\n      output = ''\n    return dict(reversed(x.split()) for x in output.splitlines())\n\n  @staticmethod\n  def get_config_settings(repo_working_dir):\n    output = ''\n    try:\n      output = subproc.check_output('git config --list --null'.split(),\n                                    cwd=repo_working_dir)\n    except subprocess.CalledProcessError as e: # pragma: no cover\n      raise SystemExit('fatal: {}'.format(e))\n\n    # FIXME: Ignores multi-valued keys, just let them overwrite for now\n    return dict(item.split(b'\\n', maxsplit=1)\n                for item in output.strip().split(b\"\\0\") if item)\n\n  @staticmethod\n  def get_blob_sizes(quiet = False):\n    blob_size_progress = ProgressWriter()\n    num_blobs = 0\n    processed_blobs_msg = _(\"Processed %d blob sizes\")\n\n    # Get sizes of blobs by sha1\n    cmd = '--batch-check=%(objectname) %(objecttype) ' + \\\n          '%(objectsize) %(objectsize:disk)'\n    cf = subproc.Popen(['git', 'cat-file', '--batch-all-objects', cmd],\n                       bufsize = -1,\n                       stdout = subprocess.PIPE)\n    unpacked_size = {}\n    packed_size = {}\n    for line in cf.stdout:\n      try:\n        sha, objtype, objsize, objdisksize = line.split()\n        objsize, objdisksize = int(objsize), int(objdisksize)\n        if objtype == b'blob':\n          unpacked_size[sha] = objsize\n          packed_size[sha] = objdisksize\n          num_blobs += 1\n      except ValueError: # pragma: no cover\n        sys.stderr.write(_(\"Error: unexpected `git cat-file` output: \\\"%s\\\"\\n\") % line)\n      if not quiet:\n        blob_size_progress.show(processed_blobs_msg % num_blobs)\n    cf.wait()\n    if not quiet:\n      blob_size_progress.finish()\n    return unpacked_size, packed_size\n\n  @staticmethod\n  def get_file_changes(repo, parent_hash, commit_hash):\n    \"\"\"\n    Return a FileChanges list with 
the differences between parent_hash\n    and commit_hash\n    \"\"\"\n    file_changes = []\n\n    cmd = [\"git\", \"diff-tree\", \"-r\", parent_hash, commit_hash]\n    output = subproc.check_output(cmd, cwd=repo)\n    for line in output.splitlines():\n      fileinfo, path = line.split(b'\\t', 1)\n      if path.startswith(b'\"'):\n        path = PathQuoting.dequote(path)\n      oldmode, mode, oldhash, newhash, changetype = fileinfo.split()\n      if changetype == b'D':\n        file_changes.append(FileChange(b'D', path))\n      elif changetype in (b'A', b'M', b'T'):\n        identifier = BLOB_HASH_TO_NEW_ID.get(newhash, newhash)\n        file_changes.append(FileChange(b'M', path, identifier, mode))\n      else: # pragma: no cover\n        raise SystemExit(\"Unknown change type for line {}\".format(line))\n\n    return file_changes\n\n  @staticmethod\n  def print_my_version():\n    with open(__file__, 'br') as f:\n      contents = f.read()\n    # If people replaced @@LOCALEDIR@@ string to point at their local\n    # directory, undo it so we can get original source version.\n    contents = re.sub(br'\\A#\\!.*',\n                      br'#!/usr/bin/env python3', contents)\n    contents = re.sub(br'(\\(\"GIT_TEXTDOMAINDIR\"\\) or \").*\"',\n                      br'\\1@@LOCALEDIR@@\"', contents)\n\n    cmd = 'git hash-object --stdin'.split()\n    version = subproc.check_output(cmd, input=contents).strip()\n    print(decode(version[0:12]))\n\nclass FilteringOptions(object):\n  default_replace_text = b'***REMOVED***'\n  class AppendFilter(argparse.Action):\n    def __call__(self, parser, namespace, values, option_string=None):\n      user_path = values\n      suffix = option_string[len('--path-'):] or 'match'\n      if suffix.startswith('rename'):\n        mod_type = 'rename'\n        match_type = option_string[len('--path-rename-'):] or 'match'\n        values = values.split(b':')\n        if len(values) != 2:\n          raise SystemExit(_(\"Error: --path-rename expects 
one colon in its\"\n                             \" argument: <old_name:new_name>.\"))\n        if values[0] and values[1] and not (\n           values[0].endswith(b'/') == values[1].endswith(b'/')):\n          raise SystemExit(_(\"Error: With --path-rename, if OLD_NAME and \"\n                             \"NEW_NAME are both non-empty and either ends \"\n                             \"with a slash then both must.\"))\n        if any(v.startswith(b'/') for v in values):\n          raise SystemExit(_(\"Error: Pathnames cannot begin with a '/'\"))\n        components = values[0].split(b'/') + values[1].split(b'/')\n      else:\n        mod_type = 'filter'\n        match_type = suffix\n        components = values.split(b'/')\n        if values.startswith(b'/'):\n          raise SystemExit(_(\"Error: Pathnames cannot begin with a '/'\"))\n      for illegal_path in [b'.', b'..']:\n        if illegal_path in components:\n          raise SystemExit(_(\"Error: Invalid path component '%s' found in '%s'\")\n                           % (decode(illegal_path), decode(user_path)))\n      if match_type == 'regex':\n        values = re.compile(values)\n      items = getattr(namespace, self.dest, []) or []\n      items.append((mod_type, match_type, values))\n      if (match_type, mod_type) == ('glob', 'filter'):\n        if not values.endswith(b'*'):\n          extension = b'*' if values.endswith(b'/') else b'/*'\n          items.append((mod_type, match_type, values+extension))\n      setattr(namespace, self.dest, items)\n\n  class HelperFilter(argparse.Action):\n    def __call__(self, parser, namespace, values, option_string=None):\n      af = FilteringOptions.AppendFilter(dest='path_changes',\n                                         option_strings=None)\n      dirname = values if values[-1:] == b'/' else values+b'/'\n      if option_string == '--subdirectory-filter':\n        af(parser, namespace, dirname,     '--path-match')\n        af(parser, namespace, dirname+b':', 
'--path-rename')\n      elif option_string == '--to-subdirectory-filter':\n        af(parser, namespace, b':'+dirname, '--path-rename')\n      else:\n        raise SystemExit(_(\"Error: HelperFilter given invalid option_string: %s\")\n                         % option_string) # pragma: no cover\n\n  class FileWithPathsFilter(argparse.Action):\n    def __call__(self, parser, namespace, values, option_string=None):\n      if not namespace.path_changes:\n        namespace.path_changes = []\n      namespace.path_changes += FilteringOptions.get_paths_from_file(values)\n\n  @staticmethod\n  def create_arg_parser():\n    # Include usage in the summary, so we can put the description first\n    summary = _('''Rewrite (or analyze) repository history\n\n    git-filter-repo destructively rewrites history (unless --analyze or\n    --dry-run are given) according to specified rules.  It refuses to do any\n    rewriting unless either run from a clean fresh clone, or --force was\n    given.\n\n    Basic Usage:\n      git-filter-repo --analyze\n      git-filter-repo [FILTER/RENAME/CONTROL OPTIONS]\n\n    See EXAMPLES section for details.\n    ''').rstrip()\n\n    # Provide a long helpful examples section\n    example_text = _('''CALLBACKS\n\n    Most callback functions are of the same general format.  
For a command line\n    argument like\n      --foo-callback 'BODY'\n\n    the following code will be compiled and called:\n      def foo_callback(foo):\n        BODY\n\n    The exception on callbacks is the --file-info-callback, which will be\n    discussed further below.\n\n    Given the callback style, we can thus make a simple callback to replace\n    'Jon' with 'John' in author/committer/tagger names:\n      git filter-repo --name-callback 'return name.replace(b\"Jon\", b\"John\")'\n\n    To remove all 'Tested-by' tags in commit (or tag) messages:\n      git filter-repo --message-callback 'return re.sub(br\"\\\\nTested-by:.*\", \"\", message)'\n\n    To remove all .DS_Store files:\n      git filter-repo --filename-callback 'return None if os.path.basename(filename) == b\".DS_Store\" else filename'\n\n    Note that if BODY resolves to a filename, then the contents of that file\n    will be used as the BODY in the callback function.\n\n    The --file-info-callback has a more involved function callback; for it the\n    following code will be compiled and called:\n      def file_info_callback(filename, mode, blob_id, value):\n        BODY\n\n    It is designed to be used in cases where filtering depends on both\n    filename and contents (and maybe mode).  It is called for file changes\n    other than deletions (since deletions have no file contents to operate\n    on).  This callback is expected to return a tuple of (filename, mode,\n    blob_id).  
It can make use of the following functions from the value\n    instance:\n      value.get_contents_by_identifier(blob_id) -> contents (bytestring)\n      value.get_size_by_identifier(blob_id) -> size_of_blob (int)\n      value.insert_file_with_contents(contents) -> blob_id\n      value.is_binary(contents) -> bool\n      value.apply_replace_text(contents) -> new_contents (bytestring)\n    and can read/write the following data member from the value instance:\n      value.data (dict)\n\n    The filename can be used for renaming the file similar to\n    --filename-callback (or None to drop the change), and mode is one\n    of b'100644', b'100755', b'120000', or b'160000'.\n\n    For more detailed examples and explanations AND caveats, see\n      https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#CALLBACKS\n\nEXAMPLES\n\n    To get a bunch of reports mentioning renames that have occurred in\n    your repo and listing sizes of objects aggregated by any of path,\n    directory, extension, or blob-id:\n      git filter-repo --analyze\n\n    (These reports can help you choose how to filter your repo; it can\n    be useful to re-run this command after filtering to regenerate the\n    report and verify the changes look correct.)\n\n    To extract the history that touched just 'guides' and 'tools/releases':\n      git filter-repo --path guides/ --path tools/releases\n\n    To remove foo.zip and bar/baz/zips from every revision in history:\n      git filter-repo --path foo.zip --path bar/baz/zips/ --invert-paths\n\n    To replace the text 'password' with 'p455w0rd':\n      git filter-repo --replace-text <(echo \"password==>p455w0rd\")\n\n    To use the current version of the .mailmap file to update authors,\n    committers, and taggers throughout history and make it permanent:\n      git filter-repo --use-mailmap\n\n    To extract the history of 'src/', rename all files to have a new leading\n    directory 'my-module' 
(e.g. src/foo.java -> my-module/src/foo.java), and\n    add a 'my-module-' prefix to all tags:\n      git filter-repo --path src/ --to-subdirectory-filter my-module --tag-rename '':'my-module-'\n\n    For more detailed examples and explanations, see\n      https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#EXAMPLES''')\n\n    # Create the basic parser\n    parser = argparse.ArgumentParser(description=summary,\n                                     usage = argparse.SUPPRESS,\n                                     add_help = False,\n                                     epilog = example_text,\n                                     formatter_class=argparse.RawDescriptionHelpFormatter)\n\n    analyze = parser.add_argument_group(title=_(\"Analysis\"))\n    analyze.add_argument('--analyze', action='store_true',\n        help=_(\"Analyze repository history and create a report that may be \"\n               \"useful in determining what to filter in a subsequent run. \"\n               \"Will not modify your repo.\"))\n    analyze.add_argument('--report-dir',\n        metavar='DIR_OR_FILE',\n        type=os.fsencode,\n        dest='report_dir',\n        help=_(\"Directory to write report, defaults to GIT_DIR/filter_repo/analysis,\"\n               \"refuses to run if exists, --force delete existing dir first.\"))\n\n    path = parser.add_argument_group(title=_(\"Filtering based on paths \"\n                                             \"(see also --filename-callback)\"),\n                                     description=textwrap.dedent(_(\"\"\"\n           These options specify the paths to select.  Note that much like git\n           itself, renames are NOT followed so you may need to specify multiple\n           paths, e.g. 
`--path olddir/ --path newdir/`\n           \"\"\"[1:])))\n\n    path.add_argument('--invert-paths', action='store_false', dest='inclusive',\n        help=_(\"Invert the selection of files from the specified \"\n               \"--path-{match,glob,regex} options below, i.e. only select \"\n               \"files matching none of those options.\"))\n\n    path.add_argument('--path-match', '--path', metavar='DIR_OR_FILE',\n        type=os.fsencode,\n        action=FilteringOptions.AppendFilter, dest='path_changes',\n        help=_(\"Exact paths (files or directories) to include in filtered \"\n               \"history.  Multiple --path options can be specified to get \"\n               \"a union of paths.\"))\n    path.add_argument('--path-glob', metavar='GLOB', type=os.fsencode,\n        action=FilteringOptions.AppendFilter, dest='path_changes',\n        help=_(\"Glob of paths to include in filtered history. Multiple \"\n               \"--path-glob options can be specified to get a union of \"\n               \"paths.\"))\n    path.add_argument('--path-regex', metavar='REGEX', type=os.fsencode,\n        action=FilteringOptions.AppendFilter, dest='path_changes',\n        help=_(\"Regex of paths to include in filtered history. Multiple \"\n               \"--path-regex options can be specified to get a union of \"\n               \"paths\"))\n    path.add_argument('--use-base-name', action='store_true',\n        help=_(\"Match on file base name instead of full path from the top \"\n               \"of the repo.  
Incompatible with --path-rename, and \"\n               \"incompatible with matching against directory names.\"))\n\n    rename = parser.add_argument_group(title=_(\"Renaming based on paths \"\n                                             \"(see also --filename-callback)\"))\n    rename.add_argument('--path-rename', '--path-rename-match',\n        metavar='OLD_NAME:NEW_NAME', dest='path_changes', type=os.fsencode,\n        action=FilteringOptions.AppendFilter,\n        help=_(\"Path to rename; if filename or directory matches OLD_NAME \"\n               \"rename to NEW_NAME.  Multiple --path-rename options can be \"\n               \"specified.  NOTE: If you combine filtering options with \"\n               \"renaming ones, do not rely on a rename argument to select \"\n               \"paths; you also need a filter to select them.\"))\n\n    helpers = parser.add_argument_group(title=_(\"Path shortcuts\"))\n    helpers.add_argument('--paths', help=argparse.SUPPRESS, metavar='IGNORE')\n    helpers.add_argument('--paths-from-file', metavar='FILENAME',\n        type=os.fsencode,\n        action=FilteringOptions.FileWithPathsFilter, dest='path_changes',\n        help=_(\"Specify several path filtering and renaming directives, one \"\n               \"per line.  Lines with '==>' in them specify path renames, \"\n               \"and lines can begin with 'literal:' (the default), 'glob:', \"\n               \"or 'regex:' to specify different matching styles.  Blank \"\n               \"lines and lines starting with a '#' are ignored.\"))\n    helpers.add_argument('--subdirectory-filter', metavar='DIRECTORY',\n        action=FilteringOptions.HelperFilter, type=os.fsencode,\n        help=_(\"Only look at history that touches the given subdirectory \"\n               \"and treat that directory as the project root.  
Equivalent \"\n               \"to using '--path DIRECTORY/ --path-rename DIRECTORY/:'\"))\n    helpers.add_argument('--to-subdirectory-filter', metavar='DIRECTORY',\n        action=FilteringOptions.HelperFilter, type=os.fsencode,\n        help=_(\"Treat the project root as if it were under DIRECTORY. \"\n               \"Equivalent to using '--path-rename :DIRECTORY/'\"))\n\n    contents = parser.add_argument_group(title=_(\"Content editing filters \"\n                                                 \"(see also --blob-callback)\"))\n    contents.add_argument('--replace-text', metavar='EXPRESSIONS_FILE',\n        help=_(\"A file with expressions that, if found, will be replaced. \"\n               \"By default, each expression is treated as literal text, \"\n               \"but 'regex:' and 'glob:' prefixes are supported.  You can \"\n               \"end the line with '==>' and some replacement text to \"\n               \"choose a replacement choice other than the default of '{}'.\"\n               .format(decode(FilteringOptions.default_replace_text))))\n    contents.add_argument('--strip-blobs-bigger-than', metavar='SIZE',\n                          dest='max_blob_size', default=0,\n        help=_(\"Strip blobs (files) bigger than specified size (e.g. '5M', \"\n               \"'2G', etc)\"))\n    contents.add_argument('--strip-blobs-with-ids', metavar='BLOB-ID-FILENAME',\n        help=_(\"Read git object ids from each line of the given file, and \"\n               \"strip all of them from history\"))\n\n    refrename = parser.add_argument_group(title=_(\"Renaming of refs \"\n                                              \"(see also --refname-callback)\"))\n    refrename.add_argument('--tag-rename', metavar='OLD:NEW', type=os.fsencode,\n        help=_(\"Rename tags starting with OLD to start with NEW.  
For \"\n               \"example, --tag-rename foo:bar will rename tag foo-1.2.3 \"\n               \"to bar-1.2.3; either OLD or NEW can be empty.\"))\n\n    messages = parser.add_argument_group(title=_(\"Filtering of commit messages \"\n                                               \"(see also --message-callback)\"))\n    messages.add_argument('--replace-message', metavar='EXPRESSIONS_FILE',\n        help=_(\"A file with expressions that, if found in commit or tag \"\n               \"messages, will be replaced. This file uses the same syntax \"\n               \"as --replace-text.\"))\n    messages.add_argument('--preserve-commit-hashes', action='store_true',\n        help=_(\"By default, since commits are rewritten and thus gain new \"\n               \"hashes, references to old commit hashes in commit messages \"\n               \"are replaced with new commit hashes (abbreviated to the same \"\n               \"length as the old reference).  Use this flag to turn off \"\n               \"updating commit hashes in commit messages.\"))\n    messages.add_argument('--preserve-commit-encoding', action='store_true',\n        help=_(\"Do not reencode commit messages into UTF-8.  By default, if \"\n               \"the commit object specifies an encoding for the commit \"\n               \"message, the message is re-encoded into UTF-8.\"))\n\n    people = parser.add_argument_group(title=_(\"Filtering of names & emails \"\n                                               \"(see also --name-callback \"\n                                               \"and --email-callback)\"))\n    people.add_argument('--mailmap', dest='mailmap', metavar='FILENAME',\n        type=os.fsencode,\n        help=_(\"Use specified mailmap file (see git-shortlog(1) for \"\n               \"details on the format) when rewriting author, committer, \"\n               \"and tagger names and emails.  
If the specified file is \"\n               \"part of git history, historical versions of the file will \"\n               \"be ignored; only the current contents are consulted.\"))\n    people.add_argument('--use-mailmap', dest='mailmap',\n        action='store_const', const=b'.mailmap',\n        help=_(\"Same as: '--mailmap .mailmap' \"))\n\n    parents = parser.add_argument_group(title=_(\"Parent rewriting\"))\n    parents.add_argument('--replace-refs', default=None,\n                         choices=['delete-no-add', 'delete-and-add',\n                                  'update-no-add', 'update-or-add',\n                                  'update-and-add', 'old-default'],\n        help=_(\"How to handle replace refs (see git-replace(1)).  Replace \"\n               \"refs can be added during the history rewrite as a way to \"\n               \"allow users to pass old commit IDs (from before \"\n               \"git-filter-repo was run) to git commands and have git know \"\n               \"how to translate those old commit IDs to the new \"\n               \"(post-rewrite) commit IDs.  Also, replace refs that existed \"\n               \"before the rewrite can either be deleted or updated.  The \"\n               \"choices to pass to --replace-refs thus need to specify both \"\n               \"what to do with existing refs and what to do with commit \"\n               \"rewrites.  Thus 'update-and-add' means to update existing \"\n               \"replace refs, and for any commit rewrite (even if already \"\n               \"pointed at by a replace ref) add a new refs/replace/ reference \"\n               \"to map from the old commit ID to the new commit ID.  The \"\n               \"default is update-no-add, meaning update existing replace refs \"\n               \"but do not add any new ones.  
There is also a special \"\n               \"'old-default' option for picking the default used in versions \"\n               \"prior to git-filter-repo-2.45, namely 'update-and-add' upon \"\n               \"the first run of git-filter-repo in a repository and \"\n               \"'update-or-add' if running git-filter-repo again on a \"\n               \"repository.\"))\n    parents.add_argument('--prune-empty', default='auto',\n                         choices=['always', 'auto', 'never'],\n        help=_(\"Whether to prune empty commits.  'auto' (the default) means \"\n               \"only prune commits which become empty (not commits which were \"\n               \"empty in the original repo, unless their parent was pruned). \"\n               \"When the parent of a commit is pruned, the first non-pruned \"\n               \"ancestor becomes the new parent.\"))\n    parents.add_argument('--prune-degenerate', default='auto',\n                         choices=['always', 'auto', 'never'],\n        help=_(\"Since merge commits are needed for history topology, they \"\n               \"are typically exempt from pruning.  However, they can become \"\n               \"degenerate with the pruning of other commits (having fewer \"\n               \"than two parents, having one commit serve as both parents, or \"\n               \"having one parent as the ancestor of the other.)  If such \"\n               \"merge commits have no file changes, they can be pruned.  The \"\n               \"default ('auto') is to only prune empty merge commits which \"\n               \"become degenerate (not which started as such).\"))\n    parents.add_argument('--no-ff', action='store_true',\n        help=_(\"Even if the first parent is or becomes an ancestor of another \"\n               \"parent, do not prune it.  
This modifies how \"\n               \"--prune-degenerate behaves, and may be useful in projects that \"\n               \"always use merge --no-ff.\"))\n\n    callback = parser.add_argument_group(title=_(\"Generic callback code snippets\"))\n    callback.add_argument('--filename-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing filenames; see CALLBACKS \"\n               \"section below.\"))\n    callback.add_argument('--file-info-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing file and metadata; see \"\n               \"CALLBACKS section below.\"))\n    callback.add_argument('--message-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing messages (both commit \"\n               \"messages and tag messages); see CALLBACKS section below.\"))\n    callback.add_argument('--name-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing names of people; see \"\n               \"CALLBACKS section below.\"))\n    callback.add_argument('--email-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing email addresses; see \"\n               \"CALLBACKS section below.\"))\n    callback.add_argument('--refname-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing refnames; see CALLBACKS \"\n               \"section below.\"))\n\n    callback.add_argument('--blob-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing blob objects; see \"\n               \"CALLBACKS section below.\"))\n    callback.add_argument('--commit-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing commit objects; see \"\n               \"CALLBACKS section below.\"))\n    callback.add_argument('--tag-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        
help=_(\"Python code body for processing tag objects.  Note that \"\n               \"lightweight tags have no tag object and are thus not \"\n               \"handled by this callback. See CALLBACKS section below.\"))\n    callback.add_argument('--reset-callback', metavar=\"FUNCTION_BODY_OR_FILE\",\n        help=_(\"Python code body for processing reset objects; see \"\n               \"CALLBACKS section below.\"))\n\n    sdr = parser.add_argument_group(title=_(\"Sensitive Data Removal Handling\"))\n    sdr.add_argument('--sensitive-data-removal', '--sdr', action='store_true',\n        help=_(\"This rewrite is intended to remove sensitive data from a \"\n               \"repository.  Gather extra information from the rewrite needed \"\n               \"to provide additional instructions on how to clean up other \"\n               \"copies.\"))\n    sdr.add_argument('--no-fetch', action='store_true',\n        help=_(\"By default, --sensitive-data-removal will trigger a \"\n               \"mirror-like fetch of all refs from origin, discarding local \"\n               \"changes, but ensuring that _all_ fetchable refs that hold on \"\n               \"to the sensitive data are rewritten.  This flag removes that \"\n               \"fetch, risking that other refs continue holding on to the \"\n               \"sensitive data.  This option is implied by --partial or any \"\n               \"flag that implies --partial.\"))\n\n    desc = _(\n      \"Specifying alternate source or target locations implies --partial,\\n\"\n      \"except that the normal default for --replace-refs is used.  
However,\\n\"\n      \"unlike normal uses of --partial, this doesn't risk mixing old and new\\n\"\n      \"history since the old and new histories are in different repositories.\")\n    location = parser.add_argument_group(title=_(\"Location to filter from/to\"),\n                                         description=desc)\n    location.add_argument('--source', type=os.fsencode,\n                          help=_(\"Git repository to read from\"))\n    location.add_argument('--target', type=os.fsencode,\n        help=_(\"Git repository to overwrite with filtered history\"))\n\n    order = parser.add_argument_group(title=_(\"Ordering of commits\"))\n    order.add_argument('--date-order', action='store_true',\n        help=_(\"Processes commits in commit timestamp order.\"))\n\n    misc = parser.add_argument_group(title=_(\"Miscellaneous options\"))\n    misc.add_argument('--help', '-h', action='store_true',\n        help=_(\"Show this help message and exit.\"))\n    misc.add_argument('--version', action='store_true',\n        help=_(\"Display filter-repo's version and exit.\"))\n    misc.add_argument('--proceed', action='store_true',\n        help=_(\"Avoid triggering the no-arguments-specified check.\"))\n    misc.add_argument('--force', '-f', action='store_true',\n        help=_(\"Rewrite repository history even if the current repo does not \"\n               \"look like a fresh clone.  History rewriting is irreversible \"\n               \"(and includes immediate pruning of reflogs and old objects), \"\n               \"so be cautious about using this flag.\"))\n    misc.add_argument('--partial', action='store_true',\n        help=_(\"Do a partial history rewrite, resulting in the mixture of \"\n               \"old and new history.  
This disables rewriting \"\n               \"refs/remotes/origin/* to refs/heads/*, disables removing \"\n               \"of the 'origin' remote, disables removing unexported refs, \"\n               \"disables expiring the reflog, and disables the automatic \"\n               \"post-filter gc.  Also, this modifies --tag-rename and \"\n               \"--refname-callback options such that instead of replacing \"\n               \"old refs with new refnames, it will instead create new \"\n               \"refs and keep the old ones around.  Use with caution.\"))\n    misc.add_argument('--no-gc', action='store_true',\n        help=_(\"Do not run 'git gc' after filtering.\"))\n    # WARNING: --refs presents a problem with become-degenerate pruning:\n    #   * Excluding a commit also excludes its ancestors so when some other\n    #     commit has an excluded ancestor as a parent we have no way of\n    #     knowing what it is an ancestor of without doing a special\n    #     full-graph walk.\n    misc.add_argument('--refs', nargs='+',\n        help=_(\"Limit history rewriting to the specified refs.  Implies \"\n               \"--partial.  In addition to the normal caveats of --partial \"\n               \"(mixing old and new history, no automatic remapping of \"\n               \"refs/remotes/origin/* to refs/heads/*, etc.), this also may \"\n               \"cause problems for pruning of degenerate empty merge \"\n               \"commits when negative revisions are specified.\"))\n\n    misc.add_argument('--dry-run', action='store_true',\n        help=_(\"Do not change the repository.  Run `git fast-export` and \"\n               \"filter its output, and save both the original and the \"\n               \"filtered version for comparison.  
This also disables \"\n               \"rewriting commit messages due to not knowing new commit \"\n               \"IDs and disables filtering of some empty commits due to \"\n               \"inability to query the fast-import backend.\" ))\n    misc.add_argument('--debug', action='store_true',\n        help=_(\"Print additional information about operations being \"\n               \"performed and commands being run.  When used together \"\n               \"with --dry-run, also show extra information about what \"\n               \"would be run.\"))\n    # WARNING: --state-branch has some problems:\n    #   * It does not work well with manually inserted objects (user creating\n    #     Blob() or Commit() or Tag() objects and calling\n    #     RepoFilter.insert(obj) on them).\n    #   * It does not work well with multiple source or multiple target repos\n    #   * It doesn't work so well with pruning become-empty commits (though\n    #     --refs doesn't work so well with it either)\n    # These are probably fixable, given some work (e.g. re-importing the\n    # graph at the beginning to get the AncestryGraph right, doing our own\n    # export of marks instead of using fast-export --export-marks, etc.), but\n    # for now just hide the option.\n    misc.add_argument('--state-branch',\n        #help=_(\"Enable incremental filtering by saving the mapping of old \"\n        #       \"to new objects to the specified branch upon exit, and\"\n        #       \"loading that mapping from that branch (if it exists) \"\n        #       \"upon startup.\"))\n        help=argparse.SUPPRESS)\n    misc.add_argument('--stdin', action='store_true',\n        help=_(\"Instead of running `git fast-export` and filtering its \"\n               \"output, filter the fast-export stream from stdin.    The \"\n               \"stdin must be in the expected input format (e.g. 
it needs \"\n               \"to include original-oid directives).\"))\n    misc.add_argument('--quiet', action='store_true',\n        help=_(\"Pass --quiet to other git commands called\"))\n    return parser\n\n  @staticmethod\n  def sanity_check_args(args):\n    if args.analyze and args.path_changes:\n      raise SystemExit(_(\"Error: --analyze is incompatible with --path* flags; \"\n                         \"it's a read-only operation.\"))\n    if args.analyze and args.stdin:\n      raise SystemExit(_(\"Error: --analyze is incompatible with --stdin.\"))\n    # If no path_changes are found, initialize with empty list but mark as\n    # not inclusive so that all files match\n    if args.path_changes == None:\n      args.path_changes = []\n      args.inclusive = False\n    else:\n      # Similarly, if we have no filtering paths, then no path should be\n      # filtered out.  Based on how newname() works, the easiest way to\n      # achieve that is setting args.inclusive to False.\n      if not any(x[0] == 'filter' for x in args.path_changes):\n        args.inclusive = False\n      # Also check for incompatible --use-base-name and --path-rename flags.\n      if args.use_base_name:\n        if any(x[0] == 'rename' for x in args.path_changes):\n          raise SystemExit(_(\"Error: --use-base-name and --path-rename are \"\n                             \"incompatible.\"))\n    # Also throw some sanity checks on git version here;\n    # PERF: remove these checks once new enough git versions are common\n    p = subproc.Popen('git fast-export -h'.split(),\n                      stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n    output = p.stdout.read()\n    if b'--anonymize-map' not in output: # pragma: no cover\n      global date_format_permissive\n      date_format_permissive = False\n    if not any(x in output for x in [b'--mark-tags',b'--[no-]mark-tags']): # pragma: no cover\n      global write_marks\n      write_marks = False\n      if args.state_branch:\n       
 # We need a version of git-fast-export with --mark-tags\n        raise SystemExit(_(\"Error: need git >= 2.24.0\"))\n    if not any(x in output for x in [b'--reencode',  b'--[no-]reencode']): # pragma: no cover\n      if args.preserve_commit_encoding:\n        # We need a version of git-fast-export with --reencode\n        raise SystemExit(_(\"Error: need git >= 2.23.0\"))\n      else:\n        # Set args.preserve_commit_encoding to None which we'll check for later\n        # to avoid passing --reencode=yes to fast-export (that option was the\n        # default prior to git-2.23)\n        args.preserve_commit_encoding = None\n      # If we don't have fast-export --reencode, we may also be missing\n      # diff-tree --combined-all-paths, which is even more important...\n      p = subproc.Popen('git diff-tree -h'.split(),\n                        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n      output = p.stdout.read()\n      if b'--combined-all-paths' not in output:\n        # We need a version of git-diff-tree with --combined-all-paths\n        raise SystemExit(_(\"Error: need git >= 2.22.0\"))\n    if args.sensitive_data_removal:\n      p = subproc.Popen('git cat-file -h'.split(),\n                        stdout=subprocess.PIPE, stderr=subprocess.STDOUT)\n      output = p.stdout.read()\n      if b\"--batch-command\" not in output: # pragma: no cover\n        raise SystemExit(_(\"Error: need git >= 2.36.0\"))\n    # End of sanity checks on git version\n    if args.max_blob_size:\n      suffix = args.max_blob_size[-1]\n      if suffix not in '1234567890':\n        mult = {'K': 1024, 'M': 1024**2, 'G': 1024**3}\n        if suffix not in mult:\n          raise SystemExit(_(\"Error: could not parse --strip-blobs-bigger-than\"\n                             \" argument %s\")\n                           % args.max_blob_size)\n        args.max_blob_size = int(args.max_blob_size[0:-1]) * mult[suffix]\n      else:\n        args.max_blob_size = 
int(args.max_blob_size)\n    if args.file_info_callback and (\n        args.stdin or args.blob_callback or args.filename_callback):\n      raise SystemExit(_(\"Error: --file-info-callback is incompatible with \"\n                         \"--stdin, --blob-callback,\\nand --filename-callback.\"))\n\n  @staticmethod\n  def get_replace_text(filename):\n    replace_literals = []\n    replace_regexes = []\n    with open(filename, 'br') as f:\n      for line in f:\n        line = line.rstrip(b'\\r\\n')\n\n        # Determine the replacement\n        replacement = FilteringOptions.default_replace_text\n        if b'==>' in line:\n          line, replacement = line.rsplit(b'==>', 1)\n\n        # See if we need to match via regex\n        regex = None\n        if line.startswith(b'regex:'):\n          regex = line[6:]\n        elif line.startswith(b'glob:'):\n          regex = glob_to_regex(line[5:])\n        if regex:\n          replace_regexes.append((re.compile(regex), replacement))\n        else:\n          # Otherwise, find the literal we need to replace\n          if line.startswith(b'literal:'):\n            line = line[8:]\n          if not line:\n            continue\n          replace_literals.append((line, replacement))\n    return {'literals': replace_literals, 'regexes':  replace_regexes}\n\n  @staticmethod\n  def get_paths_from_file(filename):\n    new_path_changes = []\n    with open(filename, 'br') as f:\n      for line in f:\n        line = line.rstrip(b'\\r\\n')\n\n        # Skip blank lines\n        if not line:\n          continue\n        # Skip comment lines\n        if line.startswith(b'#'):\n          continue\n\n        # Determine the replacement\n        match_type, repl = 'literal', None\n        if b'==>' in line:\n          line, repl = line.rsplit(b'==>', 1)\n\n        # See if we need to match via regex\n        match_type = 'match' # a.k.a. 
'literal'\n        if line.startswith(b'regex:'):\n          match_type = 'regex'\n          match = re.compile(line[6:])\n        elif line.startswith(b'glob:'):\n          match_type = 'glob'\n          match = line[5:]\n          if repl:\n            raise SystemExit(_(\"Error: In %s, 'glob:' and '==>' are incompatible (renaming globs makes no sense)\" % decode(filename)))\n        else:\n          if line.startswith(b'literal:'):\n            match = line[8:]\n          else:\n            match = line\n          if repl is not None:\n            if match and repl and match.endswith(b'/') != repl.endswith(b'/'):\n              raise SystemExit(_(\"Error: When rename directories, if OLDNAME \"\n                                 \"and NEW_NAME are both non-empty and either \"\n                                 \"ends with a slash then both must.\"))\n\n        # Record the filter or rename\n        if repl is not None:\n          new_path_changes.append(['rename', match_type, (match, repl)])\n        else:\n          new_path_changes.append(['filter', match_type, match])\n          if match_type == 'glob' and not match.endswith(b'*'):\n            extension = b'*' if match.endswith(b'/') else b'/*'\n            new_path_changes.append(['filter', match_type, match+extension])\n      return new_path_changes\n\n  @staticmethod\n  def default_options():\n    return FilteringOptions.parse_args([], error_on_empty = False)\n\n  @staticmethod\n  def parse_args(input_args, error_on_empty = True):\n    parser = FilteringOptions.create_arg_parser()\n    if not input_args and error_on_empty:\n      parser.print_usage()\n      raise SystemExit(_(\"No arguments specified.\"))\n    args = parser.parse_args(input_args)\n    if args.help:\n      parser.print_help()\n      raise SystemExit()\n    if args.paths:\n      raise SystemExit(\"Error: Option `--paths` unrecognized; did you mean --path or --paths-from-file?\")\n    if args.version:\n      GitUtils.print_my_version()\n      
raise SystemExit()\n    FilteringOptions.sanity_check_args(args)\n    if args.mailmap:\n      args.mailmap = MailmapInfo(args.mailmap)\n    if args.replace_text:\n      args.replace_text = FilteringOptions.get_replace_text(args.replace_text)\n    if args.replace_message:\n      args.replace_message = FilteringOptions.get_replace_text(args.replace_message)\n    if args.strip_blobs_with_ids:\n      with open(args.strip_blobs_with_ids, 'br') as f:\n        args.strip_blobs_with_ids = set(f.read().split())\n    else:\n      args.strip_blobs_with_ids = set()\n    if (args.partial or args.refs) and not args.replace_refs:\n      args.replace_refs = 'update-no-add'\n    args.repack = not (args.partial or args.refs or args.no_gc)\n    if args.refs or args.source or args.target:\n      args.partial = True\n    if args.partial:\n      args.no_fetch = True\n    if not args.refs:\n      args.refs = ['--all']\n    return args\n\nclass RepoAnalyze(object):\n\n  # First, several helper functions for analyze_commit()\n\n  @staticmethod\n  def equiv_class(stats, filename):\n    return stats['equivalence'].get(filename, (filename,))\n\n  @staticmethod\n  def setup_equivalence_for_rename(stats, oldname, newname):\n    # if A is renamed to B and B is renamed to C, then the user thinks of\n    # A, B, and C as all being different names for the same 'file'.  
We record\n    # this as an equivalence class:\n    #   stats['equivalence'][name] = (A,B,C)\n    # for name being each of A, B, and C.\n    old_tuple = stats['equivalence'].get(oldname, ())\n    if newname in old_tuple:\n      return\n    elif old_tuple:\n      new_tuple = tuple(list(old_tuple)+[newname])\n    else:\n      new_tuple = (oldname, newname)\n    for f in new_tuple:\n      stats['equivalence'][f] = new_tuple\n\n  @staticmethod\n  def setup_or_update_rename_history(stats, commit, oldname, newname):\n    rename_commits = stats['rename_history'].get(oldname, set())\n    rename_commits.add(commit)\n    stats['rename_history'][oldname] = rename_commits\n\n  @staticmethod\n  def handle_renames(stats, commit, change_types, filenames):\n    for index, change_type in enumerate(change_types):\n      if change_type == ord(b'R'):\n        oldname, newname = filenames[index], filenames[-1]\n        RepoAnalyze.setup_equivalence_for_rename(stats, oldname, newname)\n        RepoAnalyze.setup_or_update_rename_history(stats, commit,\n                                                   oldname, newname)\n\n  @staticmethod\n  def handle_file(stats, graph, commit, modes, shas, filenames):\n    mode, sha, filename = modes[-1], shas[-1], filenames[-1]\n\n    # Figure out kind of deletions to undo for this file, and update lists\n    # of all-names-by-sha and all-filenames\n    delmode = 'tree_deletions'\n    if mode != b'040000':\n      delmode = 'file_deletions'\n      stats['names'][sha].add(filename)\n      stats['allnames'].add(filename)\n\n    # If the file (or equivalence class of files) was recorded as deleted,\n    # clearly it isn't anymore\n    equiv = RepoAnalyze.equiv_class(stats, filename)\n    for f in equiv:\n      stats[delmode].pop(f, None)\n\n    # If we get a modify/add for a path that was renamed, we may need to break\n    # the equivalence class.  
However, if the modify/add was on a branch that\n    # doesn't have the rename in its history, we are still okay.\n    need_to_break_equivalence = False\n    if equiv[-1] != filename:\n      for rename_commit in stats['rename_history'][filename]:\n        if graph.is_ancestor(rename_commit, commit):\n          need_to_break_equivalence = True\n\n    if need_to_break_equivalence:\n      for f in equiv:\n        if f in stats['equivalence']:\n          del stats['equivalence'][f]\n\n  @staticmethod\n  def analyze_commit(stats, graph, commit, parents, date, file_changes):\n    graph.add_commit_and_parents(commit, parents)\n    for change in file_changes:\n      modes, shas, change_types, filenames = change\n      if len(parents) == 1 and change_types.startswith(b'R'):\n        change_types = b'R'  # remove the rename score; we don't care\n      if modes[-1] == b'160000':\n        continue\n      elif modes[-1] == b'000000':\n        # Track when files/directories are deleted\n        for f in RepoAnalyze.equiv_class(stats, filenames[-1]):\n          if any(x == b'040000' for x in modes[0:-1]):\n            stats['tree_deletions'][f] = date\n          else:\n            stats['file_deletions'][f] = date\n      elif change_types.strip(b'AMT') == b'':\n        RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)\n      elif modes[-1] == b'040000' and change_types.strip(b'RAM') == b'':\n        RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)\n      elif change_types.strip(b'RAMT') == b'':\n        RepoAnalyze.handle_file(stats, graph, commit, modes, shas, filenames)\n        RepoAnalyze.handle_renames(stats, commit, change_types, filenames)\n      else:\n        raise SystemExit(_(\"Unhandled change type(s): %(change_type)s \"\n                           \"(in commit %(commit)s)\")\n                         % ({'change_type': change_types, 'commit': commit})\n                         ) # pragma: no cover\n\n  @staticmethod\n  def 
gather_data(args):\n    unpacked_size, packed_size = GitUtils.get_blob_sizes()\n    stats = {'names': collections.defaultdict(set),\n             'allnames' : set(),\n             'file_deletions': {},\n             'tree_deletions': {},\n             'equivalence': {},\n             'rename_history': collections.defaultdict(set),\n             'unpacked_size': unpacked_size,\n             'packed_size': packed_size,\n             'num_commits': 0}\n\n    # Setup the rev-list/diff-tree process\n    processed_commits_msg = _(\"Processed %d commits\")\n    commit_parse_progress = ProgressWriter()\n    num_commits = 0\n    cmd = ('git rev-list --topo-order --reverse {}'.format(' '.join(args.refs)) +\n           ' | git diff-tree --stdin --always --root --format=%H%n%P%n%cd' +\n           ' --date=short -M -t -c --raw --combined-all-paths')\n    dtp = subproc.Popen(cmd, shell=True, bufsize=-1, stdout=subprocess.PIPE)\n    f = dtp.stdout\n    line = f.readline()\n    if not line:\n      raise SystemExit(_(\"Nothing to analyze; repository is empty.\"))\n    cont = bool(line)\n    graph = AncestryGraph()\n    while cont:\n      commit = line.rstrip()\n      parents = f.readline().split()\n      date = f.readline().rstrip()\n\n      # We expect a blank line next; if we get a non-blank line then\n      # this commit modified no files and we need to move on to the next.\n      # If there is no line, we've reached end-of-input.\n      line = f.readline()\n      if not line:\n        cont = False\n      line = line.rstrip()\n\n      # If we haven't reached end of input, and we got a blank line meaning\n      # a commit that has modified files, then get the file changes associated\n      # with this commit.\n      file_changes = []\n      if cont and not line:\n        cont = False\n        for line in f:\n          if not line.startswith(b':'):\n            cont = True\n            break\n          n = 1+max(1, len(parents))\n          assert line.startswith(b':'*(n-1))\n      
    relevant = line[n-1:-1]\n          splits = relevant.split(None, n)\n          modes = splits[0:n]\n          splits = splits[n].split(None, n)\n          shas = splits[0:n]\n          splits = splits[n].split(b'\\t')\n          change_types = splits[0]\n          filenames = [PathQuoting.dequote(x) for x in splits[1:]]\n          file_changes.append([modes, shas, change_types, filenames])\n\n      # If someone is trying to analyze a subset of the history, make sure\n      # to avoid dying on commits with parents that we haven't seen before\n      if args.refs:\n        graph.record_external_commits([p for p in parents\n                                       if not p in graph.value])\n\n      # Analyze this commit and update progress\n      RepoAnalyze.analyze_commit(stats, graph, commit, parents, date,\n                                 file_changes)\n      num_commits += 1\n      commit_parse_progress.show(processed_commits_msg % num_commits)\n\n    # Show the final commits processed message and record the number of commits\n    commit_parse_progress.finish()\n    stats['num_commits'] = num_commits\n\n    # Close the output, ensure rev-list|diff-tree pipeline completed successfully\n    dtp.stdout.close()\n    if dtp.wait():\n      raise SystemExit(_(\"Error: rev-list|diff-tree pipeline failed; see above.\")) # pragma: no cover\n\n    return stats\n\n  @staticmethod\n  def write_report(reportdir, stats):\n    def datestr(datetimestr):\n      return datetimestr if datetimestr else _('<present>').encode()\n\n    def dirnames(path):\n      while True:\n        path = os.path.dirname(path)\n        yield path\n        if path == b'':\n          break\n\n    # Compute aggregate size information for paths, extensions, and dirs\n    total_size = {'packed': 0, 'unpacked': 0}\n    path_size = {'packed': collections.defaultdict(int),\n                 'unpacked': collections.defaultdict(int)}\n    ext_size = {'packed': collections.defaultdict(int),\n                
'unpacked': collections.defaultdict(int)}\n    dir_size = {'packed': collections.defaultdict(int),\n                'unpacked': collections.defaultdict(int)}\n    for sha in stats['names']:\n      size = {'packed': stats['packed_size'][sha],\n              'unpacked': stats['unpacked_size'][sha]}\n      for which in ('packed', 'unpacked'):\n        for name in stats['names'][sha]:\n          total_size[which] += size[which]\n          path_size[which][name] += size[which]\n          basename, ext = os.path.splitext(name)\n          ext_size[which][ext] += size[which]\n          for dirname in dirnames(name):\n            dir_size[which][dirname] += size[which]\n\n    # Determine if and when extensions and directories were deleted\n    ext_deleted_data = {}\n    for name in stats['allnames']:\n      when = stats['file_deletions'].get(name, None)\n\n      # Update the extension\n      basename, ext = os.path.splitext(name)\n      if when is None:\n        ext_deleted_data[ext] = None\n      elif ext in ext_deleted_data:\n        if ext_deleted_data[ext] is not None:\n          ext_deleted_data[ext] = max(ext_deleted_data[ext], when)\n      else:\n        ext_deleted_data[ext] = when\n\n    dir_deleted_data = {}\n    for name in dir_size['packed']:\n      dir_deleted_data[name] = stats['tree_deletions'].get(name, None)\n\n    with open(os.path.join(reportdir, b\"README\"), 'bw') as f:\n      # Give a basic overview of this file\n      f.write(b\"== %s ==\\n\" % _(\"Overall Statistics\").encode())\n      f.write((\"  %s: %d\\n\" % (_(\"Number of commits\"),\n                               stats['num_commits'])).encode())\n      f.write((\"  %s: %d\\n\" % (_(\"Number of filenames\"),\n                               len(path_size['packed']))).encode())\n      f.write((\"  %s: %d\\n\" % (_(\"Number of directories\"),\n                               len(dir_size['packed']))).encode())\n      f.write((\"  %s: %d\\n\" % (_(\"Number of file extensions\"),\n                    
           len(ext_size['packed']))).encode())\n      f.write(b\"\\n\")\n      f.write((\"  %s: %d\\n\" % (_(\"Total unpacked size (bytes)\"),\n                               total_size['unpacked'])).encode())\n      f.write((\"  %s: %d\\n\" % (_(\"Total packed size (bytes)\"),\n                               total_size['packed'])).encode())\n      f.write(b\"\\n\")\n\n      # Mention issues with the report\n      f.write((\"== %s ==\\n\" % _(\"Caveats\")).encode())\n      f.write((\"=== %s ===\\n\" % _(\"Sizes\")).encode())\n      f.write(textwrap.dedent(_(\"\"\"\n        Packed size represents what size your repository would be if no\n        trees, commits, tags, or other metadata were included (though it may\n        fail to represent de-duplication; see below).  It also represents the\n        current packing, which may be suboptimal if you haven't gc'ed for a\n        while.\n\n        Unpacked size represents what size your repository would be if no\n        trees, commits, tags, or other metadata were included AND if no\n        files were packed; i.e., without delta-ing or compression.\n\n        Both unpacked and packed sizes can be slightly misleading.  Deleting\n        a blob from history will not save as much space as the unpacked size,\n        because it is obviously normally stored in packed form.  Also,\n        deleting a blob from history may not save as much space as its packed\n        size either, because another blob could be stored as a delta against\n        that blob, so when you remove one blob another blob's packed size may\n        grow.\n\n        Also, the sum of the packed sizes can add up to more than the\n        repository size; if the same contents appeared in the repository in\n        multiple places, git will automatically de-dupe and store only one\n        copy, while the way sizes are added in this analysis adds the size\n        for each file path that has those contents.  
Further, if a file is\n        ever reverted to a previous version's contents, the previous\n        version's size will be counted multiple times in this analysis, even\n        though git will only store it once.\n        \"\"\")[1:]).encode())\n      f.write(b\"\\n\")\n      f.write((\"=== %s ===\\n\" % _(\"Deletions\")).encode())\n      f.write(textwrap.dedent(_(\"\"\"\n        Whether a file is deleted is not a binary quality, since it can be\n        deleted on some branches but still exist in others.  Also, it might\n        exist in an old tag, but have been deleted in versions newer than\n        that.  More thorough tracking could be done, including looking at\n        merge commits where one side of history deleted and the other modified,\n        in order to give a more holistic picture of deletions.  However, that\n        algorithm would not only be more complex to implement, it'd also be\n        quite difficult to present and interpret by users.  Since --analyze\n        is just about getting a high-level rough picture of history, it instead\n        implements the simplistic rule that is good enough for 98% of cases:\n          A file is marked as deleted if the last commit in the fast-export\n          stream that mentions the file lists it as deleted.\n        This makes it dependent on topological ordering, but generally gives\n        the \"right\" answer.\n        \"\"\")[1:]).encode())\n      f.write(b\"\\n\")\n      f.write((\"=== %s ===\\n\" % _(\"Renames\")).encode())\n      f.write(textwrap.dedent(_(\"\"\"\n        Renames share the same non-binary nature that deletions do, plus\n        additional challenges:\n          * If the renamed file is renamed again, instead of just two names for\n            a path you can have three or more.\n          * Rename pairs of the form (oldname, newname) that we consider to be\n            different names of the \"same file\" might only be valid over certain\n            commit ranges.  
For example, if a new commit reintroduces a file\n            named oldname, then new versions of oldname aren't the \"same file\"\n            anymore.  We could try to portray this to the user, but it's easier\n            for the user to just break the pairing and only report unbroken\n            rename pairings to the user.\n          * The ability for users to rename files differently in different\n            branches means that our chains of renames will not necessarily be\n            linear but may branch out.\n        \"\"\")[1:]).encode())\n      f.write(b\"\\n\")\n\n    # Equivalence classes for names, so if folks only want to keep a\n    # certain set of paths, they know the old names they want to include\n    # too.\n    with open(os.path.join(reportdir, b\"renames.txt\"), 'bw') as f:\n      seen = set()\n      for pathname,equiv_group in sorted(stats['equivalence'].items(),\n                                         key=lambda x:(x[1], x[0])):\n        if equiv_group in seen:\n          continue\n        seen.add(equiv_group)\n        f.write((\"{} ->\\n    \".format(decode(equiv_group[0])) +\n                     \"\\n    \".join(decode(x) for x in equiv_group[1:]) +\n                 \"\\n\").encode())\n\n    # List directories in reverse sorted order of unpacked size\n    with open(os.path.join(reportdir, b\"directories-deleted-sizes.txt\"), 'bw') as f:\n      msg = \"=== %s ===\\n\" % _(\"Deleted directories by reverse size\")\n      f.write(msg.encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, directory name\\n\")\n      f.write(msg.encode())\n      for dirname, size in sorted(dir_size['packed'].items(),\n                                  key=lambda x:(x[1],x[0]), reverse=True):\n        if (dir_deleted_data[dirname]):\n          f.write(b\"  %10d %10d %-10s %s\\n\" % (dir_size['unpacked'][dirname],\n                                              size,\n                                              
datestr(dir_deleted_data[dirname]),\n                                              dirname or _('<toplevel>').encode()))\n\n    with open(os.path.join(reportdir, b\"directories-all-sizes.txt\"), 'bw') as f:\n      f.write((\"=== %s ===\\n\" % _(\"All directories by reverse size\")).encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, directory name\\n\")\n      f.write(msg.encode())\n      for dirname, size in sorted(dir_size['packed'].items(),\n                                  key=lambda x:(x[1],x[0]), reverse=True):\n        f.write(b\"  %10d %10d %-10s %s\\n\" % (dir_size['unpacked'][dirname],\n                                            size,\n                                            datestr(dir_deleted_data[dirname]),\n                                            dirname or _(\"<toplevel>\").encode()))\n\n    # List extensions in reverse sorted order of unpacked size\n    with open(os.path.join(reportdir, b\"extensions-deleted-sizes.txt\"), 'bw') as f:\n      msg = \"=== %s ===\\n\" % _(\"Deleted extensions by reverse size\")\n      f.write(msg.encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, extension name\\n\")\n      f.write(msg.encode())\n      for extname, size in sorted(ext_size['packed'].items(),\n                                  key=lambda x:(x[1],x[0]), reverse=True):\n        if (ext_deleted_data[extname]):\n          f.write(b\"  %10d %10d %-10s %s\\n\" % (ext_size['unpacked'][extname],\n                                              size,\n                                              datestr(ext_deleted_data[extname]),\n                                              extname or _('<no extension>').encode()))\n\n    with open(os.path.join(reportdir, b\"extensions-all-sizes.txt\"), 'bw') as f:\n      f.write((\"=== %s ===\\n\" % _(\"All extensions by reverse size\")).encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, extension name\\n\")\n      f.write(msg.encode())\n     
 for extname, size in sorted(ext_size['packed'].items(),\n                                  key=lambda x:(x[1],x[0]), reverse=True):\n        f.write(b\"  %10d %10d %-10s %s\\n\" % (ext_size['unpacked'][extname],\n                                            size,\n                                            datestr(ext_deleted_data[extname]),\n                                            extname or _('<no extension>').encode()))\n\n    # List files in reverse sorted order of unpacked size\n    with open(os.path.join(reportdir, b\"path-deleted-sizes.txt\"), 'bw') as f:\n      msg = \"=== %s ===\\n\" % _(\"Deleted paths by reverse accumulated size\")\n      f.write(msg.encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, path name(s)\\n\")\n      f.write(msg.encode())\n      for pathname, size in sorted(path_size['packed'].items(),\n                                   key=lambda x:(x[1],x[0]), reverse=True):\n        when = stats['file_deletions'].get(pathname, None)\n        if when:\n          f.write(b\"  %10d %10d %-10s %s\\n\" % (path_size['unpacked'][pathname],\n                                              size,\n                                              datestr(when),\n                                              pathname))\n\n    with open(os.path.join(reportdir, b\"path-all-sizes.txt\"), 'bw') as f:\n      msg = \"=== %s ===\\n\" % _(\"All paths by reverse accumulated size\")\n      f.write(msg.encode())\n      msg = _(\"Format: unpacked size, packed size, date deleted, path name\\n\")\n      f.write(msg.encode())\n      for pathname, size in sorted(path_size['packed'].items(),\n                                   key=lambda x:(x[1],x[0]), reverse=True):\n        when = stats['file_deletions'].get(pathname, None)\n        f.write(b\"  %10d %10d %-10s %s\\n\" % (path_size['unpacked'][pathname],\n                                            size,\n                                            datestr(when),\n                             
               pathname))\n\n    # List of filenames and sizes in descending order\n    with open(os.path.join(reportdir, b\"blob-shas-and-paths.txt\"), 'bw') as f:\n      f.write((\"=== %s ===\\n\" % _(\"Files by sha and associated pathnames in reverse size\")).encode())\n      f.write(_(\"Format: sha, unpacked size, packed size, filename(s) object stored as\\n\").encode())\n      for sha, size in sorted(stats['packed_size'].items(),\n                              key=lambda x:(x[1],x[0]), reverse=True):\n        if sha not in stats['names']:\n          # Some objects in the repository might not be referenced, or not\n          # referenced by the branches/tags the user cares about; skip them.\n          continue\n        names_with_sha = stats['names'][sha]\n        if len(names_with_sha) == 1:\n          names_with_sha = names_with_sha.pop()\n        else:\n          names_with_sha = b'[' + b', '.join(sorted(names_with_sha)) + b']'\n        f.write(b\"  %s %10d %10d %s\\n\" % (sha,\n                                          stats['unpacked_size'][sha],\n                                          size,\n                                          names_with_sha))\n\n  @staticmethod\n  def run(args):\n    if args.report_dir:\n      reportdir = args.report_dir\n    else:\n      git_dir = GitUtils.determine_git_dir(b'.')\n\n    # Create the report directory as necessary\n      results_tmp_dir = os.path.join(git_dir, b'filter-repo')\n      if not os.path.isdir(results_tmp_dir):\n        os.mkdir(results_tmp_dir)\n      reportdir = os.path.join(results_tmp_dir, b\"analysis\")\n\n    if os.path.isdir(reportdir):\n      if args.force:\n        sys.stdout.write(_(\"Warning: Removing recursively: \\\"%s\\\"\\n\") % decode(reportdir))\n        shutil.rmtree(reportdir)\n      else:\n        sys.stdout.write(_(\"Error: dir already exists (use --force to delete): \\\"%s\\\"\\n\") % decode(reportdir))\n        sys.exit(1)\n\n    os.mkdir(reportdir)\n\n    # Gather the data we 
need\n    stats = RepoAnalyze.gather_data(args)\n\n    # Write the reports\n    sys.stdout.write(_(\"Writing reports to \\\"%s\\\"...\") % decode(reportdir))\n    sys.stdout.flush()\n    RepoAnalyze.write_report(reportdir, stats)\n    sys.stdout.write(_(\"done.\\n\"))\n    sys.stdout.write(_(\"README: \\\"%s\\\"\\n\") % decode( os.path.join(reportdir, b\"README\") ))\n\nclass FileInfoValueHelper:\n  def __init__(self, replace_text, insert_blob_func, source_working_dir):\n    self.data = {}\n    self._replace_text = replace_text\n    self._insert_blob_func = insert_blob_func\n    cmd = ['git', 'cat-file', '--batch-command']\n    self._cat_file_process = subproc.Popen(cmd,\n                                           stdin = subprocess.PIPE,\n                                           stdout = subprocess.PIPE,\n                                           cwd = source_working_dir)\n\n  def finalize(self):\n    self._cat_file_process.stdin.close()\n    self._cat_file_process.wait()\n\n  def get_contents_by_identifier(self, blobhash):\n    self._cat_file_process.stdin.write(b'contents '+blobhash+b'\\n')\n    self._cat_file_process.stdin.flush()\n    line = self._cat_file_process.stdout.readline()\n    try:\n      (oid, oidtype, size) = line.split()\n    except ValueError:\n      assert(line == blobhash+b\" missing\\n\")\n      return None\n    size = int(size) # Convert e.g. b'6283' to 6283\n    assert(oidtype == b'blob')\n    contents_plus_newline = self._cat_file_process.stdout.read(size+1)\n    return contents_plus_newline[:-1] # return all but the newline\n\n  def get_size_by_identifier(self, blobhash):\n    self._cat_file_process.stdin.write(b'info '+blobhash+b'\\n')\n    self._cat_file_process.stdin.flush()\n    line = self._cat_file_process.stdout.readline()\n    (oid, oidtype, size) = line.split()\n    size = int(size) # Convert e.g. 
b'6283' to 6283\n    assert(oidtype == b'blob')\n    return size\n\n  def insert_file_with_contents(self, contents):\n    blob = Blob(contents)\n    self._insert_blob_func(blob)\n    return blob.id\n\n  def is_binary(self, contents):\n    return b\"\\0\" in contents[0:8192]\n\n  def apply_replace_text(self, contents):\n    new_contents = contents\n    for literal, replacement in self._replace_text['literals']:\n      new_contents = new_contents.replace(literal, replacement)\n    for regex,   replacement in self._replace_text['regexes']:\n      new_contents = regex.sub(replacement, new_contents)\n    return new_contents\n\nclass LFSObjectTracker:\n  class LFSObjs:\n    def __init__(self):\n      self.id_to_object_map = {}\n      self.objects = set()\n\n  def __init__(self, file_info, check_sources, check_targets):\n    self.source_objects = LFSObjectTracker.LFSObjs()\n    self.target_objects = LFSObjectTracker.LFSObjs()\n    self.hash_to_object_map = {}\n    self.file_info = file_info\n    self.check_sources = check_sources\n    self.check_targets = check_targets\n    self.objects_orphaned = False\n\n  def _get_lfs_values(self, contents):\n    values = {}\n    if len(contents) > 1024:\n      return {}\n    for line in contents.splitlines():\n      try:\n        (key, value) = line.split(b' ', 1)\n      except ValueError:\n        return {}\n      if not values and key != b'version':\n        return values\n      values[key] = value\n    return values\n\n  def check_blob_data(self, contents, fast_export_id, source):\n    if source and not self.check_sources:\n      return\n    mymap = self.source_objects if source else self.target_objects\n    lfs_object_id = self._get_lfs_values(contents).get(b'oid')\n    if lfs_object_id:\n      mymap.id_to_object_map[fast_export_id] = lfs_object_id\n\n  def check_file_change_data(self, git_id, source):\n    if source and not self.check_sources:\n      return\n    mymap = self.source_objects if source else self.target_objects\n    
if isinstance(git_id, int):\n      lfs_object_id = mymap.id_to_object_map.get(git_id)\n      if lfs_object_id:\n        mymap.objects.add(lfs_object_id)\n    else:\n      if git_id in self.hash_to_object_map:\n        mymap.objects.add(self.hash_to_object_map[git_id])\n        return\n      size = self.file_info.get_size_by_identifier(git_id)\n      if size >= 1024:\n        return\n      contents = self.file_info.get_contents_by_identifier(git_id)\n      lfs_object_id = self._get_lfs_values(contents).get(b'oid')\n      if lfs_object_id:\n        self.hash_to_object_map[git_id] = lfs_object_id\n        mymap.objects.add(lfs_object_id)\n\n  def check_output_object(self, obj):\n    if not self.check_targets:\n      return\n    if type(obj) == Blob:\n      self.check_blob_data(obj.data, obj.id, False)\n    elif type(obj) == Commit:\n      for change in obj.file_changes:\n        sys.stdout.flush()\n        if change.type != b'M' or change.mode == b'160000':\n          continue\n        self.check_file_change_data(change.blob_id, False)\n\n  def find_all_lfs_objects_in_repo(self, repo, source):\n    if not source:\n      self.file_info = FileInfoValueHelper(None, None, repo)\n    p = subproc.Popen([\"git\", \"rev-list\", \"--objects\", \"--all\"],\n                      stdout=subprocess.PIPE, stderr=subprocess.PIPE,\n                      cwd=repo)\n    for line in p.stdout.readlines():\n      try:\n        (git_oid, filename) = line.split()\n      except ValueError:\n        # Commit and tree objects only have oid\n        continue\n\n      mymap = self.source_objects if source else self.target_objects\n      size = self.file_info.get_size_by_identifier(git_oid)\n      if size >= 1024:\n        continue\n      contents = self.file_info.get_contents_by_identifier(git_oid)\n      lfs_object_id = self._get_lfs_values(contents).get(b'oid')\n      if lfs_object_id:\n        mymap.objects.add(lfs_object_id)\n    if not source:\n      self.file_info.finalize()\n\nclass 
InputFileBackup:\n  def __init__(self, input_file, output_file):\n    self.input_file  = input_file\n    self.output_file = output_file\n\n  def close(self):\n    self.input_file.close()\n    self.output_file.close()\n\n  def read(self, size):\n    output = self.input_file.read(size)\n    self.output_file.write(output)\n    return output\n\n  def readline(self):\n    line = self.input_file.readline()\n    self.output_file.write(line)\n    return line\n\nclass DualFileWriter:\n  def __init__(self, file1, file2):\n    self.file1 = file1\n    self.file2 = file2\n\n  def write(self, *args):\n    self.file1.write(*args)\n    self.file2.write(*args)\n\n  def flush(self):\n    self.file1.flush()\n    self.file2.flush()\n\n  def close(self):\n    self.file1.close()\n    self.file2.close()\n\nclass RepoFilter(object):\n  def __init__(self,\n               args,\n               filename_callback = None,\n               message_callback = None,\n               name_callback = None,\n               email_callback = None,\n               refname_callback = None,\n               blob_callback = None,\n               commit_callback = None,\n               tag_callback = None,\n               reset_callback = None,\n               done_callback = None,\n               file_info_callback = None):\n\n    self._args = args\n\n    # Repo we are exporting\n    self._repo_working_dir = None\n\n    # Store callbacks for acting on objects printed by FastExport\n    self._blob_callback        = blob_callback\n    self._commit_callback      = commit_callback\n    self._tag_callback         = tag_callback\n    self._reset_callback       = reset_callback\n    self._done_callback        = done_callback\n\n    # Store callbacks for acting on slices of FastExport objects\n    self._filename_callback    = filename_callback  # filenames from commits\n    self._message_callback     = message_callback   # commit OR tag message\n    self._name_callback        = name_callback      # author, 
committer, tagger\n    self._email_callback       = email_callback     # author, committer, tagger\n    self._refname_callback     = refname_callback   # from commit/tag/reset\n    self._file_info_callback   = file_info_callback # various file info\n    self._handle_arg_callbacks()\n\n    # Helpers for callbacks\n    self._file_info_value = None\n\n    # Defaults for input\n    self._input = None\n    self._fep = None  # Fast Export Process\n    self._fe_orig = None  # Path to where original fast-export output stored\n    self._fe_filt = None  # Path to where filtered fast-export output stored\n    self._parser = None # FastExportParser object we are working with\n\n    # Defaults for output\n    self._output = None\n    self._fip = None  # Fast Import Process\n    self._import_pipes = None\n    self._managed_output = True\n\n    # A tuple of (depth, list-of-ancestors).  Commits and ancestors are\n    # identified by their id (their 'mark' in fast-export or fast-import\n    # speak).  The depth of a commit is one more than the max depth of any\n    # of its ancestors.\n    self._graph = AncestryGraph()\n    # Another one, for ancestry of commits in the original repo\n    self._orig_graph = AncestryGraph()\n\n    # Names of files that were tweaked in any commit; such paths could lead\n    # to subsequent commits being empty\n    self._files_tweaked = set()\n\n    # A set of commit hash pairs (oldhash, newhash) which used to be merge\n    # commits but due to filtering were turned into non-merge commits.\n    # The commits probably have suboptimal commit messages (e.g. \"Merge branch\n    # next into master\").\n    self._commits_no_longer_merges = []\n\n    # A dict of original_ids to new_ids; filtering commits means getting\n    # new commit hash (sha1sums), and we record the mapping both for\n    # diagnostic purposes and so we can rewrite commit messages.  
Note that\n    # the new_id can be None rather than a commit hash if the original\n    # commit became empty and was pruned or was otherwise dropped.\n    self._commit_renames = {}\n\n    # A set of original_ids (i.e. original hashes) for which we have not yet\n    # gotten the new hashses; the value is always the corresponding fast-export\n    # id (i.e. commit.id)\n    self._pending_renames = collections.OrderedDict()\n\n    # A dict of commit_hash[0:7] -> set(commit_hashes with that prefix).\n    #\n    # It's common for commit messages to refer to commits by abbreviated\n    # commit hashes, as short as 7 characters.  To facilitate translating\n    # such short hashes, we have a mapping of prefixes to full old hashes.\n    self._commit_short_old_hashes = collections.defaultdict(set)\n\n    # A set of commit hash references appearing in commit messages which\n    # mapped to a valid commit that was removed entirely in the filtering\n    # process.  The commit message will continue to reference the\n    # now-missing commit hash, since there was nothing to map it to.\n    self._commits_referenced_but_removed = set()\n\n    # Other vars related to metadata tracking\n    self._already_ran = False\n    self._changed_refs = set()\n    self._lfs_object_tracker = None\n\n    # Progress handling (number of commits parsed, etc.)\n    self._progress_writer = ProgressWriter()\n    self._num_commits = 0\n\n    # Size of blobs in the repo\n    self._unpacked_size = {}\n\n    # Other vars\n    self._sanity_checks_handled = False\n    self._finalize_handled = False\n    self._orig_refs = None\n    self._config_settings = {}\n    self._newnames = {}\n    self._stash = None\n\n    # Cache a few message translations for performance reasons\n    self._parsed_message = _(\"Parsed %d commits\")\n\n    # Compile some regexes and cache those\n    self._hash_re = re.compile(br'(\\b[0-9a-f]{7,40}\\b)')\n\n  def _handle_arg_callbacks(self):\n    def make_callback(args, bdy):\n      
callback_globals = {g: globals()[g] for g in public_globals}\n      callback_locals = {}\n      if type(args) == str:\n        args = (args, '_do_not_use_this_var = None')\n      exec('def callback({}):\\n'.format(', '.join(args))+\n           '  '+'\\n  '.join(bdy.splitlines()), callback_globals, callback_locals)\n      return callback_locals['callback']\n    def handle(which, args=None):\n      which_under = which.replace('-','_')\n      if not args:\n        args = which\n      callback_field = '_{}_callback'.format(which_under)\n      code_string = getattr(self._args, which_under+'_callback')\n      if code_string:\n        if os.path.exists(code_string):\n          with open(code_string, 'r', encoding='utf-8') as f:\n            code_string = f.read()\n        if getattr(self, callback_field):\n          raise SystemExit(_(\"Error: Cannot pass a %s_callback to RepoFilter \"\n                             \"AND pass --%s-callback\"\n                           % (which_under, which)))\n        if 'return ' not in code_string and \\\n           which not in ('blob', 'commit', 'tag', 'reset'):\n          raise SystemExit(_(\"Error: --%s-callback should have a return statement\")\n                           % which)\n        setattr(self, callback_field, make_callback(args, code_string))\n    handle('filename')\n    handle('message')\n    handle('name')\n    handle('email')\n    handle('refname')\n    handle('blob')\n    handle('commit')\n    handle('tag')\n    handle('reset')\n    handle('file-info', ('filename', 'mode', 'blob_id', 'value'))\n\n  def _run_sanity_checks(self):\n    self._sanity_checks_handled = True\n    if not self._managed_output:\n      if not self._args.replace_refs:\n        # If not _managed_output we don't want to make extra changes to the\n        # repo, so set default to no-op 'update-no-add'\n        self._args.replace_refs = 'update-no-add'\n      return\n\n    if self._args.debug:\n      print(\"[DEBUG] Passed 
arguments:\\n{}\".format(self._args))\n\n    # Determine basic repository information\n    target_working_dir = self._args.target or b'.'\n    self._orig_refs = GitUtils.get_refs(target_working_dir)\n    is_bare = GitUtils.is_repository_bare(target_working_dir)\n    self._config_settings = GitUtils.get_config_settings(target_working_dir)\n\n    # Determine if this is second or later run of filter-repo\n    tmp_dir = self.results_tmp_dir(create_if_missing=False)\n    ran_path = os.path.join(tmp_dir, b'already_ran')\n    self._already_ran = os.path.isfile(ran_path)\n    if self._already_ran:\n      current_time = time.time()\n      file_mod_time = os.path.getmtime(ran_path)\n      file_age = current_time - file_mod_time\n      if file_age > 86400: # file older than a day\n        msg = (f\"The previous run is older than a day ({decode(ran_path)} already exists).\\n\"\n               f\"See \\\"Already Ran\\\" section in the manual for more information.\\n\"\n               f\"Treat this run as a continuation of filtering in the previous run (Y/N)? 
\")\n        response = input(msg)\n\n        if response.lower() != 'y':\n          os.remove(ran_path)\n          self._already_ran = False\n\n    # Interaction between --already-ran and --sensitive_data_removal\n    msg = textwrap.dedent(_(\"\"\"\\\n      Error: Cannot specify --sensitive-data-removal on a follow-up invocation\n             of git-filter-repo unless it was specified in previously runs.\"\"\"))\n    if self._already_ran:\n      sdr_path = os.path.join(tmp_dir, b'sensitive_data_removal')\n      sdr_previously = os.path.isfile(sdr_path)\n      if not sdr_previously and self._args.sensitive_data_removal:\n        raise SystemExit(msg)\n      # Treat this as a --sensitive-data-removal run if a previous run was,\n      # even if it wasn't specified this time\n      self._args.sensitive_data_removal = sdr_previously\n\n    # Have to check sensitive_data_removal interactions here instead of\n    # sanity_check_args because of the above interaction with already_ran stuff\n    if self._args.sensitive_data_removal:\n      if self._args.stdin:\n        msg = _(\"Error: sensitive data removal is incompatible with --stdin\")\n        raise SystemExit(msg)\n      if self._args.source or self._args.target:\n        msg = _(\"Error: sensitive data removal is incompatible with --source and --target\")\n        raise SystemExit(msg)\n\n    # Default for --replace-refs\n    if not self._args.replace_refs:\n      self._args.replace_refs = 'delete-no-add'\n    if self._args.replace_refs == 'old-default':\n      self._args.replace_refs = ('update-or-add' if self._already_ran\n                                 else 'update-and-add')\n\n    # Do sanity checks from the correct directory\n    if not self._args.force and not self._already_ran:\n      cwd = os.getcwd()\n      os.chdir(target_working_dir)\n      RepoFilter.sanity_check(self._orig_refs, is_bare, self._config_settings)\n      os.chdir(cwd)\n\n  def _setup_lfs_orphaning_checks(self):\n    # Do a couple checks to 
see if we want to do lfs orphaning checks\n    if not self._args.sensitive_data_removal:\n      return\n    metadata_dir = self.results_tmp_dir()\n    lfs_objects_file = os.path.join(metadata_dir, b'original_lfs_objects')\n    if self._already_ran:\n      # Check if we did lfs filtering in the previous run\n      if not os.path.isfile(lfs_objects_file):\n        return\n\n    # Set up self._file_info_value so we can query git for stuff\n    source_working_dir = self._args.source or b'.'\n    self._file_info_value = FileInfoValueHelper(self._args.replace_text,\n                                                self.insert,\n                                                source_working_dir)\n\n    # One more check to see if we want to do lfs orphaning checks\n    if not self._already_ran:\n      # Check if lfs filtering is active in HEAD's .gitattributes file\n      a = self._file_info_value.get_contents_by_identifier(b\"HEAD:.gitattributes\")\n      if not a or not re.search(rb'\\bfilter=lfs\\b', a):\n        return\n\n    # Set up the object tracker\n    check_sources = not self._already_ran and not self._args.partial\n    check_targets = not self._args.partial\n    self._lfs_object_tracker = LFSObjectTracker(self._file_info_value,\n                                                check_sources,\n                                                check_targets)\n    self._parser._lfs_object_tracker = self._lfs_object_tracker # kinda gross\n\n    # Get initial objects\n    if self._already_ran:\n      with open(lfs_objects_file, 'br') as f:\n        for line in f:\n          self._lfs_object_tracker.source_objects.objects.add(line.strip())\n    elif self._args.partial:\n      source = True\n      self._lfs_object_tracker.find_all_lfs_objects_in_repo(source_working_dir,\n                                                            source)\n\n  @staticmethod\n  def loose_objects_are_replace_refs(git_dir, refs, num_loose_objects):\n    replace_objects = set()\n    for 
refname, rev in refs.items():\n      if not refname.startswith(b'refs/replace/'):\n        continue\n      replace_objects.add(rev)\n\n    validobj_re = re.compile(rb'^[0-9a-f]{40}$')\n    object_dir=os.path.join(git_dir, b'objects')\n    for root, dirs, files in os.walk(object_dir):\n      for filename in files:\n        objname = os.path.basename(root)+filename\n        if objname not in replace_objects and validobj_re.match(objname):\n          return False\n\n    return True\n\n  @staticmethod\n  def sanity_check(refs, is_bare, config_settings):\n    def abort(reason):\n      dirname = config_settings.get(b'remote.origin.url', b'')\n      msg = \"\"\n      if dirname and os.path.isdir(dirname):\n        msg = _(\"Note: when cloning local repositories, you need to pass\\n\"\n                \"      --no-local to git clone to avoid this issue.\\n\")\n      raise SystemExit(\n        _(\"Aborting: Refusing to destructively overwrite repo history since\\n\"\n          \"this does not look like a fresh clone.\\n\"\n          \"  (%s)\\n%s\"\n          \"Please operate on a fresh clone instead.  
If you want to proceed\\n\"\n          \"anyway, use --force.\") % (reason, msg))\n\n    # Avoid letting people running with weird setups and overwriting GIT_DIR\n    # elsewhere\n    git_dir = GitUtils.determine_git_dir(b'.')\n    if is_bare and git_dir != b'.':\n      abort(_(\"GIT_DIR must be .\"))\n    elif not is_bare and git_dir != b'.git':\n      abort(_(\"GIT_DIR must be .git\"))\n\n    # Check for refname collisions\n    if config_settings.get(b'core.ignorecase', b'false') == b'true':\n      collisions = collections.defaultdict(list)\n      for ref in refs:\n        collisions[ref.lower()].append(ref)\n      msg = \"\"\n      for ref in collisions:\n        if len(collisions[ref]) >= 2:\n          msg += \"    \" + decode(b\", \".join(collisions[ref])) + \"\\n\"\n      if msg:\n        raise SystemExit(\n          _(\"Aborting: Cannot rewrite history on a case insensitive\\n\"\n            \"filesystem since you have refs that differ in case only:\\n\"\n            \"%s\") % msg)\n    if config_settings.get(b'core.precomposeunicode', b'false') == b'true':\n      import unicodedata # Mac users need to have python-3.8\n      collisions = collections.defaultdict(list)\n      for ref in refs:\n        strref = decode(ref)\n        collisions[unicodedata.normalize('NFC', strref)].append(strref)\n      msg = \"\"\n      for ref in collisions:\n        if len(collisions[ref]) >= 2:\n          msg += \"    \" + \", \".join(collisions[ref]) + \"\\n\"\n      if msg:\n        raise SystemExit(\n          _(\"Aborting: Cannot rewrite history on a character normalizing\\n\"\n            \"filesystem since you have refs that differ in normalization:\\n\"\n            \"%s\") % msg)\n\n    # Make sure repo is fully packed, just like a fresh clone would be.\n    # Note that transfer.unpackLimit defaults to 100, meaning that a\n    # repository with no packs and less than 100 objects should be considered\n    # fully packed.\n    output = subproc.check_output('git 
count-objects -v'.split())\n    stats = dict(x.split(b': ') for x in output.splitlines())\n    num_packs = int(stats[b'packs'])\n    num_loose_objects = int(stats[b'count'])\n    if num_packs > 1 or \\\n       num_loose_objects >= 100 or \\\n       (num_packs == 1 and num_loose_objects > 0 and\n        not RepoFilter.loose_objects_are_replace_refs(git_dir, refs,\n                                                      num_loose_objects)):\n      abort(_(\"expected freshly packed repo\"))\n\n    # Make sure there is precisely one remote, named \"origin\"...or that this\n    # is a new bare repo with no packs and no remotes\n    output = subproc.check_output('git remote'.split()).strip()\n    if not (output == b\"origin\" or (num_packs == 0 and not output)):\n      abort(_(\"expected one remote, origin\"))\n\n    # Make sure that all reflogs have precisely one entry\n    reflog_dir=os.path.join(git_dir, b'logs')\n    for root, dirs, files in os.walk(reflog_dir):\n      for filename in files:\n        pathname = os.path.join(root, filename)\n        with open(pathname, 'br') as f:\n          if len(f.read().splitlines()) > 1:\n            shortpath = pathname[len(reflog_dir)+1:]\n            abort(_(\"expected at most one entry in the reflog for %s\") %\n                  decode(shortpath))\n\n    # Make sure there are no stashed changes\n    if b'refs/stash' in refs:\n      abort(_(\"has stashed changes\"))\n\n    # Do extra checks in non-bare repos\n    if not is_bare:\n      # Avoid uncommitted, unstaged, or untracked changes\n      if subproc.call('git diff --staged --quiet'.split()):\n        abort(_(\"you have uncommitted changes\"))\n      if subproc.call('git diff --quiet'.split()):\n        abort(_(\"you have unstaged changes\"))\n      untracked_output = subproc.check_output('git ls-files -o'.split())\n      if len(untracked_output) > 0:\n        uf = untracked_output.rstrip(b'\\n').split(b'\\n')\n        # Since running git-filter-repo can result in files 
being written to\n        # __pycache__ (depending on python version, env vars, etc.), let's\n        # ignore those as far as \"clean clone\" is concerned.\n        relevant_uf = [x for x in uf\n                       if not x.startswith(b'__pycache__/git_filter_repo.')]\n        if len(relevant_uf) > 0:\n          abort(_(\"you have untracked changes\"))\n\n      # Avoid unpushed changes\n      for refname, rev in refs.items():\n        if not refname.startswith(b'refs/heads/'):\n          continue\n        origin_ref = refname.replace(b'refs/heads/', b'refs/remotes/origin/')\n        if origin_ref not in refs:\n          abort(_('%s exists, but %s not found') % (decode(refname),\n                                                    decode(origin_ref)))\n        if rev != refs[origin_ref]:\n          abort(_('%s does not match %s') % (decode(refname),\n                                             decode(origin_ref)))\n\n      # Make sure there is only one worktree\n      output = subproc.check_output('git worktree list'.split())\n      if len(output.splitlines()) > 1:\n        abort(_('you have multiple worktrees'))\n\n  def cleanup(self, repo, repack, reset,\n              run_quietly=False, show_debuginfo=False):\n    ''' cleanup repo; if repack then expire reflogs and do a gc --prune=now.\n        if reset then do a reset --hard.  Optionally also curb output if\n        run_quietly is True, or go the opposite direction and show extra\n        output if show_debuginfo is True. 
'''\n    assert not (run_quietly and show_debuginfo)\n\n    if (repack and not run_quietly and not show_debuginfo):\n      print(_(\"Repacking your repo and cleaning out old unneeded objects\"))\n    quiet_flags = '--quiet' if run_quietly else ''\n    cleanup_cmds = []\n    if repack:\n      cleanup_cmds = ['git reflog expire --expire=now --all'.split(),\n                      'git gc {} --prune=now'.format(quiet_flags).split()]\n    if reset:\n      cleanup_cmds.insert(0, 'git reset {} --hard'.format(quiet_flags).split())\n    location_info = ' (in {})'.format(decode(repo)) if repo != b'.' else ''\n    for cmd in cleanup_cmds:\n      if show_debuginfo:\n        print(\"[DEBUG] Running{}: {}\".format(location_info, ' '.join(cmd)))\n      ret = subproc.call(cmd, cwd=repo)\n      if ret != 0:\n        raise SystemExit(\"fatal: running '%s' failed!\" % ' '.join(cmd))\n      if cmd[0:3] == 'git reflog expire'.split():\n        self._write_stash()\n\n  def _get_rename(self, old_hash):\n    # If we already know the rename, just return it\n    new_hash = self._commit_renames.get(old_hash, None)\n    if new_hash:\n      return new_hash\n\n    # If it's not in the remaining pending renames, we don't know it\n    if old_hash is not None and old_hash not in self._pending_renames:\n      return None\n\n    # Read through the pending renames until we find it or we've read them all,\n    # and return whatever we might find\n    self._flush_renames(old_hash)\n    return self._commit_renames.get(old_hash, None)\n\n  def _flush_renames(self, old_hash=None, limit=0):\n    # Parse through self._pending_renames until we have read enough.  
We have\n    # read enough if:\n    #   self._pending_renames is empty\n    #   old_hash != None and we found a rename for old_hash\n    #   limit > 0 and len(self._pending_renames) started less than 2*limit\n    #   limit > 0 and len(self._pending_renames) < limit\n    if limit and len(self._pending_renames) < 2 * limit:\n      return\n    fi_input, fi_output = self._import_pipes\n    while self._pending_renames:\n      orig_hash, new_fast_export_id = self._pending_renames.popitem(last=False)\n      new_hash = fi_output.readline().rstrip()\n      self._commit_renames[orig_hash] = new_hash\n      self._graph.record_hash(new_fast_export_id, new_hash)\n      if old_hash == orig_hash:\n        return\n      if limit and len(self._pending_renames) < limit:\n        return\n\n  def _translate_commit_hash(self, matchobj_or_oldhash):\n    old_hash = matchobj_or_oldhash\n    if not isinstance(matchobj_or_oldhash, bytes):\n      old_hash = matchobj_or_oldhash.group(1)\n    orig_len = len(old_hash)\n    new_hash = self._get_rename(old_hash)\n    if new_hash is None:\n      if old_hash[0:7] not in self._commit_short_old_hashes:\n        self._commits_referenced_but_removed.add(old_hash)\n        return old_hash\n      possibilities = self._commit_short_old_hashes[old_hash[0:7]]\n      matches = [x for x in possibilities\n                 if x[0:orig_len] == old_hash]\n      if len(matches) != 1:\n        self._commits_referenced_but_removed.add(old_hash)\n        return old_hash\n      old_hash = matches[0]\n      new_hash = self._get_rename(old_hash)\n\n    assert new_hash is not None\n    return new_hash[0:orig_len]\n\n  def _maybe_trim_extra_parents(self, orig_parents, parents):\n    '''Due to pruning of empty commits, some parents could be non-existent\n       (None) or otherwise redundant.  
Remove the non-existent parents, and\n       remove redundant parents ***SO LONG AS*** that doesn't transform a\n       merge commit into a non-merge commit.\n\n       Returns a tuple:\n         (parents, new_first_parent_if_would_become_non_merge)'''\n\n    always_prune = (self._args.prune_degenerate == 'always')\n\n    # Pruning of empty commits means multiple things:\n    #   * An original parent of this commit may have been pruned causing the\n    #     need to rewrite the reported parent to the nearest ancestor.  We\n    #     want to know when we're dealing with such a parent.\n    #   * Further, there may be no \"nearest ancestor\" if the entire history\n    #     of that parent was also pruned.  (Detectable by the parent being\n    #     'None')\n    # Remove all parents rewritten to None, and keep track of which parents\n    # were rewritten to an ancestor.\n    tmp = zip(parents,\n              orig_parents,\n              [(x in _SKIPPED_COMMITS or always_prune) for x in orig_parents])\n    tmp2 = [x for x in tmp if x[0] is not None]\n    if not tmp2:\n      # All ancestors have been pruned; we have no parents.\n      return [], None\n    parents, orig_parents, is_rewritten = [list(x) for x in zip(*tmp2)]\n\n    # We can't have redundant parents if we don't have at least 2 parents\n    if len(parents) < 2:\n      return parents, None\n\n    # Don't remove redundant parents if user doesn't want us to\n    if self._args.prune_degenerate == 'never':\n      return parents, None\n\n    # Remove duplicate parents (if both sides of history have lots of commits\n    # which become empty due to pruning, the most recent ancestor on both\n    # sides may be the same commit), except only remove parents that have\n    # been rewritten due to previous empty pruning.\n    seen = set()\n    seen_add = seen.add\n    # Deleting duplicate rewritten parents means keeping parents if either\n    # they have not been seen or they are ones that have not been rewritten.\n    
parents_copy = parents\n    uniq = [[p, orig_parents[i], is_rewritten[i]] for i, p in enumerate(parents)\n            if not (p in seen or seen_add(p)) or not is_rewritten[i]]\n    parents, orig_parents, is_rewritten = [list(x) for x in zip(*uniq)]\n    if len(parents) < 2:\n      return parents_copy, parents[0]\n\n    # Flatten unnecessary merges.  (If one side of history is entirely\n    # empty commits that were pruned, we may end up attempting to\n    # merge a commit with its ancestor.  Remove parents that are an\n    # ancestor of another parent.)\n    num_parents = len(parents)\n    to_remove = []\n    for cur in range(num_parents):\n      if not is_rewritten[cur]:\n        continue\n      for other in range(num_parents):\n        if cur == other:\n          continue\n        if not self._graph.is_ancestor(parents[cur], parents[other]):\n          continue\n        # parents[cur] is an ancestor of parents[other], so parents[cur]\n        # seems redundant.  However, if it was intentionally redundant\n        # (e.g. a no-ff merge) in the original, then we want to keep it.\n        if not always_prune and \\\n           self._orig_graph.is_ancestor(orig_parents[cur],\n                                        orig_parents[other]):\n          continue\n        # Some folks want their history to have all first parents be merge\n        # commits (except for any root commits), and always do a merge --no-ff.\n        # For such folks, don't remove the first parent even if it's an\n        # ancestor of other commits.\n        if self._args.no_ff and cur == 0:\n          continue\n        # Okay so the cur-th parent is an ancestor of the other-th parent,\n        # and it wasn't that way in the original repository; mark the\n        # cur-th parent as removable.\n        to_remove.append(cur)\n        break # cur removed, so skip rest of others -- i.e. 
check cur+=1\n    for x in reversed(to_remove):\n      parents.pop(x)\n    if len(parents) < 2:\n      return parents_copy, parents[0]\n\n    return parents, None\n\n  def _prunable(self, commit, new_1st_parent, had_file_changes, orig_parents):\n    parents = commit.parents\n\n    if self._args.prune_empty == 'never':\n      return False\n    always_prune = (self._args.prune_empty == 'always')\n\n    # For merge commits, unless there are prunable (redundant) parents, we\n    # do not want to prune\n    if len(parents) >= 2 and not new_1st_parent:\n      return False\n\n    if len(parents) < 2:\n      # Special logic for commits that started empty...\n      if not had_file_changes and not always_prune:\n        had_parents_pruned = (len(parents) < len(orig_parents) or\n                              (len(orig_parents) == 1 and\n                               orig_parents[0] in _SKIPPED_COMMITS))\n        # If the commit remains empty and had parents which were pruned,\n        # then prune this commit; otherwise, retain it\n        return (not commit.file_changes and had_parents_pruned)\n\n      # We can only get here if the commit didn't start empty, so if it's\n      # empty now, it obviously became empty\n      if not commit.file_changes:\n        return True\n\n    # If there are no parents of this commit and we didn't match the case\n    # above, then this commit cannot be pruned.  
Since we have no parent(s)\n    # to compare to, abort now to prevent future checks from failing.\n    if not parents:\n      return False\n\n    # Similarly, we cannot handle the hard cases if we don't have a pipe\n    # to communicate with fast-import\n    if not self._import_pipes:\n      return False\n\n    # If there have not been renames/remappings of IDs (due to insertion of\n    # new blobs), then we can sometimes know things aren't prunable with a\n    # simple check\n    if not _IDS.has_renames():\n      # non-merge commits can only be empty if blob/file-change editing caused\n      # all file changes in the commit to have the same file contents as\n      # the parent.\n      changed_files = set(change.filename for change in commit.file_changes)\n      if len(orig_parents) < 2 and changed_files - self._files_tweaked:\n        return False\n\n    # Finally, the hard case: due to either blob rewriting, or due to pruning\n    # of empty commits wiping out the first parent history back to the merge\n    # base, the list of file_changes we have may not actually differ from our\n    # (new) first parent's version of the files, i.e. this would actually be\n    # an empty commit.  Check by comparing the contents of this commit to its\n    # (remaining) parent.\n    #\n    # NOTE on why this works, for the case of original first parent history\n    # having been pruned away due to being empty:\n    #     The first parent history having been pruned away due to being\n    #     empty implies the original first parent would have a tree (after\n    #     filtering) that matched the merge base's tree.  
Since\n    #     file_changes has the changes needed to go from what would have\n    #     been the first parent to our new commit, and what would have been\n    #     our first parent has a tree that matches the merge base, then if\n    #     the new first parent has a tree matching the versions of files in\n    #     file_changes, then this new commit is empty and thus prunable.\n    fi_input, fi_output = self._import_pipes\n    self._flush_renames()  # Avoid fi_output having other stuff present\n    # Optimization note: we could have two loops over file_changes, the\n    # first doing all the self._output.write() calls, and the second doing\n    # the rest.  But I'm worried about fast-import blocking on fi_output\n    # buffers filling up so I instead read from it as I go.\n    for change in commit.file_changes:\n      parent = new_1st_parent or commit.parents[0] # exists due to above checks\n      quoted_filename = PathQuoting.enquote(change.filename)\n      if isinstance(parent, int):\n        self._output.write(b\"ls :%d %s\\n\" % (parent, quoted_filename))\n      else:\n        self._output.write(b\"ls %s %s\\n\" % (parent, quoted_filename))\n      self._output.flush()\n      parent_version = fi_output.readline().split()\n      if change.type == b'D':\n        if parent_version != [b'missing', quoted_filename]:\n          return False\n      else:\n        blob_sha = change.blob_id\n        if isinstance(change.blob_id, int):\n          self._output.write(b\"get-mark :%d\\n\" % change.blob_id)\n          self._output.flush()\n          blob_sha = fi_output.readline().rstrip()\n        if parent_version != [change.mode, b'blob', blob_sha, quoted_filename]:\n          return False\n\n    return True\n\n  def _record_remapping(self, commit, orig_parents):\n    new_id = None\n    # Record the mapping of old commit hash to new one\n    if commit.original_id and self._import_pipes:\n      fi_input, fi_output = self._import_pipes\n      
self._output.write(b\"get-mark :%d\\n\" % commit.id)\n      self._output.flush()\n      orig_id = commit.original_id\n      self._commit_short_old_hashes[orig_id[0:7]].add(orig_id)\n      # Note that we have queued up an id for later reading; flush a\n      # few of the older ones if we have too many queued up\n      self._pending_renames[orig_id] = commit.id\n      self._flush_renames(None, limit=40)\n    # Also, record if this was a merge commit that turned into a non-merge\n    # commit.\n    if len(orig_parents) >= 2 and len(commit.parents) < 2:\n      self._commits_no_longer_merges.append((commit.original_id, new_id))\n\n  def callback_metadata(self, extra_items = dict()):\n    return {'commit_rename_func': self._translate_commit_hash,\n            'ancestry_graph': self._graph,\n            'original_ancestry_graph': self._orig_graph,\n            **extra_items}\n\n  def _tweak_blob(self, blob):\n    if self._args.max_blob_size and len(blob.data) > self._args.max_blob_size:\n      blob.skip()\n\n    if blob.original_id in self._args.strip_blobs_with_ids:\n      blob.skip()\n\n    if ( self._args.replace_text\n        and not self._file_info_callback\n        # not (if blob contains zero byte in the first 8Kb, that is, if blob is binary data)\n        and not b\"\\0\" in blob.data[0:8192]\n    ):\n      for literal, replacement in self._args.replace_text['literals']:\n        blob.data = blob.data.replace(literal, replacement)\n      for regex,   replacement in self._args.replace_text['regexes']:\n        blob.data = regex.sub(replacement, blob.data)\n\n    if self._blob_callback:\n      self._blob_callback(blob, self.callback_metadata())\n\n    self._insert_into_stream(blob)\n\n  def _filter_files(self, commit):\n    def filename_matches(path_expression, pathname):\n      ''' Returns whether path_expression matches pathname or a leading\n          directory thereof, allowing path_expression to not have a trailing\n          slash even if it is meant to match 
a leading directory. '''\n      if path_expression == b'':\n        return True\n      n = len(path_expression)\n      if (pathname.startswith(path_expression) and\n          (path_expression[n-1:n] == b'/' or\n           len(pathname) == n or\n           pathname[n:n+1] == b'/')):\n        return True\n      return False\n\n    def newname(path_changes, pathname, use_base_name, filtering_is_inclusive):\n      ''' Applies filtering and rename changes from path_changes to pathname,\n          returning any of None (file isn't wanted), original filename (file\n          is wanted with original name), or new filename. '''\n      wanted = False\n      full_pathname = pathname\n      if use_base_name:\n        pathname = os.path.basename(pathname)\n      for (mod_type, match_type, path_exp) in path_changes:\n        if mod_type == 'filter' and not wanted:\n          assert match_type in ('match', 'glob', 'regex')\n          if match_type == 'match' and filename_matches(path_exp, pathname):\n            wanted = True\n          if match_type == 'glob' and fnmatch.fnmatch(pathname, path_exp):\n            wanted = True\n          if match_type == 'regex' and path_exp.search(pathname):\n            wanted = True\n        elif mod_type == 'rename':\n          match, repl = path_exp\n          assert match_type in ('match','regex') # glob was translated to regex\n          if match_type == 'match' and filename_matches(match, full_pathname):\n            full_pathname = full_pathname.replace(match, repl, 1)\n            pathname = full_pathname # rename incompatible with use_base_name\n          if match_type == 'regex':\n            full_pathname = match.sub(repl, full_pathname)\n            pathname = full_pathname # rename incompatible with use_base_name\n      return full_pathname if (wanted == filtering_is_inclusive) else None\n\n    args = self._args\n    new_file_changes = {}  # Assumes no renames or copies, otherwise collisions\n    for change in 
commit.file_changes:\n      # NEEDSWORK: _If_ we ever want to pass `--full-tree` to fast-export and\n      # parse that output, we'll need to modify this block; `--full-tree`\n      # issues a deleteall directive which has no filename, and thus this\n      # block would normally strip it.  Of course, FileChange() and\n      # _parse_optional_filechange() would need updates too.\n      if change.type == b'DELETEALL':\n        new_file_changes[b''] = change\n        continue\n      if change.filename in self._newnames:\n        change.filename = self._newnames[change.filename]\n      else:\n        original_filename = change.filename\n        change.filename = newname(args.path_changes, change.filename,\n                                  args.use_base_name, args.inclusive)\n        if self._filename_callback:\n          change.filename = self._filename_callback(change.filename)\n        self._newnames[original_filename] = change.filename\n      if not change.filename:\n        continue # Filtering criteria excluded this file; move on to next one\n      if change.filename in new_file_changes:\n        # Getting here means that path renaming is in effect, and caused one\n        # path to collide with another.  That's usually bad, but can be okay\n        # under two circumstances:\n        #   1) Sometimes people have a file named OLDFILE in old revisions of\n        #      history, and they rename to NEWFILE, and would like to rewrite\n        #      history so that all revisions refer to it as NEWFILE.  As such,\n        #      we can allow a collision when (at least) one of the two paths\n        #      is a deletion.  Note that if OLDFILE and NEWFILE are unrelated\n        #      this also allows the rewrite to continue, which makes sense\n        #      since OLDFILE is no longer in the way.\n        #   2) If OLDFILE and NEWFILE are exactly equal, then writing them\n        #      both to the same location poses no problem; we only need one\n        #      file. 
 (This could come up if someone copied a file in some\n        #      commit, then later either deleted the file or kept it exactly\n        #      in sync with the original with any changes, and then decides\n        #      they want to rewrite history to only have one of the two files)\n        colliding_change = new_file_changes[change.filename]\n        if change.type == b'D':\n          # We can just throw this one away and keep the other\n          continue\n        elif change.type == b'M' and (\n            change.mode == colliding_change.mode and\n            change.blob_id == colliding_change.blob_id):\n          # The two are identical, so we can throw this one away and keep other\n          continue\n        elif new_file_changes[change.filename].type != b'D':\n          raise SystemExit(_(\"File renaming caused colliding pathnames!\\n\") +\n                           _(\"  Commit: {}\\n\").format(commit.original_id) +\n                           _(\"  Filename: {}\").format(change.filename))\n      # Strip files that are too large\n      if self._args.max_blob_size and \\\n         self._unpacked_size.get(change.blob_id, 0) > self._args.max_blob_size:\n        continue\n      if self._args.strip_blobs_with_ids and \\\n         change.blob_id in self._args.strip_blobs_with_ids:\n        continue\n      # Otherwise, record the change\n      new_file_changes[change.filename] = change\n    commit.file_changes = [v for k,v in sorted(new_file_changes.items())]\n\n  def _tweak_commit(self, commit, aux_info):\n    if self._args.replace_message:\n      for literal, replacement in self._args.replace_message['literals']:\n        commit.message = commit.message.replace(literal, replacement)\n      for regex,   replacement in self._args.replace_message['regexes']:\n        commit.message = regex.sub(replacement, commit.message)\n    if self._message_callback:\n      commit.message = self._message_callback(commit.message)\n\n    # Change the commit message 
according to callback\n    if not self._args.preserve_commit_hashes:\n      commit.message = self._hash_re.sub(self._translate_commit_hash,\n                                         commit.message)\n\n    # Change the author & committer according to mailmap rules\n    args = self._args\n    if args.mailmap:\n      commit.author_name, commit.author_email = \\\n          args.mailmap.translate(commit.author_name, commit.author_email)\n      commit.committer_name, commit.committer_email = \\\n          args.mailmap.translate(commit.committer_name, commit.committer_email)\n    # Change author & committer according to callbacks\n    if self._name_callback:\n      commit.author_name = self._name_callback(commit.author_name)\n      commit.committer_name = self._name_callback(commit.committer_name)\n    if self._email_callback:\n      commit.author_email = self._email_callback(commit.author_email)\n      commit.committer_email = self._email_callback(commit.committer_email)\n\n    # Sometimes the 'branch' given is a tag; if so, rename it as requested so\n    # we don't get any old tagnames\n    if self._args.tag_rename:\n      commit.branch = RepoFilter._do_tag_rename(args.tag_rename, commit.branch)\n    if self._refname_callback:\n      commit.branch = self._refname_callback(commit.branch)\n\n    # Filter or rename the list of file changes\n    orig_file_changes = set(commit.file_changes)\n    self._filter_files(commit)\n\n    # Record ancestry graph\n    parents, orig_parents = commit.parents, aux_info['orig_parents']\n    if self._args.state_branch:\n      external_parents = parents\n    else:\n      external_parents = [p for p in parents if not isinstance(p, int)]\n    # The use of 'reversed' is intentional here; there is a risk that we have\n    # duplicates in parents, and we want to map from parents to the first\n    # entry we find in orig_parents in such cases.\n    parent_reverse_dict = dict(zip(reversed(parents), reversed(orig_parents)))\n\n    
self._graph.record_external_commits(external_parents)\n    self._orig_graph.record_external_commits(external_parents)\n    self._graph.add_commit_and_parents(commit.id, parents) # new githash unknown\n    self._orig_graph.add_commit_and_parents(commit.old_id, orig_parents,\n                                            commit.original_id)\n\n    # Prune parents (due to pruning of empty commits) if relevant, note that\n    # new_1st_parent is None unless this was a merge commit that is becoming\n    # a non-merge\n    prev_1st_parent = parents[0] if parents else None\n    parents, new_1st_parent = self._maybe_trim_extra_parents(orig_parents,\n                                                             parents)\n    commit.parents = parents\n\n    # If parents were pruned, then we need our file changes to be relative\n    # to the new first parent\n    #\n    # Notes:\n    #   * new_1st_parent and new_1st_parent != parents[0] uniquely happens for example when:\n    #       working on merge, selecting subset of files and merge base still\n    #       valid while first parent history doesn't touch any of those paths,\n    #       but second parent history does.  prev_1st_parent had already been\n    #       rewritten to the non-None first ancestor and it remains valid.\n    #       self._maybe_trim_extra_parents() avoids removing this first parent\n    #       because it'd make the commit a non-merge.  However, if there are\n    #       no file_changes of note, we'll drop this commit and mark\n    #       new_1st_parent as the new replacement.  To correctly determine if\n    #       there are no file_changes of note, we need to have the list of\n    #       file_changes relative to new_1st_parent.\n    #       (See t9390#3, \"basic -> basic-ten using '--path ten'\")\n    #   * prev_1st_parent != parents[0] happens for example when:\n    #       similar to above, but the merge base is no longer valid and was\n    #       pruned away as well.  Then parents started as e.g. 
[None, $num],\n    #       and both prev_1st_parent and new_1st_parent are None, while parents\n    #       after self._maybe_trim_extra_parents() becomes just [$num].\n    #       (See t9390#67, \"degenerate merge with non-matching filename\".)\n    #       Since $num was originally a second parent, we need to rewrite\n    #       file changes to be relative to parents[0].\n    #   * TODO: We should be getting the changes relative to the new first\n    #     parent even if self._fep is None, BUT we can't.  Our method of\n    #     getting the changes right now is an external git diff invocation,\n    #     which we can't do if we just have a fast export stream.  We can't\n    #     really work around it by querying the fast-import stream either,\n    #     because the 'ls' directive only allows us to list info about\n    #     specific paths, but we need to find out which paths exist in two\n    #     commits and then query them.  We could maybe force checkpointing in\n    #     fast-import, then doing a diff from what'll be the new first parent\n    #     back to prev_1st_parent (which may be None, i.e. empty tree), using\n    #     the fact that in A->{B,C}->D, where D is merge of B & C, the diff\n    #     from C->D == C->A + A->B + B->D, and in these cases A==B, so it\n    #     simplifies to C->D == C->A + B->D, and C is our new 1st parent\n    #     commit, A is prev_1st_commit, and B->D is commit.file_changes that\n    #     we already have.  However, checkpointing the fast-import process\n    #     and figuring out how long to wait before we can run our diff just\n    #     seems excessive. For now, just punt and assume the merge wasn't\n    #     \"evil\" (i.e. that it's remerge-diff is empty, as is true for most\n    #     merges).  
If the merge isn't evil, no further steps are necessary.\n    if parents and self._fep and (\n        prev_1st_parent != parents[0] or\n        new_1st_parent and new_1st_parent != parents[0]):\n      # Get the id from the original fast export stream corresponding to the\n      # new 1st parent.  As noted above, that new 1st parent might be\n      # new_1st_parent, or if that is None, it'll be parents[0].\n      will_be_1st = new_1st_parent or parents[0]\n      old_id = parent_reverse_dict[will_be_1st]\n      # Now, translate that to a hash\n      will_be_1st_commit_hash = self._orig_graph.map_to_hash(old_id)\n      # Get the changes from what is going to be the new 1st parent to this\n      # merge commit.  Note that since we are going from the new 1st parent\n      # to the merge commit, we can just replace the existing\n      # commit.file_changes rather than getting something we need to combine\n      # with the existing commit.file_changes.  Also, we can just replace\n      # because prev_1st_parent is an ancestor of will_be_1st_commit_hash\n      # (or prev_1st_parent is None and first parent history is gone), so\n      # even if we retain prev_1st_parent and do not prune it, the changes\n      # will still work given the snapshot-based way fast-export/fast-import\n      # work.\n      commit.file_changes = GitUtils.get_file_changes(self._repo_working_dir,\n                                                      will_be_1st_commit_hash,\n                                                      commit.original_id)\n\n      # Save these and filter them\n      orig_file_changes = set(commit.file_changes)\n      self._filter_files(commit)\n\n    # Process the --file-info-callback\n    if self._file_info_callback:\n      if self._file_info_value is None:\n        source_working_dir = self._args.source or b'.'\n        self._file_info_value = FileInfoValueHelper(self._args.replace_text,\n                                                    self.insert,\n                  
                                  source_working_dir)\n      new_file_changes = []\n      for change in commit.file_changes:\n        if change.type != b'D':\n          assert(change.type == b'M')\n          (filename, mode, blob_id) = \\\n            self._file_info_callback(change.filename,\n                                     change.mode,\n                                     change.blob_id,\n                                     self._file_info_value)\n          if mode is None:\n            # TODO: Should deletion of the file even be a feature?  Might\n            # want to remove this branch of the if-elif-else.\n            assert(filename is not None)\n            assert(blob_id is not None)\n            new_change = FileChange(b'D', filename)\n          elif filename is None:\n            continue # Drop the FileChange from this commit\n          else:\n            new_change = FileChange(b'M', filename, blob_id, mode)\n        else:\n          new_change = change  # use change as-is for deletions\n        new_file_changes.append(new_change)\n      commit.file_changes = new_file_changes\n\n    # Call the user-defined callback, if any\n    if self._commit_callback:\n      self._commit_callback(commit, self.callback_metadata(aux_info))\n\n    # Find out which files were modified by the callbacks.  Such paths could\n    # lead to subsequent commits being empty (e.g. 
if removing a line containing\n    # a password from every version of a file that had the password, and some\n    # later commit did nothing more than remove that line)\n    final_file_changes = set(commit.file_changes)\n    if self._args.replace_text or self._blob_callback:\n      differences = orig_file_changes.union(final_file_changes)\n    else:\n      differences = orig_file_changes.symmetric_difference(final_file_changes)\n    self._files_tweaked.update(x.filename for x in differences)\n\n    # Now print the resulting commit, or if prunable skip it\n    if not commit.dumped:\n      if not self._prunable(commit, new_1st_parent,\n                            aux_info['had_file_changes'], orig_parents):\n        self._insert_into_stream(commit)\n        self._record_remapping(commit, orig_parents)\n      else:\n        rewrite_to = new_1st_parent or commit.first_parent()\n        commit.skip(new_id = rewrite_to)\n        if self._args.state_branch:\n          alias = Alias(commit.old_id or commit.id, rewrite_to or deleted_hash)\n          self._insert_into_stream(alias)\n        if commit.branch.startswith(b'refs/') or commit.branch == b'HEAD':\n          # The special check above is because when direct revisions are passed\n          # along to fast-export (such as with stashes), there is a chance the\n          # revision is rewritten to nothing.  
In such cases, we don't want to\n          # point an invalid ref that just names a revision to some other point.\n          reset = Reset(commit.branch, rewrite_to or deleted_hash)\n          self._insert_into_stream(reset)\n        self._commit_renames[commit.original_id] = None\n\n    # Show progress\n    self._num_commits += 1\n    if not self._args.quiet:\n      self._progress_writer.show(self._parsed_message % self._num_commits)\n\n  @staticmethod\n  def _do_tag_rename(rename_pair, tagname):\n    old, new = rename_pair.split(b':', 1)\n    old, new = b'refs/tags/'+old, b'refs/tags/'+new\n    if tagname.startswith(old):\n      return tagname.replace(old, new, 1)\n    return tagname\n\n  def _tweak_tag(self, tag):\n    # Tweak the tag message according to callbacks\n    if self._args.replace_message:\n      for literal, replacement in self._args.replace_message['literals']:\n        tag.message = tag.message.replace(literal, replacement)\n      for regex,   replacement in self._args.replace_message['regexes']:\n        tag.message = regex.sub(replacement, tag.message)\n    if self._message_callback:\n      tag.message = self._message_callback(tag.message)\n\n    # Tweak the tag name according to tag-name-related callbacks\n    tag_prefix = b'refs/tags/'\n    fullref = tag_prefix+tag.ref\n    if self._args.tag_rename:\n      fullref = RepoFilter._do_tag_rename(self._args.tag_rename, fullref)\n    if self._refname_callback:\n      fullref = self._refname_callback(fullref)\n      if not fullref.startswith(tag_prefix):\n        msg = \"Error: fast-import requires tags to be in refs/tags/ namespace.\"\n        msg += \"\\n       {} renamed to {}\".format(tag_prefix+tag.ref, fullref)\n        raise SystemExit(msg)\n    tag.ref = fullref[len(tag_prefix):]\n\n    # Tweak the tagger according to callbacks\n    if self._args.mailmap:\n      tag.tagger_name, tag.tagger_email = \\\n          self._args.mailmap.translate(tag.tagger_name, tag.tagger_email)\n    if 
self._name_callback:\n      tag.tagger_name = self._name_callback(tag.tagger_name)\n    if self._email_callback:\n      tag.tagger_email = self._email_callback(tag.tagger_email)\n\n    # Call general purpose tag callback\n    if self._tag_callback:\n      self._tag_callback(tag, self.callback_metadata())\n\n  def _tweak_reset(self, reset):\n    if self._args.tag_rename:\n      reset.ref = RepoFilter._do_tag_rename(self._args.tag_rename, reset.ref)\n    if self._refname_callback:\n      reset.ref = self._refname_callback(reset.ref)\n    if self._reset_callback:\n      self._reset_callback(reset, self.callback_metadata())\n\n  def results_tmp_dir(self, create_if_missing=True):\n    target_working_dir = self._args.target or b'.'\n    git_dir = GitUtils.determine_git_dir(target_working_dir)\n    d = os.path.join(git_dir, b'filter-repo')\n    if create_if_missing and not os.path.isdir(d):\n      os.mkdir(d)\n    return d\n\n  def _load_marks_file(self, marks_basename):\n    full_branch = 'refs/heads/{}'.format(self._args.state_branch)\n    marks_file = os.path.join(self.results_tmp_dir(), marks_basename)\n    working_dir = self._args.target or b'.'\n    cmd = ['git', '-C', working_dir, 'show-ref', full_branch]\n    contents = b''\n    if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0:\n      cmd = ['git', '-C', working_dir, 'show',\n             '%s:%s' % (full_branch, decode(marks_basename))]\n      try:\n        contents = subproc.check_output(cmd)\n      except subprocess.CalledProcessError as e: # pragma: no cover\n        raise SystemExit(_(\"Failed loading %s from %s\") %\n                         (decode(marks_basename), full_branch))\n    if contents:\n      biggest_id = max(int(x.split()[0][1:]) for x in contents.splitlines())\n      _IDS._next_id = max(_IDS._next_id, biggest_id+1)\n    with open(marks_file, 'bw') as f:\n      f.write(contents)\n    return marks_file\n\n  def _save_marks_files(self):\n    basenames = [b'source-marks', b'target-marks']\n    
working_dir = self._args.target or b'.'\n\n    # Check whether the branch exists\n    parent = []\n    full_branch = 'refs/heads/{}'.format(self._args.state_branch)\n    cmd = ['git', '-C', working_dir, 'show-ref', full_branch]\n    if subproc.call(cmd, stdout=subprocess.DEVNULL) == 0:\n      parent = ['-p', full_branch]\n\n    # Run 'git hash-object $MARKS_FILE' for each marks file, save result\n    blob_hashes = {}\n    for marks_basename in basenames:\n      marks_file = os.path.join(self.results_tmp_dir(), marks_basename)\n      if not os.path.isfile(marks_file): # pragma: no cover\n        raise SystemExit(_(\"Failed to find %s to save to %s\")\n                         % (marks_file, self._args.state_branch))\n      cmd = ['git', '-C', working_dir, 'hash-object', '-w', marks_file]\n      blob_hashes[marks_basename] = subproc.check_output(cmd).strip()\n\n    # Run 'git mktree' to create a tree out of it\n    p = subproc.Popen(['git', '-C', working_dir, 'mktree'],\n                      stdin=subprocess.PIPE, stdout=subprocess.PIPE)\n    for b in basenames:\n      p.stdin.write(b'100644 blob %s\\t%s\\n' % (blob_hashes[b], b))\n    p.stdin.close()\n    p.wait()\n    tree = p.stdout.read().strip()\n\n    # Create the new commit\n    cmd = (['git', '-C', working_dir, 'commit-tree', '-m', 'New mark files',\n            tree] + parent)\n    commit = subproc.check_output(cmd).strip()\n    subproc.call(['git', '-C', working_dir, 'update-ref', full_branch, commit])\n\n  def importer_only(self):\n    self._run_sanity_checks()\n    self._setup_output()\n\n  def set_output(self, outputRepoFilter):\n    assert outputRepoFilter._output\n\n    # set_output implies this RepoFilter is doing exporting, though may not\n    # be the only one.\n    self._setup_input(use_done_feature = False)\n\n    # Set our output management up to pipe to outputRepoFilter's locations\n    self._managed_output = False\n    self._output = outputRepoFilter._output\n    self._import_pipes = 
outputRepoFilter._import_pipes\n\n    # Handle sanity checks, though currently none needed for export-only cases\n    self._run_sanity_checks()\n\n  def _read_stash(self):\n    if self._stash:\n      return\n    if self._orig_refs and b'refs/stash' in self._orig_refs and \\\n       self._args.refs == ['--all']:\n      repo_working_dir = self._args.source or b'.'\n      git_dir = GitUtils.determine_git_dir(repo_working_dir)\n      stash = os.path.join(git_dir, b'logs', b'refs', b'stash')\n      if os.path.exists(stash):\n        self._stash = []\n        with open(stash, 'br') as f:\n          for line in f:\n            (oldhash, newhash, rest) = line.split(None, 2)\n            self._stash.append((newhash, rest))\n        self._args.refs.extend([x[0] for x in self._stash])\n\n  def _write_stash(self):\n    last = deleted_hash\n    if self._stash:\n      target_working_dir = self._args.target or b'.'\n      git_dir = GitUtils.determine_git_dir(target_working_dir)\n      stash = os.path.join(git_dir, b'logs', b'refs', b'stash')\n      with open(stash, 'bw') as f:\n        for (hash, rest) in self._stash:\n          new_hash = self._get_rename(hash)\n          if new_hash is None:\n            continue\n          f.write(b' '.join([last, new_hash, rest]) + b'\\n')\n          last = new_hash\n      print(_(\"Rewrote the stash.\"))\n\n  def _setup_input(self, use_done_feature):\n    if self._args.stdin:\n      self._input = sys.stdin.detach()\n      sys.stdin = None # Make sure no one tries to accidentally use it\n      self._fe_orig = None\n    else:\n      self._read_stash()\n      skip_blobs = (self._blob_callback is None and\n                    (self._args.replace_text is None or\n                     self._file_info_callback is not None) and\n                    self._args.source == self._args.target)\n      extra_flags = []\n      if skip_blobs:\n        extra_flags.append('--no-data')\n        if self._args.max_blob_size:\n          self._unpacked_size, 
packed_size = GitUtils.get_blob_sizes()\n      if use_done_feature:\n        extra_flags.append('--use-done-feature')\n      if write_marks:\n        extra_flags.append(b'--mark-tags')\n      if self._args.state_branch:\n        assert(write_marks)\n        source_marks_file = self._load_marks_file(b'source-marks')\n        extra_flags.extend([b'--export-marks='+source_marks_file,\n                            b'--import-marks='+source_marks_file])\n      if self._args.preserve_commit_encoding is not None: # pragma: no cover\n        reencode = 'no' if self._args.preserve_commit_encoding else 'yes'\n        extra_flags.append('--reencode='+reencode)\n      if self._args.date_order:\n        extra_flags.append('--date-order')\n      location = ['-C', self._args.source] if self._args.source else []\n      fep_cmd = ['git'] + location + ['fast-export', '--show-original-ids',\n                 '--signed-tags=strip', '--tag-of-filtered-object=rewrite',\n                 '--fake-missing-tagger', '--reference-excluded-parents'\n                 ] + extra_flags + self._args.refs\n      self._fep = subproc.Popen(fep_cmd, bufsize=-1, stdout=subprocess.PIPE)\n      self._input = self._fep.stdout\n      if self._args.dry_run or self._args.debug:\n        self._fe_orig = os.path.join(self.results_tmp_dir(),\n                                     b'fast-export.original')\n        output = open(self._fe_orig, 'bw')\n        self._input = InputFileBackup(self._input, output)\n        if self._args.debug:\n          tmp = [decode(x) if isinstance(x, bytes) else x for x in fep_cmd]\n          print(\"[DEBUG] Running: {}\".format(' '.join(tmp)))\n          print(\"  (saving a copy of the output at {})\"\n                .format(decode(self._fe_orig)))\n\n  def _setup_output(self):\n    if not self._args.dry_run:\n      location = ['-C', self._args.target] if self._args.target else []\n      fip_cmd = ['git'] + location + ['-c', 'core.ignorecase=false',\n                                 
     'fast-import', '--force', '--quiet']\n      if date_format_permissive:\n        fip_cmd.append('--date-format=raw-permissive')\n      if self._args.state_branch:\n        target_marks_file = self._load_marks_file(b'target-marks')\n        fip_cmd.extend([b'--export-marks='+target_marks_file,\n                        b'--import-marks='+target_marks_file])\n      self._fip = subproc.Popen(fip_cmd, bufsize=-1,\n                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)\n      self._import_pipes = (self._fip.stdin, self._fip.stdout)\n    if self._args.dry_run or self._args.debug:\n      self._fe_filt = os.path.join(self.results_tmp_dir(),\n                                   b'fast-export.filtered')\n      self._output = open(self._fe_filt, 'bw')\n    else:\n      self._output = self._fip.stdin\n    if self._args.debug and not self._args.dry_run:\n      self._output = DualFileWriter(self._fip.stdin, self._output)\n      tmp = [decode(x) if isinstance(x, bytes) else x for x in fip_cmd]\n      print(\"[DEBUG] Running: {}\".format(' '.join(tmp)))\n      print(\"  (using the following file as input: {})\"\n            .format(decode(self._fe_filt)))\n\n  def _migrate_origin_to_heads(self):\n    source_working_dir = self._args.source or b'.'\n    target_working_dir = self._args.target or b'.'\n    refs_to_migrate = set(x for x in self._orig_refs\n                          if x.startswith(b'refs/remotes/origin/'))\n    refs_to_warn_about = set()\n    if refs_to_migrate:\n      if self._args.debug:\n        print(\"[DEBUG] Migrating refs/remotes/origin/* -> refs/heads/*\")\n      p = subproc.Popen('git update-ref --no-deref --stdin'.split(),\n                        stdin=subprocess.PIPE, cwd=source_working_dir)\n      for ref in refs_to_migrate:\n        if ref == b'refs/remotes/origin/HEAD':\n          p.stdin.write(b'delete %s %s\\n' % (ref, self._orig_refs[ref]))\n          del self._orig_refs[ref]\n          continue\n        newref = 
ref.replace(b'refs/remotes/origin/', b'refs/heads/')\n        if newref not in self._orig_refs:\n          p.stdin.write(b'create %s %s\\n' % (newref, self._orig_refs[ref]))\n          self._orig_refs[newref] = self._orig_refs[ref]\n        elif self._orig_refs[ref] != self._orig_refs[newref]:\n          refs_to_warn_about.add(newref)\n        p.stdin.write(b'delete %s %s\\n' % (ref, self._orig_refs[ref]))\n        del self._orig_refs[ref]\n      p.stdin.close()\n      if p.wait(): # pragma: no cover\n        msg = _(\"git update-ref failed; see above\")\n        raise SystemExit(msg)\n\n    if b'remote.origin.url' not in self._config_settings:\n      return\n\n    # For sensitive data removals, fetch ALL refs.  Non-mirror clones normally\n    # only grab branches and tags, but other refs may hold on to the sensitive\n    # data as well.\n    if self._args.sensitive_data_removal and \\\n       not self._args.no_fetch and \\\n       not self._already_ran and \\\n       self._config_settings.get(b'remote.origin.mirror', b'false') != b'true':\n\n      if refs_to_warn_about:\n        msg = (\"Warning: You have refs modified from upstream:\\n             \" +\n               \"\\n            \".join([decode(x) for x in refs_to_warn_about]) +\n               \"\\n\" +\n               \"         We want to forcibly fetch from upstream to ensure\\n\" +\n               \"         that all relevant refs are rewritten, but this will\\n\" +\n               \"         discard your local changes before starting the\\n\" +\n               \"         rewrite.  
Proceed with fetch (Y/N)?\")\n        response = input(msg)\n\n        if response.lower() != 'y':\n          self._args.no_fetch = True\n          # Don't do the fetch, and don't remove the origin remote\n          return\n\n      cmd = 'git fetch -q --prune --update-head-ok --refmap \"\" origin +refs/*:refs/*'\n      m = _(\"NOTICE: Fetching all refs from origin to make sure we rewrite\\n\"\n            \"        all history that may reference the sensitive data, via\\n\"\n            \"      \"+cmd)\n      print(m)\n      ret = subproc.call([arg if arg != '\"\"' else '' for arg in cmd.split()],\n                         cwd=source_working_dir)\n      if ret != 0: # pragma: no cover\n        m = _(\"Warning: Fetching all refs from origin failed\")\n        print(m)\n    if self._args.sensitive_data_removal:\n      return\n\n    # Now remove the origin remote\n    url = self._config_settings[b'remote.origin.url'].decode(errors='replace')\n    m = _(\"NOTICE: Removing 'origin' remote; see 'Why is my origin removed?'\\n\"\n          \"        in the manual if you want to push back there.\\n\"\n          \"        (was %s)\") % url\n    print(m)\n    subproc.call('git remote rm origin'.split(), cwd=target_working_dir)\n\n  def _final_commands(self):\n    self._finalize_handled = True\n    self._done_callback and self._done_callback()\n\n    if self._file_info_value:\n      self._file_info_value.finalize()\n    if not self._args.quiet:\n      self._progress_writer.finish()\n\n  def _ref_update(self, target_working_dir):\n    # Start the update-ref process\n    p = subproc.Popen('git update-ref --no-deref --stdin'.split(),\n                      stdin=subprocess.PIPE,\n                      cwd=target_working_dir)\n\n    # Remove replace_refs from _orig_refs\n    replace_refs = {k:v for k, v in self._orig_refs.items()\n                    if k.startswith(b'refs/replace/')}\n    reverse_replace_refs = collections.defaultdict(list)\n    for k,v in replace_refs.items():\n 
     reverse_replace_refs[v].append(k)\n    all(map(self._orig_refs.pop, replace_refs))\n\n    # Remove unused refs\n    exported_refs, imported_refs = self.get_exported_and_imported_refs()\n    refs_to_nuke = exported_refs - imported_refs\n    # Because revisions can be passed to fast-export which handles them as\n    # though they were refs, we might have bad \"refs\" to nuke; strip them out.\n    refs_to_nuke = [x for x in refs_to_nuke\n                    if x.startswith(b'refs/') or x == b'HEAD']\n    if self._args.partial:\n      refs_to_nuke = set()\n    if refs_to_nuke and self._args.debug:\n      print(\"[DEBUG] Deleting the following refs:\\n  \"+\n            decode(b\"\\n  \".join(sorted(refs_to_nuke))))\n    p.stdin.write(b''.join([b\"delete %s\\n\" % x\n                           for x in refs_to_nuke]))\n\n    # Delete or update and add replace_refs; note that fast-export automatically\n    # handles 'update-no-add', we only need to take action for the other four\n    # choices for replace_refs.\n    self._flush_renames()\n    actual_renames = {k:v for k,v in self._commit_renames.items() if k != v}\n    if self._args.replace_refs in ['delete-no-add', 'delete-and-add']:\n      # Delete old replace refs, if unwanted\n      replace_refs_to_nuke = set(replace_refs)\n      if self._args.replace_refs == 'delete-and-add':\n        # git-update-ref won't allow us to update a ref twice, so be careful\n        # to avoid deleting refs we'll later update\n        replace_refs_to_nuke = replace_refs_to_nuke.difference(\n                                 [b'refs/replace/'+x for x in actual_renames])\n      p.stdin.write(b''.join([b\"delete %s\\n\" % x\n                             for x in replace_refs_to_nuke]))\n    if self._args.replace_refs in ['delete-and-add', 'update-or-add',\n                                   'update-and-add']:\n      # Add new replace refs\n      update_only = (self._args.replace_refs == 'update-or-add')\n      
p.stdin.write(b''.join([b\"update refs/replace/%s %s\\n\" % (old, new)\n                              for old,new in actual_renames.items()\n                              if new and not (update_only and\n                                              old in reverse_replace_refs)]))\n\n    # Complete the update-ref process\n    p.stdin.close()\n    if p.wait():\n      raise SystemExit(_(\"git update-ref failed; see above\")) # pragma: no cover\n\n  def _remap_to(self, oldish_hash):\n    '''\n    Given an oldish_hash (from the beginning of the current run), return:\n       IF oldish_hash is NOT pruned:\n         the hash of the rewrite of oldish_hash\n       otherwise:\n         the hash of the rewrite of the first unpruned ancestor of oldish_hash\n    '''\n    old_id = self._orig_graph._hash_to_id[oldish_hash]\n    new_id = _IDS.translate(old_id)\n    new_hash = self._graph.git_hash[new_id] if new_id else deleted_hash\n    return new_hash\n\n  def _compute_metadata(self, metadata_dir, orig_refs):\n    #\n    # First, handle commit_renames\n    #\n    old_commit_renames = dict()\n    if not self._already_ran:\n      commit_renames = {old: new\n                        for old, new in self._commit_renames.items()\n                       }\n    else:\n      # Read commit-map into old_commit_renames\n      with open(os.path.join(metadata_dir, b'commit-map'), 'br') as f:\n        f.readline() # Skip the header line\n        for line in f:\n          (old,new) = line.split()\n          old_commit_renames[old] = new\n      # Use A->B mappings in old_commit_renames, and B->C mappings in\n      # self._commit_renames to yield A->C mappings in commit_renames\n      commit_renames = {old: self._commit_renames.get(newish, newish)\n                        for old, newish in old_commit_renames.items()}\n      # If there are any B->C mappings in self._commit_renames for which\n      # there was no A->B mapping in old_commit_renames, then add the\n      # B->C mapping to 
commit_renames too.\n      seen = set(old_commit_renames.values())\n      commit_renames.update({old: new\n                             for old, new in self._commit_renames.items()\n                             if old not in seen})\n\n    #\n    # Second, handle ref_maps\n    #\n    exported_refs, imported_refs = self.get_exported_and_imported_refs()\n\n    old_commit_unrenames = dict()\n    if not self._already_ran:\n      old_ref_map = dict((refname, (old_hash, deleted_hash))\n                         for refname, old_hash in orig_refs.items()\n                         if refname in exported_refs)\n    else:\n      # old_commit_renames talk about how commits were renamed in the original\n      # run.  Let's reverse it to find out how to get from the intermediate\n      # commit name, back to the original.  Because everything in orig_refs\n      # right now refers to the intermediate commits after the first run(s),\n      # and we need to map them back to what they were before any changes.\n      old_commit_unrenames = dict((v,k) for (k,v) in old_commit_renames.items())\n\n      old_ref_map = {}\n      # Populate old_ref_map from the 'ref-map' file\n      with open(os.path.join(metadata_dir, b'ref-map'), 'br') as f:\n        f.readline() # Skip the header line\n        for line in f:\n          (old,intermediate,ref) = line.split()\n          old_ref_map[ref] = (old, intermediate)\n      # Append to old_ref_map items from orig_refs that were exported, but\n      # get the actual original commit name\n      for refname, old_hash in orig_refs.items():\n        if refname in old_ref_map:\n          continue\n        if refname not in exported_refs:\n          continue\n        # Compute older_hash\n        original_hash = old_commit_unrenames.get(old_hash, old_hash)\n        old_ref_map[refname] = (original_hash, deleted_hash)\n\n    new_refs = {}\n    new_refs_initialized = False\n    ref_maps = {}\n    self._orig_graph._ensure_reverse_maps_populated()\n    for 
refname, pair in old_ref_map.items():\n      old_hash, hash_ref_becomes_if_not_imported_in_this_run = pair\n      if refname not in imported_refs:\n        new_hash = hash_ref_becomes_if_not_imported_in_this_run\n      elif old_hash in commit_renames:\n        intermediate = old_commit_renames.get(old_hash,old_hash)\n        if intermediate in self._commit_renames:\n          new_hash = self._remap_to(intermediate)\n        else:\n          new_hash = intermediate\n      else: # Must be either an annotated tag, or a ref whose tip was pruned\n        if not new_refs_initialized:\n          target_working_dir = self._args.target or b'.'\n          new_refs = GitUtils.get_refs(target_working_dir)\n          new_refs_initialized = True\n        if refname in new_refs:\n          new_hash = new_refs[refname]\n        else:\n          new_hash = deleted_hash\n      ref_maps[refname] = (old_hash, new_hash)\n    if self._args.source or self._args.target:\n      if not new_refs_initialized:\n        target_working_dir = self._args.target or b'.'\n        new_refs = GitUtils.get_refs(target_working_dir)\n        new_refs_initialized = True\n      for ref, new_hash in new_refs.items():\n        if ref not in orig_refs and not ref.startswith(b'refs/replace/'):\n          old_hash = b'0'*len(new_hash)\n          ref_maps[ref] = (old_hash, new_hash)\n\n    #\n    # Third, handle first_changes\n    #\n\n    old_first_changes = dict()\n    if self._already_ran:\n      # Read first_changes into old_first_changes\n      with open(os.path.join(metadata_dir, b'first-changed-commits'), 'br') as f:\n        for line in f:\n          changed_commit, undeleted_self_or_ancestor = line.strip().split()\n          old_first_changes[changed_commit] = undeleted_self_or_ancestor\n    # We need to find the commits that were modified whose parents were not.\n    # To be able to find parents, we need the commit names as of the beginning\n    # of this run, and then when we are done, we need to map 
them back to the\n    # name of the commits from before any git-filter-repo runs.\n    #\n    # We are excluding here any commits deleted in previous git-filter-repo\n    # runs\n    undo_old_commit_renames = dict((v,k) for (k,v) in old_commit_renames.items()\n                                   if v != deleted_hash)\n    # Get a list of all commits that were changed, as of the beginning of\n    # this latest run.\n    changed_commits = {new\n                       for (old,new) in old_commit_renames.items()\n                       if old != new and new != deleted_hash} | \\\n                      {old\n                       for (old,new) in self._commit_renames.items()\n                       if old != new}\n    special_changed_commits = {old\n                               for (old,new) in old_commit_renames.items()\n                               if new == deleted_hash}\n    first_changes = dict()\n    for (old,new) in self._commit_renames.items():\n      if old == new:\n        # old wasn't modified, can't be first change if not even a change\n        continue\n      if old_commit_unrenames.get(old,old) != old:\n        # old was already modified in previous run; while it might represent\n        # something that is still a first change, we'll handle that as we\n        # loop over old_first_changes below\n        continue\n      if any(parent in changed_commits\n             for parent in self._orig_graph.get_parent_hashes(old)):\n        # a parent of old was modified, so old is not a first change\n        continue\n      # At this point, old IS a first change.  
We need to find out what new\n      # commit it maps to, or if it doesn't map to one, what new commit was\n      # its most recent ancestor that wasn't pruned.\n      if new is None:\n        new = self._remap_to(old)\n      first_changes[old] = (new if new is not None else deleted_hash)\n    for (old,undeleted_self_or_ancestor) in old_first_changes.items():\n      if undeleted_self_or_ancestor == deleted_hash:\n        # old represents a commit that was pruned and whose entire ancestry\n        # was pruned.  So, old is still a first change\n        first_changes[old] = undeleted_self_or_ancestor\n        continue\n      intermediate = old_commit_renames.get(old, old)\n      usoa = undeleted_self_or_ancestor\n      new_ancestor = self._commit_renames.get(usoa, usoa)\n      if intermediate == deleted_hash:\n        # old was pruned in previous rewrite\n        if usoa != new_ancestor:\n          # old's ancestor got rewritten in this filtering run; we can drop\n          # this one from first_changes.\n          continue\n        # Getting here means old was a first change and old was pruned in a\n        # previous run, and its ancestors that survived were not rewritten in\n        # this run, so old remains a first change\n        first_changes[old] = new_ancestor # or usoa, since new_ancestor == usoa\n        continue\n      assert(usoa == intermediate) # old wasn't pruned => usoa == intermediate\n\n      # Check whether parents of intermediate were rewritten.  
Note that\n      # intermediate in self._commit_renames only means that intermediate was\n      # processed by the latest filtering (not necessarily that it changed),\n      # but we need to know that before we can check for parent hashes having\n      # changed.\n      if intermediate not in self._commit_renames:\n        # This commit was not processed by this run, so it remains a first\n        # change\n        first_changes[old] = usoa\n        continue\n      if any(parent in changed_commits\n             for parent in self._orig_graph.get_parent_hashes(intermediate)):\n        # An ancestor was modified by this run, so it is no longer a first\n        # change; continue to the next one.\n        continue\n      # This change is a first_change; find the new commit its usoa maps to\n      new = self._remap_to(intermediate)\n      assert(new is not None)\n      first_changes[old] = new\n\n    return commit_renames, ref_maps, first_changes\n\n  def _handle_lfs_metadata(self, metadata_dir):\n    if self._lfs_object_tracker is None:\n      print(\"NOTE: LFS object orphaning not checked (LFS not in use)\")\n      return\n\n    if self._args.partial:\n      target_working_dir = self._args.target or b'.'\n      source = False\n      self._lfs_object_tracker.find_all_lfs_objects_in_repo(target_working_dir,\n                                                            source)\n\n    with open(os.path.join(metadata_dir, b'original_lfs_objects'), 'bw') as f:\n      for obj in sorted(self._lfs_object_tracker.source_objects.objects):\n        f.write(obj+b\"\\n\")\n\n    orphaned_lfs_path = os.path.join(metadata_dir, b'orphaned_lfs_objects')\n    msg = textwrap.dedent(_(f\"\"\"\\\n      NOTE: There were LFS Objects Orphaned by this rewrite recorded in\n            {decode(orphaned_lfs_path)}.\"\"\"))\n    with open(orphaned_lfs_path, 'bw') as f:\n      differences = self._lfs_object_tracker.source_objects.objects - \\\n                    
self._lfs_object_tracker.target_objects.objects\n      for obj in sorted(differences):\n        f.write(obj+b\"\\n\")\n      if differences:\n        self._lfs_object_tracker.objects_orphaned = True\n        print(msg)\n\n  def _record_metadata(self, metadata_dir, orig_refs):\n    self._flush_renames()\n    commit_renames, ref_maps, first_changes = \\\n      self._compute_metadata(metadata_dir, orig_refs)\n\n    if self._args.sensitive_data_removal:\n      changed_commits = sum(k!=v for (k,v) in commit_renames.items())\n      print(f\"You rewrote {changed_commits} (of {len(commit_renames)}) commits.\")\n      print(\"\") # Add a blank line before important rewrite information\n      print(f\"NOTE: First Changed Commit(s) is/are:\\n  \"\n            + decode(b\"\\n  \".join(x for x in first_changes)))\n\n      with open(os.path.join(metadata_dir, b'sensitive_data_removal'), 'bw') as f:\n        pass # Write nothing; we only need the file created\n\n      self._handle_lfs_metadata(metadata_dir)\n      print(\"\") # Add a blank line after important rewrite information\n\n    with open(os.path.join(metadata_dir, b'commit-map'), 'bw') as f:\n      f.write((\"%-40s %s\\n\" % (_(\"old\"), _(\"new\"))).encode())\n      for (old,new) in sorted(commit_renames.items()):\n        msg = b'%s %s\\n' % (old, new if new != None else deleted_hash)\n        f.write(msg)\n\n    with open(os.path.join(metadata_dir, b'ref-map'), 'bw') as f:\n      f.write((\"%-40s %-40s %s\\n\" % (_(\"old\"), _(\"new\"), _(\"ref\"))).encode())\n      for refname, hash_pair in sorted(ref_maps.items()):\n        (old_hash, new_hash) = hash_pair\n        f.write(b'%s %s %s\\n' % (old_hash, new_hash, refname))\n        if old_hash != new_hash:\n          self._changed_refs.add(refname)\n\n    with open(os.path.join(metadata_dir, b'changed-refs'), 'bw') as f:\n      for refname in sorted(self._changed_refs):\n        f.write(b'%s\\n' % refname)\n\n    with open(os.path.join(metadata_dir, 
b'first-changed-commits'), 'bw') as f:\n      for commit, undeleted_self_or_ancestor in sorted(first_changes.items()):\n        f.write(b'%s %s\\n' % (commit, undeleted_self_or_ancestor))\n\n    with open(os.path.join(metadata_dir, b'suboptimal-issues'), 'bw') as f:\n      issues_found = False\n      if self._commits_no_longer_merges:\n        issues_found = True\n\n        f.write(textwrap.dedent(_('''\n          The following commits used to be merge commits but due to filtering\n          are now regular commits; they likely have suboptimal commit messages\n          (e.g. \"Merge branch next into master\").  Original commit hash on the\n          left, commit hash after filtering/rewriting on the right:\n          ''')[1:]).encode())\n        for oldhash, newhash in self._commits_no_longer_merges:\n          f.write('  {} {}\\n'.format(oldhash, newhash).encode())\n        f.write(b'\\n')\n\n      if self._commits_referenced_but_removed:\n        issues_found = True\n        f.write(textwrap.dedent(_('''\n          The following commits were filtered out, but referenced in another\n          commit message.  
The reference to the now-nonexistent commit hash\n          (or a substring thereof) was left as-is in any commit messages:\n          ''')[1:]).encode())\n        for bad_commit_reference in self._commits_referenced_but_removed:\n          f.write('  {}\\n'.format(bad_commit_reference).encode())\n        f.write(b'\\n')\n\n      if not issues_found:\n        f.write(_(\"No filtering problems encountered.\\n\").encode())\n\n    with open(os.path.join(metadata_dir, b'already_ran'), 'bw') as f:\n       f.write(_(\"This file exists to allow you to filter again without --force,\\n\"\n                 \"and to specify that metadata files should be updated instead\\n\"\n                 \"of rewritten\").encode())\n\n  def finish(self):\n    ''' Alternative to run() when there is no input of our own to parse,\n        meaning that run only really needs to close the handle to fast-import\n        and let it finish, thus making a call to \"run\" feel like a misnomer. '''\n    assert not self._input\n    assert self._managed_output\n    self.run()\n\n  def insert(self, obj, direct_insertion = False):\n    if not direct_insertion:\n      if type(obj) == Blob:\n        self._tweak_blob(obj)\n      elif type(obj) == Commit:\n        aux_info = {'orig_parents': obj.parents,\n                    'had_file_changes': bool(obj.file_changes)}\n        self._tweak_commit(obj, aux_info)\n      elif type(obj) == Reset:\n        self._tweak_reset(obj)\n      elif type(obj) == Tag:\n        self._tweak_tag(obj)\n    self._insert_into_stream(obj)\n\n  def _insert_into_stream(self, obj):\n    if not obj.dumped:\n      if self._lfs_object_tracker:\n        self._lfs_object_tracker.check_output_object(obj)\n      if self._parser:\n        self._parser.insert(obj)\n      else:\n        obj.dump(self._output)\n\n  def get_exported_and_imported_refs(self):\n    return self._parser.get_exported_and_imported_refs()\n\n  def run(self):\n    start = time.time()\n    if not self._input and not 
self._output:\n      self._run_sanity_checks()\n      if not self._args.dry_run and not self._args.partial:\n        self._read_stash()\n        self._migrate_origin_to_heads()\n      self._setup_input(use_done_feature = True)\n      self._setup_output()\n    assert self._sanity_checks_handled\n\n    if self._input:\n      # Create and run the filter\n      self._repo_working_dir = self._args.source or b'.'\n      self._parser = FastExportParser(blob_callback   = self._tweak_blob,\n                                      commit_callback = self._tweak_commit,\n                                      tag_callback    = self._tweak_tag,\n                                      reset_callback  = self._tweak_reset,\n                                      done_callback   = self._final_commands)\n      self._setup_lfs_orphaning_checks()\n      self._parser.run(self._input, self._output)\n      if not self._finalize_handled:\n        self._final_commands()\n\n      # Make sure fast-export completed successfully\n      if not self._args.stdin and self._fep.wait():\n        raise SystemExit(_(\"Error: fast-export failed; see above.\")) # pragma: no cover\n      self._input.close()\n\n    # If we're not the manager of self._output, we should avoid post-run cleanup\n    if not self._managed_output:\n      return\n\n    # Close the output and ensure fast-import successfully completes\n    self._output.close()\n    if not self._args.dry_run and self._fip.wait():\n      raise SystemExit(_(\"Error: fast-import failed; see above.\")) # pragma: no cover\n\n    # With fast-export and fast-import complete, update state if requested\n    if self._args.state_branch:\n      self._save_marks_files()\n\n    # Notify user how long it took, before doing a gc and such\n    msg = \"New history written in {:.2f} seconds...\"\n    if self._args.repack:\n      msg = \"New history written in {:.2f} seconds; now repacking/cleaning...\"\n    print(msg.format(time.time()-start))\n\n    # Exit early, if 
requested\n    if self._args.dry_run:\n      print(_(\"NOTE: Not running fast-import or cleaning up; --dry-run passed.\"))\n      if self._fe_orig:\n        print(_(\"      Requested filtering can be seen by comparing:\"))\n        print(\"        \" + decode(self._fe_orig))\n      else:\n        print(_(\"      Requested filtering can be seen at:\"))\n      print(\"        \" + decode(self._fe_filt))\n      return\n\n    target_working_dir = self._args.target or b'.'\n    if self._input:\n      self._ref_update(target_working_dir)\n\n      # Write out data about run\n      self._record_metadata(self.results_tmp_dir(), self._orig_refs)\n\n    # Final cleanup:\n    #   If we need a repack, then nuke the reflogs and repack.\n    #   If we need a reset, do a reset --hard\n    reset = not GitUtils.is_repository_bare(target_working_dir)\n    self.cleanup(target_working_dir, self._args.repack, reset,\n                 run_quietly=self._args.quiet,\n                 show_debuginfo=self._args.debug)\n\n    # Let user know how long it took\n    print(_(\"Completely finished after {:.2f} seconds.\")\n          .format(time.time()-start))\n\n    # Give post-rewrite instructions for cleaning up other copies for SDR\n    if self._args.sensitive_data_removal:\n      lfs_note = \"\"\n      if self._lfs_object_tracker and \\\n         self._lfs_object_tracker.objects_orphaned == True:\n        lfs_note = _(\" and LFS Objects Orphaned\")\n      push_command = \"git push --force --mirror origin\"\n      if self._args.no_fetch:\n        if self._args.partial:\n          push_command = \"git push --force origin \" + \\\n                     \" \".join(sorted([decode(x) for x in self._changed_refs]))\n        else:\n          push_command = \"git push --all --tags origin\"\n      print(\"\")\n      print(sdr_next_steps % (push_command, lfs_note, lfs_note))\n\ndef main():\n  setup_gettext()\n  args = FilteringOptions.parse_args(sys.argv[1:])\n  if args.analyze:\n    
RepoAnalyze.run(args)\n  else:\n    filter = RepoFilter(args)\n    filter.run()\n\nif __name__ == '__main__':\n  main()\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"git-filter-repo\"\ndescription = \"Quickly rewrite git repository history\"\nauthors = [\n    {name = \"Elijah Newren\", email = \"newren@gmail.com\"}\n]\nreadme = \"README.md\"\nclassifiers = [\n    \"Development Status :: 5 - Production/Stable\",\n    \"Operating System :: OS Independent\",\n    \"Programming Language :: Python\",\n    \"License :: OSI Approved :: MIT License\",\n    \"Programming Language :: Python :: 3.6\",\n    \"Programming Language :: Python :: 3.7\",\n    \"Programming Language :: Python :: 3.8\",\n    \"Programming Language :: Python :: 3.9\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: Implementation :: CPython\",\n    \"Programming Language :: Python :: Implementation :: PyPy\",\n]\nlicense.text = \"MIT\"\nrequires-python = \">= 3.6\"\ndynamic = [\"version\"]\n\n[project.urls]\nHomepage = \"https://github.com/newren/git-filter-repo\"\nIssues = \"https://github.com/newren/git-filter-repo/issues/\"\nSource = \"https://github.com/newren/git-filter-repo\"\n\n[project.scripts]\ngit-filter-repo = \"git_filter_repo:main\"\n\n[build-system]\nrequires = [\"setuptools>=61\", \"setuptools_scm>=8.0\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[tool.setuptools]\npy-modules = [\"git_filter_repo\"]\n\n[tool.setuptools_scm]\n# This section intentionally left blank\n"
  },
  {
    "path": "t/run_coverage",
    "content": "#!/bin/bash\n\nset -eu\n\norig_dir=$(cd $(dirname $0) && pwd -P)\ntmpdir=$(mktemp -d)\n\ncat <<EOF >$tmpdir/.coveragerc\n[run]\nparallel=true\ndata_file=$tmpdir/.coverage\nEOF\n\ncat <<EOF >$tmpdir/sitecustomize.py\nimport coverage\ncoverage.process_startup()\nEOF\n\nexport COVERAGE_PROCESS_START=$tmpdir/.coveragerc\nexport PYTHONPATH=$tmpdir:\n\n# Produce a coverage report, even if the tests fail\nset +e\n$orig_dir/run_tests\nexitcode=$?\nset -e\n\ncd $tmpdir\ncoverage3 combine -q\ncoverage3 html -d $orig_dir/report\ncoverage3 report -m\ncd $orig_dir\nrm -rf $tmpdir\n\nexit $exitcode\n"
  },
  {
    "path": "t/run_tests",
    "content": "#!/bin/bash\nset -eu\n\ncd $(dirname $0)\n\n# Put git_filter_repo.py on the front of PYTHONPATH\nexport PYTHONPATH=\"$PWD/..${PYTHONPATH:+:$PYTHONPATH}\"\n\n# We pretend filenames are unicode for two reasons: (1) because it exercises\n# more code, and (2) this setting will detect accidental use of unicode strings\n# for file/directory names when it should always be bytestrings.\nexport PRETEND_UNICODE_ARGS=1\n\nexport TEST_SHELL_PATH=/bin/sh\n\nfailed=0\n\nfor t in t[0-9]*.sh\ndo\n  printf '\\n\\n== %s ==\\n' \"$t\"\n  bash $t \"$@\" || failed=$(($failed+1))\ndone\n\nif [ 0 -lt $failed ]\nthen\n  exit 1\nfi\n"
  },
  {
    "path": "t/t9390/basic",
    "content": "feature done\n# Simple repo with three files, a merge where each side touches exactly one\n# file, and a commit at the end touching all three.  Note that the original-oid\n# directives are very fake, but make it easy to recognize what original shas\n# are.\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 8\ninitial\n\nblob\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ndata 8\nten-mod\n\nblob\nmark :3\noriginal-oid 0000000000000000000000000000000000000003\ndata 11\ntwenty-mod\n\nblob\nmark :4\noriginal-oid 0000000000000000000000000000000000000004\ndata 6\nfinal\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :5\noriginal-oid 0000000000000000000000000000000000000009\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 filename\nM 100644 :1 ten\nM 100644 :1 twenty\n\ncommit refs/heads/B\nmark :6\noriginal-oid 000000000000000000000000000000000000000B\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 11\nadd twenty\nfrom :5\nM 100644 :3 twenty\n\ncommit refs/heads/A\nmark :7\noriginal-oid 000000000000000000000000000000000000000A\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. 
Me <me@machine52.little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :5\nM 100644 :2 ten\n\ncommit refs/heads/master\nmark :8\noriginal-oid 000000000000000000000000000000000000000C\nauthor Lit.e Me <me@fire.com> 1535229559 -0700\ncommitter Lit.e Me <me@fire.com> 1535229580 -0700\ndata 24\nMerge branch 'A' into B\nfrom :6\nmerge :7\nM 100644 :2 ten\n\ncommit refs/heads/master\nmark :9\noriginal-oid 000000000000000000000000000000000000000D\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :8\nM 100644 :4 filename\nM 100644 :4 ten\nM 100644 :4 twenty\n\ntag v1.0\nfrom :9\noriginal-oid 000000000000000000000000000000000000000E\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\nreset refs/heads/master\nfrom :9\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-filename",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/A\ncommit refs/heads/A\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 filename\n\nblob\nmark :3\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :4\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :2\nM 100644 :3 filename\n\nreset refs/heads/B\nfrom :2\n\ntag v1.0\nfrom :4\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-mailmap",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/B\ncommit refs/heads/B\nmark :2\nauthor Little 'ol Me <me@little.net> 1535228562 -0700\ncommitter Little 'ol Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 filename\nM 100644 :1 ten\nM 100644 :1 twenty\n\nblob\nmark :3\ndata 11\ntwenty-mod\n\ncommit refs/heads/B\nmark :4\nauthor Little 'ol Me <me@little.net> 1535229544 -0700\ncommitter Little 'ol Me <me@little.net> 1535229544 -0700\ndata 11\nadd twenty\nfrom :2\nM 100644 :3 twenty\n\nblob\nmark :5\ndata 8\nten-mod\n\ncommit refs/heads/A\nmark :6\nauthor Little 'ol Me <me@little.net> 1535229523 -0700\ncommitter Little 'ol Me <me@little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :2\nM 100644 :5 ten\n\ncommit refs/heads/master\nmark :7\nauthor Little 'ol Me <me@little.net> 1535229559 -0700\ncommitter Little 'ol Me <me@little.net> 1535229580 -0700\ndata 24\nMerge branch 'A' into B\nfrom :4\nmerge :6\nM 100644 :5 ten\n\nblob\nmark :8\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :9\nauthor Little 'ol Me <me@little.net> 1535229601 -0700\ncommitter Little 'ol Me <me@little.net> 1535229601 -0700\ndata 9\nwhatever\nfrom :7\nM 100644 :8 filename\nM 100644 :8 ten\nM 100644 :8 twenty\n\ntag v1.0\nfrom :9\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-message",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/B\ncommit refs/heads/B\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 9\nModified\nM 100644 :1 filename\nM 100644 :1 ten\nM 100644 :1 twenty\n\nblob\nmark :3\ndata 11\ntwenty-mod\n\ncommit refs/heads/B\nmark :4\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 18\nadd the number 20\nfrom :2\nM 100644 :3 twenty\n\nblob\nmark :5\ndata 8\nten-mod\n\ncommit refs/heads/A\nmark :6\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. Me <me@machine52.little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :2\nM 100644 :5 ten\n\ncommit refs/heads/master\nmark :7\nauthor Lit.e Me <me@fire.com> 1535229559 -0700\ncommitter Lit.e Me <me@fire.com> 1535229580 -0700\ndata 24\nMerge branch 'A' into B\nfrom :4\nmerge :6\nM 100644 :5 ten\n\nblob\nmark :8\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :9\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :7\nM 100644 :8 filename\nM 100644 :8 ten\nM 100644 :8 twenty\n\ntag v1.0\nfrom :9\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 15\nversion one :)\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-numbers",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/B\ncommit refs/heads/B\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 ten\nM 100644 :1 twenty\n\nblob\nmark :3\ndata 11\ntwenty-mod\n\ncommit refs/heads/B\nmark :4\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 11\nadd twenty\nfrom :2\nM 100644 :3 twenty\n\nblob\nmark :5\ndata 8\nten-mod\n\ncommit refs/heads/A\nmark :6\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. Me <me@machine52.little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :2\nM 100644 :5 ten\n\ncommit refs/heads/master\nmark :7\nauthor Lit.e Me <me@fire.com> 1535229559 -0700\ncommitter Lit.e Me <me@fire.com> 1535229580 -0700\ndata 24\nMerge branch 'A' into B\nfrom :4\nmerge :6\nM 100644 :5 ten\n\nblob\nmark :8\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :9\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :7\nM 100644 :8 ten\nM 100644 :8 twenty\n\ntag v1.0\nfrom :9\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-replace",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/B\ncommit refs/heads/B\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 filename\nM 100644 :1 ten\nM 100644 :1 twenty\n\nblob\nmark :3\ndata 28\ntwenty-modified-by-gremlins\n\ncommit refs/heads/B\nmark :4\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 11\nadd twenty\nfrom :2\nM 100644 :3 twenty\n\nblob\nmark :5\ndata 25\nten-modified-by-gremlins\n\ncommit refs/heads/A\nmark :6\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. Me <me@machine52.little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :2\nM 100644 :5 ten\n\ncommit refs/heads/master\nmark :7\nauthor Lit.e Me <me@fire.com> 1535229559 -0700\ncommitter Lit.e Me <me@fire.com> 1535229580 -0700\ndata 24\nMerge branch 'A' into B\nfrom :4\nmerge :6\nM 100644 :5 ten\n\nblob\nmark :8\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :9\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :7\nM 100644 :8 filename\nM 100644 :8 ten\nM 100644 :8 twenty\n\ntag v1.0\nfrom :9\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-ten",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/B\ncommit refs/heads/B\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 ten\n\nblob\nmark :3\ndata 8\nten-mod\n\ncommit refs/heads/A\nmark :4\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. Me <me@machine52.little.net> 1535229523 -0700\ndata 8\nadd ten\nfrom :2\nM 100644 :3 ten\n\nblob\nmark :5\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :6\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :4\nM 100644 :5 ten\n\ntag v1.0\nfrom :6\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/basic-twenty",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/A\ncommit refs/heads/A\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 twenty\n\nblob\nmark :3\ndata 11\ntwenty-mod\n\ncommit refs/heads/B\nmark :4\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 11\nadd twenty\nfrom :2\nM 100644 :3 twenty\n\nblob\nmark :5\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :6\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 9\nwhatever\nfrom :4\nM 100644 :5 twenty\n\ntag v1.0\nfrom :6\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate",
    "content": "feature done\n# Simple repo with only three files, with a bunch of cases of dealing with\n# topology changes possibly causing merge commits to need to be pruned.\n#\n# As with case1, the original-oid directives are very fake, but if an error\n# is hit that shows one of these, it makes it really easy to know where it\n# came from.\n#\n# Expressed with shorthand, log history in the format\n#     Commit Name(Parent(s)): files changed\n# for this case looks like the following:\n#     W(V): moduleA/keepme\n#     V(U,U): moduleB/nukeme\n#     U(T): moduleA/sometimes\n#     T(S): moduleA/keepme\n#     S(R,R): moduleA/sometimes\n#     R(R): moduleB/nukeme\n#     Q(P): moduleA/keepme\n#     P(N,M): moduleA/sometimes\n#     O(M,N): moduleA/sometimes\n#     N(C): moduleB/nukeme\n#     M(L): moduleB/nukeme\n#     L(K): moduleA/keepme\n#     K(J): moduleB/nukeme\n#     J(D,H): moduleA/sometimes\n#     I(H,D): moduleA/sometimes  # backwards-ish merge\n#     H(G): moduleB/nukeme\n#     G(F): moduleA/keepme\n#     F(D): moduleB/nukeme\n#     D(B,C): moduleA/sometimes\n#     C(A): moduleB/nukeme\n#     B(A): moduleB/nukeme\n#     A(): moduleA/keepme\n#\n# This involved case is intended to test the following:\n#   * Merge becoming non-merge due to both parents becoming same commit\n#     * Two sub-cases: it has changes of its own, or it doesn't\n#   * Merge becoming merge of commit with its own ancestor\n#     * Two cases: and it has changes, and it doesn't have changes\n#     * Two cases: first parent is the ancestor, second parent is the ancestor\n#   * Merge starting as merge of commit with its own ancestor\n#     * Two cases: has changes, doesn't have changes\n#     * Two cases: first parent, or second parent\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 10\nkeepme v1\n\nblob\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ndata 10\nnukeme v1\n\nblob\nmark :3\noriginal-oid 
0000000000000000000000000000000000000003\ndata 10\nnukeme v2\n\nblob\nmark :4\noriginal-oid 0000000000000000000000000000000000000004\ndata 13\nsometimes v1\n\nblob\nmark :5\noriginal-oid 0000000000000000000000000000000000000005\ndata 10\nnukeme v3\n\nblob\nmark :6\noriginal-oid 0000000000000000000000000000000000000006\ndata 10\nkeepme v2\n\nblob\nmark :7\noriginal-oid 0000000000000000000000000000000000000007\ndata 10\nnukem4 v4\n\nblob\nmark :8\noriginal-oid 0000000000000000000000000000000000000008\ndata 13\nsometimes v2\n\nblob\nmark :9\noriginal-oid 0000000000000000000000000000000000000009\ndata 13\nsometimes v3\n\nblob\nmark :10\noriginal-oid 000000000000000000000000000000000000000A\ndata 10\nnukeme v4\n\nblob\nmark :11\noriginal-oid 000000000000000000000000000000000000000B\ndata 10\nkeepme v3\n\nblob\nmark :12\noriginal-oid 000000000000000000000000000000000000000C\ndata 10\nnukeme v5\n\nblob\nmark :13\noriginal-oid 000000000000000000000000000000000000000D\ndata 10\nnukeme v6\n\nblob\nmark :14\noriginal-oid 000000000000000000000000000000000000000E\ndata 13\nsometimes v4\n\nblob\nmark :15\noriginal-oid 000000000000000000000000000000000000000F\ndata 13\nsometimes v5\n\nblob\nmark :16\noriginal-oid 0000000000000000000000000000000000000010\ndata 10\nkeepme v4\n\nblob\nmark :17\noriginal-oid 0000000000000000000000000000000000000011\ndata 10\nnukeme v7\n\nblob\nmark :18\noriginal-oid 0000000000000000000000000000000000000012\ndata 13\nsometimes v6\n\nblob\nmark :19\noriginal-oid 0000000000000000000000000000000000000013\ndata 10\nkeepme v5\n\nblob\nmark :20\noriginal-oid 0000000000000000000000000000000000000014\ndata 13\nsometimes v7\n\nblob\nmark :21\noriginal-oid 0000000000000000000000000000000000000015\ndata 10\nnukeme v8\n\nblob\nmark :22\noriginal-oid 0000000000000000000000000000000000000016\ndata 10\nkeepme v6\n\ncommit refs/heads/master\nmark :26\noriginal-oid 0000000000000000000000000000000000000020\nauthor Full Name <user@organization.tld> 2000000000 
+0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 moduleA/keepme\n\ncommit refs/heads/master\nmark :27\noriginal-oid 0000000000000000000000000000000000000021\nauthor Full Name <user@organization.tld> 2000010000 +0100\ncommitter Full Name <user@organization.tld> 2000010000 +0100\ndata 2\nB\nfrom :26\nM 100644 :2 moduleB/nukeme\n\ncommit refs/heads/master\nmark :28\noriginal-oid 0000000000000000000000000000000000000022\nauthor Full Name <user@organization.tld> 2000020000 +0100\ncommitter Full Name <user@organization.tld> 2000020000 +0100\ndata 2\nC\nfrom :26\nM 100644 :3 moduleB/nukeme\n\ncommit refs/heads/master\nmark :29\noriginal-oid 0000000000000000000000000000000000000023\nauthor Full Name <user@organization.tld> 2000030000 +0100\ncommitter Full Name <user@organization.tld> 2000030000 +0100\ndata 29\nD: Merge commit 'C' into 'B'\nfrom :27\nmerge :28\nM 100644 :4 moduleA/sometimes\n\ncommit refs/heads/master\nmark :30\noriginal-oid 0000000000000000000000000000000000000024\nauthor Full Name <user@organization.tld> 2000040000 +0100\ncommitter Full Name <user@organization.tld> 2000040000 +0100\ndata 2\nF\nfrom :29\nM 100644 :5 moduleB/nukeme\n\ncommit refs/heads/master\nmark :31\noriginal-oid 0000000000000000000000000000000000000025\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 2\nG\nfrom :30\nM 100644 :6 moduleA/keepme\n\ncommit refs/heads/master\nmark :32\noriginal-oid 0000000000000000000000000000000000000026\nauthor Full Name <user@organization.tld> 2000060000 +0100\ncommitter Full Name <user@organization.tld> 2000060000 +0100\ndata 2\nH\nfrom :31\nM 100644 :7 moduleB/nukeme\n\ncommit refs/heads/branchI\nmark :33\noriginal-oid 0000000000000000000000000000000000000027\nauthor Full Name <user@organization.tld> 2000070000 +0100\ncommitter Full Name <user@organization.tld> 2000070000 +0100\ndata 29\nI: Merge commit 'D' into 'H'\nfrom :32\nmerge 
:29\nM 100644 :8 moduleA/sometimes\n\ncommit refs/heads/master\nmark :34\noriginal-oid 0000000000000000000000000000000000000028\nauthor Full Name <user@organization.tld> 2000080000 +0100\ncommitter Full Name <user@organization.tld> 2000080000 +0100\ndata 29\nJ: Merge commit 'H' into 'D'\nfrom :29\nmerge :32\nM 100644 :9 moduleA/sometimes\n\ncommit refs/heads/master\nmark :35\noriginal-oid 0000000000000000000000000000000000000029\nauthor Full Name <user@organization.tld> 2000090000 +0100\ncommitter Full Name <user@organization.tld> 2000090000 +0100\ndata 2\nK\nfrom :34\nM 100644 :10 moduleB/nukeme\n\ncommit refs/heads/master\nmark :36\noriginal-oid 000000000000000000000000000000000000002A\nauthor Full Name <user@organization.tld> 2000092000 +0100\ncommitter Full Name <user@organization.tld> 2000092000 +0100\ndata 2\nL\nfrom :35\nM 100644 :11 moduleA/keepme\n\ncommit refs/heads/master\nmark :37\noriginal-oid 000000000000000000000000000000000000002B\nauthor Full Name <user@organization.tld> 2000094000 +0100\ncommitter Full Name <user@organization.tld> 2000094000 +0100\ndata 2\nM\nfrom :36\nM 100644 :12 moduleB/nukeme\n\ncommit refs/heads/master\nmark :38\noriginal-oid 000000000000000000000000000000000000002C\nauthor Full Name <user@organization.tld> 2000096000 +0100\ncommitter Full Name <user@organization.tld> 2000096000 +0100\ndata 2\nN\nfrom :28\nM 100644 :13 moduleB/nukeme\n\ncommit refs/heads/branchO\nmark :39\noriginal-oid 000000000000000000000000000000000000002D\nauthor Full Name <user@organization.tld> 2000098000 +0100\ncommitter Full Name <user@organization.tld> 2000098000 +0100\ndata 29\nO: Merge commit 'N' into 'M'\nfrom :37\nmerge :38\nD moduleA/sometimes\n\ncommit refs/heads/master\nmark :40\noriginal-oid 000000000000000000000000000000000000002E\nauthor Full Name <user@organization.tld> 2000099000 +0100\ncommitter Full Name <user@organization.tld> 2000099000 +0100\ndata 29\nP: Merge commit 'M' into 'N'\nfrom :38\nmerge :37\nM 100644 :15 
moduleA/sometimes\n\ncommit refs/heads/master\nmark :41\noriginal-oid 0000000000000000000000000000000000000030\nauthor Full Name <user@organization.tld> 3000000000 +0100\ncommitter Full Name <user@organization.tld> 3000000000 +0100\ndata 2\nQ\nfrom :40\nM 100644 :16 moduleA/keepme\n\ncommit refs/heads/master\nmark :42\noriginal-oid 0000000000000000000000000000000000000031\nauthor Full Name <user@organization.tld> 3000010000 +0100\ncommitter Full Name <user@organization.tld> 3000010000 +0100\ndata 2\nR\nfrom :41\nM 100644 :17 moduleB/nukeme\n\ncommit refs/heads/master\nmark :43\noriginal-oid 0000000000000000000000000000000000000032\nauthor Full Name <user@organization.tld> 3000020000 +0100\ncommitter Full Name <user@organization.tld> 3000020000 +0100\ndata 29\nS: Merge commit 'R' into 'R'\nfrom :42\nmerge :42\nM 100644 :18 moduleA/sometimes\n\ncommit refs/heads/master\nmark :44\noriginal-oid 0000000000000000000000000000000000000033\nauthor Full Name <user@organization.tld> 3000030000 +0100\ncommitter Full Name <user@organization.tld> 3000030000 +0100\ndata 2\nT\nfrom :43\nM 100644 :19 moduleA/keepme\n\ncommit refs/heads/master\nmark :45\noriginal-oid 0000000000000000000000000000000000000034\nauthor Full Name <user@organization.tld> 3000040000 +0100\ncommitter Full Name <user@organization.tld> 3000040000 +0100\ndata 2\nU\nfrom :44\nM 100644 :20 moduleA/sometimes\n\ncommit refs/heads/master\nmark :46\noriginal-oid 0000000000000000000000000000000000000035\nauthor Full Name <user@organization.tld> 3000050000 +0100\ncommitter Full Name <user@organization.tld> 3000050000 +0100\ndata 29\nV: Merge commit 'U' into 'U'\nfrom :45\nmerge :45\nM 100644 :21 moduleB/nukeme\n\ncommit refs/heads/master\nmark :47\noriginal-oid 0000000000000000000000000000000000000036\nauthor Full Name <user@organization.tld> 3000060000 +0100\ncommitter Full Name <user@organization.tld> 3000060000 +0100\ndata 2\nW\nfrom :46\nM 100644 :22 moduleA/keepme\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate-evil-merge",
    "content": "feature done\nblob\nmark :1\ndata 0\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 2000000000 +0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 irrelevant\nM 100644 :1 module-of-interest/file1\nM 100644 :1 module-of-interest/file2\nM 100644 :1 module-of-interest/file3\nM 100644 :1 other-module/files\nM 100644 :1 other-module/are\nM 100644 :1 other-module/fun\n\ncommit refs/heads/master\nmark :3\nauthor Full Name <user@organization.tld> 2000030000 +0100\ncommitter Full Name <user@organization.tld> 2000030000 +0100\ndata 2\nB\nfrom :2\nD irrelevant\nD module-of-interest/file1\nD module-of-interest/file2\nD module-of-interest/file3\n\nblob\nmark :4\ndata 8\ncontent\n\ncommit refs/heads/master\nmark :5\nauthor Full Name <user@organization.tld> 2000040000 +0100\ncommitter Full Name <user@organization.tld> 2000040000 +0100\ndata 2\nD\nfrom :3\nM 100644 :4 other-module/fun\n\ncommit refs/heads/master\nmark :6\nauthor Full Name <user@organization.tld> 2000020000 +0100\ncommitter Full Name <user@organization.tld> 2000020000 +0100\ndata 2\nC\nfrom :2\nM 100644 :4 irrelevant\n\ncommit refs/heads/master\nmark :7\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 31\nMerge and ignore the deletions\nfrom :6\nmerge :5\nM 100644 :4 irrelevant\nM 100644 :4 other-module/fun\n\nblob\nmark :8\ndata 6\nfinal\n\ncommit refs/heads/master\nmark :7\nauthor Full Name <user@organization.tld> 2000060000 +0100\ncommitter Full Name <user@organization.tld> 2000060000 +0100\ndata 13\nFinal change\nfrom :7\nM 100644 :8 module-of-interest/file2\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate-globme",
    "content": "feature done\nblob\nmark :1\ndata 10\nkeepme v1\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 2000000000 +0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 moduleA/keepme\n\nblob\nmark :3\ndata 10\nnukeme v1\n\ncommit refs/heads/master\nmark :4\nauthor Full Name <user@organization.tld> 2000010000 +0100\ncommitter Full Name <user@organization.tld> 2000010000 +0100\ndata 2\nB\nfrom :2\nM 100644 :3 moduleB/nukeme\n\nblob\nmark :5\ndata 10\nnukeme v2\n\ncommit refs/heads/master\nmark :6\nauthor Full Name <user@organization.tld> 2000020000 +0100\ncommitter Full Name <user@organization.tld> 2000020000 +0100\ndata 2\nC\nfrom :2\nM 100644 :5 moduleB/nukeme\n\ncommit refs/heads/master\nmark :7\nauthor Full Name <user@organization.tld> 2000030000 +0100\ncommitter Full Name <user@organization.tld> 2000030000 +0100\ndata 29\nD: Merge commit 'C' into 'B'\nfrom :4\nmerge :6\n\nblob\nmark :8\ndata 10\nnukeme v3\n\ncommit refs/heads/master\nmark :9\nauthor Full Name <user@organization.tld> 2000040000 +0100\ncommitter Full Name <user@organization.tld> 2000040000 +0100\ndata 2\nF\nfrom :7\nM 100644 :8 moduleB/nukeme\n\nblob\nmark :10\ndata 10\nkeepme v2\n\ncommit refs/heads/master\nmark :11\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 2\nG\nfrom :9\nM 100644 :10 moduleA/keepme\n\nblob\nmark :12\ndata 10\nnukem4 v4\n\ncommit refs/heads/master\nmark :13\nauthor Full Name <user@organization.tld> 2000060000 +0100\ncommitter Full Name <user@organization.tld> 2000060000 +0100\ndata 2\nH\nfrom :11\nM 100644 :12 moduleB/nukeme\n\ncommit refs/heads/branchI\nmark :14\nauthor Full Name <user@organization.tld> 2000070000 +0100\ncommitter Full Name <user@organization.tld> 2000070000 +0100\ndata 29\nI: Merge commit 'D' into 'H'\nfrom :13\nmerge :7\n\ncommit refs/heads/master\nmark :15\nauthor Full Name 
<user@organization.tld> 2000080000 +0100\ncommitter Full Name <user@organization.tld> 2000080000 +0100\ndata 29\nJ: Merge commit 'H' into 'D'\nfrom :7\nmerge :13\n\nblob\nmark :16\ndata 10\nnukeme v4\n\ncommit refs/heads/master\nmark :17\nauthor Full Name <user@organization.tld> 2000090000 +0100\ncommitter Full Name <user@organization.tld> 2000090000 +0100\ndata 2\nK\nfrom :15\nM 100644 :16 moduleB/nukeme\n\nblob\nmark :18\ndata 10\nkeepme v3\n\ncommit refs/heads/master\nmark :19\nauthor Full Name <user@organization.tld> 2000092000 +0100\ncommitter Full Name <user@organization.tld> 2000092000 +0100\ndata 2\nL\nfrom :17\nM 100644 :18 moduleA/keepme\n\nblob\nmark :20\ndata 10\nnukeme v5\n\ncommit refs/heads/master\nmark :21\nauthor Full Name <user@organization.tld> 2000094000 +0100\ncommitter Full Name <user@organization.tld> 2000094000 +0100\ndata 2\nM\nfrom :19\nM 100644 :20 moduleB/nukeme\n\nblob\nmark :22\ndata 10\nnukeme v6\n\ncommit refs/heads/master\nmark :23\nauthor Full Name <user@organization.tld> 2000096000 +0100\ncommitter Full Name <user@organization.tld> 2000096000 +0100\ndata 2\nN\nfrom :6\nM 100644 :22 moduleB/nukeme\n\ncommit refs/heads/branchO\nmark :24\nauthor Full Name <user@organization.tld> 2000098000 +0100\ncommitter Full Name <user@organization.tld> 2000098000 +0100\ndata 29\nO: Merge commit 'N' into 'M'\nfrom :21\nmerge :23\n\ncommit refs/heads/master\nmark :25\nauthor Full Name <user@organization.tld> 2000099000 +0100\ncommitter Full Name <user@organization.tld> 2000099000 +0100\ndata 29\nP: Merge commit 'M' into 'N'\nfrom :23\nmerge :21\n\nblob\nmark :26\ndata 10\nkeepme v4\n\ncommit refs/heads/master\nmark :27\nauthor Full Name <user@organization.tld> 3000000000 +0100\ncommitter Full Name <user@organization.tld> 3000000000 +0100\ndata 2\nQ\nfrom :25\nM 100644 :26 moduleA/keepme\n\nblob\nmark :28\ndata 10\nnukeme v7\n\ncommit refs/heads/master\nmark :29\nauthor Full Name <user@organization.tld> 3000010000 +0100\ncommitter Full Name 
<user@organization.tld> 3000010000 +0100\ndata 2\nR\nfrom :27\nM 100644 :28 moduleB/nukeme\n\ncommit refs/heads/master\nmark :30\nauthor Full Name <user@organization.tld> 3000020000 +0100\ncommitter Full Name <user@organization.tld> 3000020000 +0100\ndata 29\nS: Merge commit 'R' into 'R'\nfrom :29\nmerge :29\n\nblob\nmark :31\ndata 10\nkeepme v5\n\ncommit refs/heads/master\nmark :32\nauthor Full Name <user@organization.tld> 3000030000 +0100\ncommitter Full Name <user@organization.tld> 3000030000 +0100\ndata 2\nT\nfrom :30\nM 100644 :31 moduleA/keepme\n\nblob\nmark :33\ndata 10\nnukeme v8\n\ncommit refs/heads/master\nmark :34\nauthor Full Name <user@organization.tld> 3000050000 +0100\ncommitter Full Name <user@organization.tld> 3000050000 +0100\ndata 29\nV: Merge commit 'U' into 'U'\nfrom :32\nmerge :32\nM 100644 :33 moduleB/nukeme\n\nblob\nmark :35\ndata 10\nkeepme v6\n\ncommit refs/heads/master\nmark :36\nauthor Full Name <user@organization.tld> 3000060000 +0100\ncommitter Full Name <user@organization.tld> 3000060000 +0100\ndata 2\nW\nfrom :34\nM 100644 :35 moduleA/keepme\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate-keepme",
    "content": "feature done\nblob\nmark :1\ndata 10\nkeepme v1\n\nreset refs/heads/branchO\ncommit refs/heads/branchO\nmark :2\nauthor Full Name <user@organization.tld> 2000000000 +0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 moduleA/keepme\n\nblob\nmark :3\ndata 10\nkeepme v2\n\ncommit refs/heads/branchO\nmark :4\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 2\nG\nfrom :2\nM 100644 :3 moduleA/keepme\n\ncommit refs/heads/branchI\nmark :5\nauthor Full Name <user@organization.tld> 2000070000 +0100\ncommitter Full Name <user@organization.tld> 2000070000 +0100\ndata 29\nI: Merge commit 'D' into 'H'\nfrom :4\nmerge :2\n\ncommit refs/heads/branchO\nmark :6\nauthor Full Name <user@organization.tld> 2000080000 +0100\ncommitter Full Name <user@organization.tld> 2000080000 +0100\ndata 29\nJ: Merge commit 'H' into 'D'\nfrom :2\nmerge :4\n\nblob\nmark :7\ndata 10\nkeepme v3\n\ncommit refs/heads/branchO\nmark :8\nauthor Full Name <user@organization.tld> 2000092000 +0100\ncommitter Full Name <user@organization.tld> 2000092000 +0100\ndata 2\nL\nfrom :6\nM 100644 :7 moduleA/keepme\n\nblob\nmark :9\ndata 10\nkeepme v4\n\ncommit refs/heads/master\nmark :10\nauthor Full Name <user@organization.tld> 3000000000 +0100\ncommitter Full Name <user@organization.tld> 3000000000 +0100\ndata 2\nQ\nfrom :8\nM 100644 :9 moduleA/keepme\n\nblob\nmark :11\ndata 10\nkeepme v5\n\ncommit refs/heads/master\nmark :12\nauthor Full Name <user@organization.tld> 3000030000 +0100\ncommitter Full Name <user@organization.tld> 3000030000 +0100\ndata 2\nT\nfrom :10\nM 100644 :11 moduleA/keepme\n\nblob\nmark :13\ndata 10\nkeepme v6\n\ncommit refs/heads/master\nmark :14\nauthor Full Name <user@organization.tld> 3000060000 +0100\ncommitter Full Name <user@organization.tld> 3000060000 +0100\ndata 2\nW\nfrom :12\nM 100644 :13 moduleA/keepme\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate-keepme-noff",
    "content": "feature done\nblob\nmark :1\ndata 10\nkeepme v1\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 2000000000 +0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 moduleA/keepme\n\nblob\nmark :3\ndata 10\nkeepme v2\n\ncommit refs/heads/branchO\nmark :4\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 2\nG\nfrom :2\nM 100644 :3 moduleA/keepme\n\ncommit refs/heads/branchI\nmark :5\nauthor Full Name <user@organization.tld> 2000070000 +0100\ncommitter Full Name <user@organization.tld> 2000070000 +0100\ndata 29\nI: Merge commit 'D' into 'H'\nfrom :4\nmerge :2\n\ncommit refs/heads/branchO\nmark :6\nauthor Full Name <user@organization.tld> 2000080000 +0100\ncommitter Full Name <user@organization.tld> 2000080000 +0100\ndata 29\nJ: Merge commit 'H' into 'D'\nfrom :2\nmerge :4\n\nblob\nmark :7\ndata 10\nkeepme v3\n\ncommit refs/heads/branchO\nmark :8\nauthor Full Name <user@organization.tld> 2000092000 +0100\ncommitter Full Name <user@organization.tld> 2000092000 +0100\ndata 2\nL\nfrom :6\nM 100644 :7 moduleA/keepme\n\ncommit refs/heads/master\nmark :9\nauthor Full Name <user@organization.tld> 2000099000 +0100\ncommitter Full Name <user@organization.tld> 2000099000 +0100\ndata 29\nP: Merge commit 'M' into 'N'\nfrom :2\nmerge :8\n\nblob\nmark :10\ndata 10\nkeepme v4\n\ncommit refs/heads/master\nmark :11\nauthor Full Name <user@organization.tld> 3000000000 +0100\ncommitter Full Name <user@organization.tld> 3000000000 +0100\ndata 2\nQ\nfrom :9\nM 100644 :10 moduleA/keepme\n\nblob\nmark :12\ndata 10\nkeepme v5\n\ncommit refs/heads/master\nmark :13\nauthor Full Name <user@organization.tld> 3000030000 +0100\ncommitter Full Name <user@organization.tld> 3000030000 +0100\ndata 2\nT\nfrom :11\nM 100644 :12 moduleA/keepme\n\nblob\nmark :14\ndata 10\nkeepme v6\n\ncommit refs/heads/master\nmark 
:15\nauthor Full Name <user@organization.tld> 3000060000 +0100\ncommitter Full Name <user@organization.tld> 3000060000 +0100\ndata 2\nW\nfrom :13\nM 100644 :14 moduleA/keepme\n\ndone\n"
  },
  {
    "path": "t/t9390/degenerate-moduleA",
    "content": "feature done\nblob\nmark :1\ndata 10\nkeepme v1\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 2000000000 +0100\ncommitter Full Name <user@organization.tld> 2000000000 +0100\ndata 2\nA\nM 100644 :1 moduleA/keepme\n\nblob\nmark :3\ndata 13\nsometimes v1\n\ncommit refs/heads/master\nmark :4\nauthor Full Name <user@organization.tld> 2000030000 +0100\ncommitter Full Name <user@organization.tld> 2000030000 +0100\ndata 29\nD: Merge commit 'C' into 'B'\nfrom :2\nmerge :2\nM 100644 :3 moduleA/sometimes\n\nblob\nmark :5\ndata 10\nkeepme v2\n\ncommit refs/heads/master\nmark :6\nauthor Full Name <user@organization.tld> 2000050000 +0100\ncommitter Full Name <user@organization.tld> 2000050000 +0100\ndata 2\nG\nfrom :4\nM 100644 :5 moduleA/keepme\n\nblob\nmark :7\ndata 13\nsometimes v2\n\ncommit refs/heads/branchI\nmark :8\nauthor Full Name <user@organization.tld> 2000070000 +0100\ncommitter Full Name <user@organization.tld> 2000070000 +0100\ndata 29\nI: Merge commit 'D' into 'H'\nfrom :6\nmerge :4\nM 100644 :7 moduleA/sometimes\n\nblob\nmark :9\ndata 13\nsometimes v3\n\ncommit refs/heads/master\nmark :10\nauthor Full Name <user@organization.tld> 2000080000 +0100\ncommitter Full Name <user@organization.tld> 2000080000 +0100\ndata 29\nJ: Merge commit 'H' into 'D'\nfrom :4\nmerge :6\nM 100644 :9 moduleA/sometimes\n\nblob\nmark :11\ndata 10\nkeepme v3\n\ncommit refs/heads/master\nmark :12\nauthor Full Name <user@organization.tld> 2000092000 +0100\ncommitter Full Name <user@organization.tld> 2000092000 +0100\ndata 2\nL\nfrom :10\nM 100644 :11 moduleA/keepme\n\ncommit refs/heads/branchO\nmark :13\nauthor Full Name <user@organization.tld> 2000098000 +0100\ncommitter Full Name <user@organization.tld> 2000098000 +0100\ndata 29\nO: Merge commit 'N' into 'M'\nfrom :12\nmerge :2\nD moduleA/sometimes\n\nblob\nmark :14\ndata 13\nsometimes v5\n\ncommit refs/heads/master\nmark :15\nauthor Full Name <user@organization.tld> 
2000099000 +0100\ncommitter Full Name <user@organization.tld> 2000099000 +0100\ndata 29\nP: Merge commit 'M' into 'N'\nfrom :2\nmerge :12\nM 100644 :14 moduleA/sometimes\n\nblob\nmark :16\ndata 10\nkeepme v4\n\ncommit refs/heads/master\nmark :17\nauthor Full Name <user@organization.tld> 3000000000 +0100\ncommitter Full Name <user@organization.tld> 3000000000 +0100\ndata 2\nQ\nfrom :15\nM 100644 :16 moduleA/keepme\n\nblob\nmark :18\ndata 13\nsometimes v6\n\ncommit refs/heads/master\nmark :19\nauthor Full Name <user@organization.tld> 3000020000 +0100\ncommitter Full Name <user@organization.tld> 3000020000 +0100\ndata 29\nS: Merge commit 'R' into 'R'\nfrom :17\nmerge :17\nM 100644 :18 moduleA/sometimes\n\nblob\nmark :20\ndata 10\nkeepme v5\n\ncommit refs/heads/master\nmark :21\nauthor Full Name <user@organization.tld> 3000030000 +0100\ncommitter Full Name <user@organization.tld> 3000030000 +0100\ndata 2\nT\nfrom :19\nM 100644 :20 moduleA/keepme\n\nblob\nmark :22\ndata 13\nsometimes v7\n\ncommit refs/heads/master\nmark :23\nauthor Full Name <user@organization.tld> 3000040000 +0100\ncommitter Full Name <user@organization.tld> 3000040000 +0100\ndata 2\nU\nfrom :21\nM 100644 :22 moduleA/sometimes\n\ncommit refs/heads/master\nmark :24\nauthor Full Name <user@organization.tld> 3000050000 +0100\ncommitter Full Name <user@organization.tld> 3000050000 +0100\ndata 29\nV: Merge commit 'U' into 'U'\nfrom :23\nmerge :23\n\nblob\nmark :25\ndata 10\nkeepme v6\n\ncommit refs/heads/master\nmark :26\nauthor Full Name <user@organization.tld> 3000060000 +0100\ncommitter Full Name <user@organization.tld> 3000060000 +0100\ndata 2\nW\nfrom :24\nM 100644 :25 moduleA/keepme\n\ndone\n"
  },
  {
    "path": "t/t9390/empty",
    "content": "feature done\n# Simple repo with only two files, with a whole bunch of cases dealing with\n# empty pruning, particularly commits that start empty.\n#\n# As with case1, the original-oid directives are very fake, but if an error\n# is hit that shows one of these, it makes it really easy to know where it\n# came from.\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 10\nnukeme v1\n\nblob\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ndata 10\nkeepme v1\n\nblob\nmark :3\noriginal-oid 0000000000000000000000000000000000000003\ndata 10\nnukeme v2\n\nblob\nmark :4\noriginal-oid 0000000000000000000000000000000000000004\ndata 10\nkeepme v2\n\ncommit refs/heads/master\nmark :5\noriginal-oid 0000000000000000000000000000000000000010\nauthor Full Name <user@organization.tld> 1000000000 +0100\ncommitter Full Name <user@organization.tld> 1000000000 +0100\ndata 2\nA\n\ncommit refs/heads/master\nmark :6\noriginal-oid 0000000000000000000000000000000000000011\nauthor Full Name <user@organization.tld> 1000010000 +0100\ncommitter Full Name <user@organization.tld> 1000010000 +0100\ndata 2\nB\nfrom :5\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :7\noriginal-oid 0000000000000000000000000000000000000012\nauthor Full Name <user@organization.tld> 1000020000 +0100\ncommitter Full Name <user@organization.tld> 1000020000 +0100\ndata 2\nC\nM 100644 :1 nukeme\n\ncommit refs/heads/master\nmark :8\noriginal-oid 0000000000000000000000000000000000000013\nauthor Full Name <user@organization.tld> 1000030000 +0100\ncommitter Full Name <user@organization.tld> 1000030000 +0100\ndata 2\nD\nfrom :7\n\ncommit refs/heads/master\nmark :9\noriginal-oid 0000000000000000000000000000000000000014\nauthor Full Name <user@organization.tld> 1000040000 +0100\ncommitter Full Name <user@organization.tld> 1000040000 +0100\ndata 29\nE: Merge commit 'D' into 'B'\nfrom :6\nmerge :8\nM 100644 :2 keepme\n\ncommit refs/heads/master\nmark 
:10\noriginal-oid 0000000000000000000000000000000000000015\nauthor Full Name <user@organization.tld> 1000050000 +0100\ncommitter Full Name <user@organization.tld> 1000050000 +0100\ndata 29\nF: Merge commit 'D' into 'B'\nfrom :6\nmerge :8\n\ncommit refs/heads/master\nmark :11\noriginal-oid 0000000000000000000000000000000000000016\nauthor Full Name <user@organization.tld> 1000060000 +0100\ncommitter Full Name <user@organization.tld> 1000060000 +0100\ndata 2\nG\nfrom :9\nM 100644 :3 nukeme\n\ncommit refs/heads/master\nmark :12\noriginal-oid 0000000000000000000000000000000000000017\nauthor Full Name <user@organization.tld> 1000070000 +0100\ncommitter Full Name <user@organization.tld> 1000070000 +0100\ndata 2\nH\nfrom :11\n\ncommit refs/heads/master\nmark :13\noriginal-oid 0000000000000000000000000000000000000018\nauthor Full Name <user@organization.tld> 1000080000 +0100\ncommitter Full Name <user@organization.tld> 1000080000 +0100\ndata 2\nI\nfrom :10\nM 100644 :4 keepme\n\ncommit refs/heads/master\nmark :14\noriginal-oid 0000000000000000000000000000000000000019\nauthor Full Name <user@organization.tld> 1000090000 +0100\ncommitter Full Name <user@organization.tld> 1000090000 +0100\ndata 29\nJ: Merge commit 'I' into 'H'\nfrom :12\nmerge :13\n\ndone\n"
  },
  {
    "path": "t/t9390/empty-keepme",
    "content": "feature done\nreset refs/heads/master\ncommit refs/heads/master\nmark :1\nauthor Full Name <user@organization.tld> 1000000000 +0100\ncommitter Full Name <user@organization.tld> 1000000000 +0100\ndata 2\nA\n\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 1000010000 +0100\ncommitter Full Name <user@organization.tld> 1000010000 +0100\ndata 2\nB\nfrom :1\n\nblob\nmark :3\ndata 10\nkeepme v1\n\ncommit refs/heads/master\nmark :4\nauthor Full Name <user@organization.tld> 1000040000 +0100\ncommitter Full Name <user@organization.tld> 1000040000 +0100\ndata 29\nE: Merge commit 'D' into 'B'\nfrom :2\nM 100644 :3 keepme\n\nblob\nmark :5\ndata 10\nkeepme v2\n\ncommit refs/heads/master\nmark :6\nauthor Full Name <user@organization.tld> 1000080000 +0100\ncommitter Full Name <user@organization.tld> 1000080000 +0100\ndata 2\nI\nfrom :2\nM 100644 :5 keepme\n\ncommit refs/heads/master\nmark :7\nauthor Full Name <user@organization.tld> 1000090000 +0100\ncommitter Full Name <user@organization.tld> 1000090000 +0100\ndata 29\nJ: Merge commit 'I' into 'H'\nfrom :4\nmerge :6\n\ndone\n"
  },
  {
    "path": "t/t9390/less-empty-keepme",
    "content": "feature done\nreset refs/heads/master\ncommit refs/heads/master\nmark :1\nauthor Full Name <user@organization.tld> 1000000000 +0100\ncommitter Full Name <user@organization.tld> 1000000000 +0100\ndata 2\nA\n\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 1000010000 +0100\ncommitter Full Name <user@organization.tld> 1000010000 +0100\ndata 2\nB\nfrom :1\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :3\nauthor Full Name <user@organization.tld> 1000020000 +0100\ncommitter Full Name <user@organization.tld> 1000020000 +0100\ndata 2\nC\n\ncommit refs/heads/master\nmark :4\nauthor Full Name <user@organization.tld> 1000030000 +0100\ncommitter Full Name <user@organization.tld> 1000030000 +0100\ndata 2\nD\nfrom :3\n\nblob\nmark :5\ndata 10\nkeepme v1\n\ncommit refs/heads/master\nmark :6\nauthor Full Name <user@organization.tld> 1000040000 +0100\ncommitter Full Name <user@organization.tld> 1000040000 +0100\ndata 29\nE: Merge commit 'D' into 'B'\nfrom :2\nmerge :4\nM 100644 :5 keepme\n\ncommit refs/heads/master\nmark :7\nauthor Full Name <user@organization.tld> 1000060000 +0100\ncommitter Full Name <user@organization.tld> 1000060000 +0100\ndata 2\nG\nfrom :6\n\ncommit refs/heads/master\nmark :8\nauthor Full Name <user@organization.tld> 1000070000 +0100\ncommitter Full Name <user@organization.tld> 1000070000 +0100\ndata 2\nH\nfrom :7\n\ncommit refs/heads/master\nmark :9\nauthor Full Name <user@organization.tld> 1000050000 +0100\ncommitter Full Name <user@organization.tld> 1000050000 +0100\ndata 29\nF: Merge commit 'D' into 'B'\nfrom :2\nmerge :4\n\nblob\nmark :10\ndata 10\nkeepme v2\n\ncommit refs/heads/master\nmark :11\nauthor Full Name <user@organization.tld> 1000080000 +0100\ncommitter Full Name <user@organization.tld> 1000080000 +0100\ndata 2\nI\nfrom :9\nM 100644 :10 keepme\n\ncommit refs/heads/master\nmark :12\nauthor Full Name <user@organization.tld> 1000090000 +0100\ncommitter Full Name <user@organization.tld> 
1000090000 +0100\ndata 29\nJ: Merge commit 'I' into 'H'\nfrom :8\nmerge :11\n\ndone\n"
  },
  {
    "path": "t/t9390/more-empty-keepme",
    "content": "feature done\nblob\nmark :1\ndata 10\nkeepme v1\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Full Name <user@organization.tld> 1000040000 +0100\ncommitter Full Name <user@organization.tld> 1000040000 +0100\ndata 29\nE: Merge commit 'D' into 'B'\nM 100644 :1 keepme\n\nblob\nmark :3\ndata 10\nkeepme v2\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :4\nauthor Full Name <user@organization.tld> 1000080000 +0100\ncommitter Full Name <user@organization.tld> 1000080000 +0100\ndata 2\nI\nM 100644 :3 keepme\n\ncommit refs/heads/master\nmark :5\nauthor Full Name <user@organization.tld> 1000090000 +0100\ncommitter Full Name <user@organization.tld> 1000090000 +0100\ndata 29\nJ: Merge commit 'I' into 'H'\nfrom :2\nmerge :4\n\ndone\n"
  },
  {
    "path": "t/t9390/sample-mailmap",
    "content": "Little 'ol Me <me@little.net>\n<me@little.net> <me@laptop.(none)>\n# Here is a comment\nLittle 'ol Me <me@little.net> Little O. Me\nLittle 'ol Me <me@little.net> <me@fire.com>\nLittle 'ol Me <me@little.net> Little Me <me@bigcompany.com>\nLittle John <second@merry.men> little.john <>\n"
  },
  {
    "path": "t/t9390/sample-message",
    "content": "Initial==>Modified\nregex:tw.nty==>the number 20\nv1.0==>version one!\nregex:!$==> :)\n"
  },
  {
    "path": "t/t9390/sample-replace",
    "content": "mod==>modified-by-gremlins\n"
  },
  {
    "path": "t/t9390/unusual",
    "content": "option git quiet\nfeature done\n# Input in a format filter-repo isn't generally expected to receive (either\n# because we don't pass certain flags to fast-export or repos don't have the\n# weird features or whatever other reason), but which we want to test for\n# completeness.\nprogress I am starting the import, yo.\n\ncheckpoint\n\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 5\nhello\n\ncommit refs/heads/master\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ncommitter Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800\ndata 8\nInitial\nM 100644 :1 greeting\n\nreset refs/heads/develop\nfrom :2\n\ntag v1.0\nfrom :2\noriginal-oid 0000000000000000000000000000000000000003\ntagger little.john <> 1535229618 -0700\ndata 4\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9390/unusual-filtered",
    "content": "feature done\nblob\nmark :1\ndata 5\nhello\nreset refs/heads/develop\ncommit refs/heads/develop\nmark :2\nauthor Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800\ncommitter Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800\ndata 8\nInitial\nM 100644 :1 greeting\n\nreset refs/heads/master\nfrom :2\n\ntag v1.0\nfrom :2\ntagger little.john <> 1535229618 -0700\ndata 4\nv1.0\ndone\n"
  },
  {
    "path": "t/t9390/unusual-mailmap",
    "content": "feature done\nblob\nmark :1\ndata 5\nhello\nreset refs/heads/develop\ncommit refs/heads/develop\nmark :2\nauthor Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800\ncommitter Srinivasa Ramanujan <fellow@cambridge.org> 1535228562 +051800\ndata 8\nInitial\nM 100644 :1 greeting\n\nreset refs/heads/master\nfrom :2\n\ntag v1.0\nfrom :2\ntagger Little John <second@merry.men> 1535229618 -0700\ndata 4\nv1.0\ndone\n"
  },
  {
    "path": "t/t9390-filter-repo-basics.sh",
    "content": "#!/bin/bash\n\ntest_description='Basic filter-repo tests'\n\n. ./test-lib.sh\n\nexport PATH=$(dirname $TEST_DIRECTORY):$PATH  # Put git-filter-repo in PATH\n\nDATA=\"$TEST_DIRECTORY/t9390\"\nSQ=\"'\"\n\nfilter_testcase() {\n\tINPUT=$1\n\tOUTPUT=$2\n\tshift 2\n\tREST=(\"$@\")\n\n\n\tNAME=\"check: $INPUT -> $OUTPUT using '${REST[@]}'\"\n\ttest_expect_success \"$NAME\" '\n\t\t# Clean up from previous run\n\t\tgit pack-refs --all &&\n\t\trm .git/packed-refs &&\n\t\trm -rf .git/filter-repo/ &&\n\n\t\t# Run the example\n\t\tcat $DATA/$INPUT | git filter-repo --stdin --quiet --force --replace-refs delete-no-add \"${REST[@]}\" &&\n\n\t\t# Compare the resulting repo to expected value\n\t\tgit fast-export --use-done-feature --all >compare &&\n\t\ttest_cmp $DATA/$OUTPUT compare\n\t'\n}\n\nfilter_testcase basic basic-filename --path filename\nfilter_testcase basic basic-twenty   --path twenty\nfilter_testcase basic basic-ten      --path ten\nfilter_testcase basic basic-numbers  --path ten --path twenty\nfilter_testcase basic basic-filename --invert-paths --path-glob 't*en*'\nfilter_testcase basic basic-numbers  --invert-paths --path-regex 'f.*e.*e'\nfilter_testcase basic basic-mailmap  --mailmap ../t9390/sample-mailmap\nfilter_testcase basic basic-replace  --replace-text ../t9390/sample-replace\nfilter_testcase basic basic-message  --replace-message ../t9390/sample-message\nfilter_testcase empty empty-keepme   --path keepme\nfilter_testcase empty more-empty-keepme --path keepme --prune-empty=always \\\n\t\t                                   --prune-degenerate=always\nfilter_testcase empty less-empty-keepme --path keepme --prune-empty=never \\\n\t\t                                   --prune-degenerate=never\nfilter_testcase degenerate degenerate-keepme   --path moduleA/keepme\nfilter_testcase degenerate degenerate-moduleA  --path moduleA\nfilter_testcase degenerate degenerate-globme   --path-glob *me\nfilter_testcase degenerate degenerate-keepme-noff --path 
moduleA/keepme --no-ff\nfilter_testcase unusual unusual-filtered --path ''\nfilter_testcase unusual unusual-mailmap  --mailmap ../t9390/sample-mailmap\n\nsetup_path_rename() {\n\ttest -d path_rename && return\n\ttest_create_repo path_rename &&\n\t(\n\t\tcd path_rename &&\n\t\tmkdir sequences values &&\n\t\ttest_seq 1 10 >sequences/tiny &&\n\t\ttest_seq 100 110 >sequences/intermediate &&\n\t\ttest_seq 1000 1010 >sequences/large &&\n\t\ttest_seq 1000 1010 >values/large &&\n\t\ttest_seq 10000 10010 >values/huge &&\n\t\tgit add sequences values &&\n\t\tgit commit -m initial &&\n\n\t\tgit mv sequences/tiny sequences/small &&\n\t\tcp sequences/intermediate sequences/medium &&\n\t\techo 10011 >values/huge &&\n\t\tgit add sequences values &&\n\t\tgit commit -m updates &&\n\n\t\tgit rm sequences/intermediate &&\n\t\techo 11 >sequences/small &&\n\t\tgit add sequences/small &&\n\t\tgit commit -m changes &&\n\n\t\techo 1011 >sequences/medium &&\n\t\tgit add sequences/medium &&\n\t\tgit commit -m final\n\t)\n}\n\ntest_expect_success '--path-rename sequences/tiny:sequences/small' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename path_rename_single &&\n\t\tcd path_rename_single &&\n\t\tgit filter-repo --path-rename sequences/tiny:sequences/small &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 7 filenames &&\n\t\t! grep sequences/tiny filenames &&\n\t\tgit rev-parse HEAD~3:sequences/small\n\t)\n'\n\ntest_expect_success '--path-rename sequences:numbers' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename path_rename_dir &&\n\t\tcd path_rename_dir &&\n\t\tgit filter-repo --path-rename sequences:numbers &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 8 filenames &&\n\t\t! 
grep sequences/ filenames &&\n\t\tgrep numbers/ filenames &&\n\t\tgrep values/ filenames\n\t)\n'\n\ntest_expect_success '--path-rename-prefix values:numbers' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename path_rename_dir_2 &&\n\t\tcd path_rename_dir_2 &&\n\t\tgit filter-repo --path-rename values/:numbers/ &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 8 filenames &&\n\t\t! grep values/ filenames &&\n\t\tgrep sequences/ filenames &&\n\t\tgrep numbers/ filenames\n\t)\n'\n\ntest_expect_success '--path-rename squashing' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename path_rename_squash &&\n\t\tcd path_rename_squash &&\n\t\tgit filter-repo \\\n\t\t\t--path-rename sequences/tiny:sequences/small \\\n\t\t\t--path-rename sequences:numbers \\\n\t\t\t--path-rename values:numbers \\\n\t\t\t--path-rename numbers/intermediate:numbers/medium &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\t# Just small, medium, large, huge, and a blank line...\n\t\ttest_line_count = 5 filenames &&\n\t\t! grep sequences/ filenames &&\n\t\t! 
grep values/ filenames &&\n\t\tgrep numbers/ filenames\n\t)\n'\n\ntest_expect_success '--path-rename inability to squash' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename path_rename_bad_squash &&\n\t\tcd path_rename_bad_squash &&\n\t\ttest_must_fail git filter-repo \\\n\t\t\t--path-rename values/large:values/big \\\n\t\t\t--path-rename values/huge:values/big 2>../err &&\n\t\ttest_i18ngrep \"File renaming caused colliding pathnames\" ../err\n\t)\n'\n\ntest_expect_success '--paths-from-file' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename paths_from_file &&\n\t\tcd paths_from_file &&\n\n\t\tcat >../path_changes <<-EOF &&\n\t\tliteral:values/huge\n\t\tvalues/huge==>values/gargantuan\n\t\tglob:*rge\n\n\t\t# Comments and blank lines are ignored\n\t\tregex:.*med.*\n\t\tregex:^([^/]*)/(.*)ge$==>\\2/\\1/ge\n\t\tEOF\n\n\t\tgit filter-repo --paths-from-file ../path_changes &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\t# intermediate, medium, two larges, gargantuan, and a blank line\n\t\ttest_line_count = 6 filenames &&\n\t\t! grep sequences/tiny filenames &&\n\t\tgrep sequences/intermediate filenames &&\n\t\tgrep lar/sequences/ge filenames &&\n\t\tgrep lar/values/ge filenames &&\n\t\tgrep values/gargantuan filenames &&\n\t\t! 
grep sequences/small filenames &&\n\t\tgrep sequences/medium filenames &&\n\n\t\trm ../path_changes\n\t)\n'\n\ntest_expect_success '--paths does not mean --paths-from-file' '\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename paths_misuse &&\n\t\tcd paths_misuse &&\n\n\t\ttest_must_fail git filter-repo --paths values/large 2>../err &&\n\n\t\tgrep \"Error: Option.*--paths.*unrecognized; did you\" ../err &&\n\t\trm ../err\n\t)\n'\n\ncreate_path_filtering_and_renaming() {\n\ttest -d path_filtering_and_renaming && return\n\n\ttest_create_repo path_filtering_and_renaming &&\n\t(\n\t\tcd path_filtering_and_renaming &&\n\n\t\t>.gitignore &&\n\t\tmkdir -p src/main/java/com/org/{foo,bar} &&\n\t\tmkdir -p src/main/resources &&\n\t\ttest_seq  1 10 >src/main/java/com/org/foo/uptoten &&\n\t\ttest_seq 11 20 >src/main/java/com/org/bar/uptotwenty &&\n\t\ttest_seq  1  7 >src/main/java/com/org/uptoseven &&\n\t\ttest_seq  1  5 >src/main/resources/uptofive &&\n\t\tgit add . &&\n\t\tgit commit -m Initial\n\t)\n}\n\ntest_expect_success 'Mixing filtering and renaming paths, not enough filters' '\n\tcreate_path_filtering_and_renaming &&\n\tgit clone --no-local path_filtering_and_renaming \\\n\t\t\t     path_filtering_and_renaming_1 &&\n\t(\n\t\tcd path_filtering_and_renaming_1 &&\n\n\t\tgit filter-repo --path .gitignore \\\n\t\t\t\t--path src/main/resources \\\n\t\t\t\t--path-rename src/main/java/com/org/foo/:src/main/java/com/org/ &&\n\n\t\tcat <<-EOF >expect &&\n\t\t.gitignore\n\t\tsrc/main/resources/uptofive\n\t\tEOF\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'Mixing filtering and renaming paths, enough filters' '\n\tcreate_path_filtering_and_renaming &&\n\tgit clone --no-local path_filtering_and_renaming \\\n\t\t\t     path_filtering_and_renaming_2 &&\n\t(\n\t\tcd path_filtering_and_renaming_2 &&\n\n\t\tgit filter-repo --path .gitignore \\\n\t\t\t\t--path src/main/resources \\\n\t\t\t\t--path 
src/main/java/com/org/foo/ \\\n\t\t\t\t--path-rename src/main/java/com/org/foo/:src/main/java/com/org/ &&\n\n\t\tcat <<-EOF >expect &&\n\t\t.gitignore\n\t\tsrc/main/java/com/org/uptoten\n\t\tsrc/main/resources/uptofive\n\t\tEOF\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'Path rename also allowed before path filtering' '\n\tcreate_path_filtering_and_renaming &&\n\tgit clone --no-local path_filtering_and_renaming \\\n\t\t\t     path_renaming_and_filtering &&\n\t(\n\t\tcd path_renaming_and_filtering &&\n\n\t\tgit filter-repo --invert-paths \\\n\t\t\t\t--path-rename src/main/java/com/org/foo/uptoten:src/main/java/org/foo/asbigasten \\\n\t\t\t\t--path src/main/java/com/ \\\n\n\t\tcat <<-EOF >expect &&\n\t\t.gitignore\n\t\tsrc/main/java/org/foo/asbigasten\n\t\tsrc/main/resources/uptofive\n\t\tEOF\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'Mixing filtering and to-subdirectory-filter' '\n\tcreate_path_filtering_and_renaming &&\n\tgit clone --no-local path_filtering_and_renaming \\\n\t\t\t     path_filtering_and_renaming_3 &&\n\t(\n\t\tcd path_filtering_and_renaming_3 &&\n\n\t\tgit filter-repo --path src/main/resources \\\n\t\t\t\t--to-subdirectory-filter my-module &&\n\n\t\tcat <<-EOF >expect &&\n\t\tmy-module/src/main/resources/uptofive\n\t\tEOF\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\nsetup_commit_message_rewriting() {\n\ttest -d commit_msg && return\n\ttest_create_repo commit_msg &&\n\t(\n\t\tcd commit_msg &&\n\t\techo two guys walking into a >bar &&\n\t\tgit add bar &&\n\t\tgit commit -m initial &&\n\n\t\ttest_commit another &&\n\n\t\tname=$(git rev-parse HEAD) &&\n\t\techo hello >world &&\n\t\tgit add world &&\n\t\tgit commit -m \"Commit referencing ${name:0:8}\" &&\n\n\t\tgit revert HEAD &&\n\n\t\tfor i in $(test_seq 1 200)\n\t\tdo\n\t\t\tgit commit --allow-empty -m \"another commit\"\n\t\tdone &&\n\n\t\techo foo >bar &&\n\t\tgit add bar &&\n\t\tgit 
commit -m bar &&\n\n\t\tgit revert --no-commit HEAD &&\n\t\techo foo >baz &&\n\t\tgit add baz &&\n\t\tgit commit\n\t)\n}\n\ntest_expect_success 'commit message rewrite' '\n\tsetup_commit_message_rewriting &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/commit_msg commit_msg_clone &&\n\t\tcd commit_msg_clone &&\n\n\t\tgit filter-repo --invert-paths --path bar &&\n\n\t\tgit log --oneline >changes &&\n\t\ttest_line_count = 204 changes &&\n\n\t\t# If a commit we reference is rewritten, we expect the\n\t\t# reference to be rewritten.\n\t\tname=$(git rev-parse HEAD~203) &&\n\t\techo \"Commit referencing ${name:0:8}\" >expect &&\n\t\tgit log --no-walk --format=%s HEAD~202 >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\t# If a commit we reference was pruned, then the reference\n\t\t# has nothing to be rewritten to.  Verify that the commit\n\t\t# ID it points to does not exist.\n\t\tlatest=$(git log --no-walk | grep reverts | awk \"{print \\$4}\" | tr -d '.') &&\n\t\ttest -n \"$latest\" &&\n\t\ttest_must_fail git cat-file -e \"$latest\"\n\t)\n'\n\ntest_expect_success 'commit hash unchanged if requested' '\n\tsetup_commit_message_rewriting &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/commit_msg commit_msg_clone_2 &&\n\t\tcd commit_msg_clone_2 &&\n\n\t\tname=$(git rev-parse HEAD~204) &&\n\t\tgit filter-repo --invert-paths --path bar --preserve-commit-hashes &&\n\n\t\tgit log --oneline >changes &&\n\t\ttest_line_count = 204 changes &&\n\n\t\techo \"Commit referencing ${name:0:8}\" >expect &&\n\t\tgit log --no-walk --format=%s HEAD~202 >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'commit message encoding preserved if requested' '\n\t(\n\t\tgit init commit_message_encoding &&\n\t\tcd commit_message_encoding &&\n\n\t\tcat >input <<-\\EOF &&\n\t\tfeature done\n\t\tcommit refs/heads/develop\n\t\tmark :1\n\t\toriginal-oid deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n\t\tauthor Just Me <just@here.org> 1234567890 -0200\n\t\tcommitter Just Me <just@here.org> 1234567890 
-0200\n\t\tencoding iso-8859-7\n\t\tdata 5\n\t\tEOF\n\n\t\tprintf \"Pi: \\360\\n\\ndone\\n\" >>input &&\n\n\t\tcat input | git fast-import --quiet &&\n\t\tgit rev-parse develop >expect &&\n\n\t\tgit filter-repo --preserve-commit-encoding --force &&\n\t\tgit rev-parse develop >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'commit message rewrite unsuccessful' '\n\t(\n\t\tgit init commit_msg_not_found &&\n\t\tcd commit_msg_not_found &&\n\n\t\tcat >input <<-\\EOF &&\n\t\tfeature done\n\t\tcommit refs/heads/develop\n\t\tmark :1\n\t\toriginal-oid deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n\t\tauthor Just Me <just@here.org> 1234567890 -0200\n\t\tcommitter Just Me <just@here.org> 1234567890 -0200\n\t\tdata 2\n\t\tA\n\n\t\tcommit refs/heads/develop\n\t\tmark :2\n\t\toriginal-oid deadbeefcafedeadbeefcafedeadbeefcafecafe\n\t\tauthor Just Me <just@here.org> 1234567890 -0200\n\t\tcommitter Just Me <just@here.org> 1234567890 -0200\n\t\tdata 2\n\t\tB\n\n\t\tcommit refs/heads/develop\n\t\tmark :3\n\t\toriginal-oid 0000000000000000000000000000000000000004\n\t\tauthor Just Me <just@here.org> 3980014290 -0200\n\t\tcommitter Just Me <just@here.org> 3980014290 -0200\n\t\tdata 93\n\t\tFour score and seven years ago, commit deadbeef (\"B\",\n\t\t2009-02-13) messed up.  
This fixes it.\n\t\tdone\n\t\tEOF\n\n\t\tcat input | git filter-repo --stdin --path salutation --force &&\n\n\t\tgit log --oneline develop >changes &&\n\t\ttest_line_count = 3 changes &&\n\n\t\tgit log develop >out &&\n\t\tgrep deadbeef out\n\t)\n'\n\ntest_expect_success '--refs and --replace-text' '\n\t# This test exists to make sure we do not assume that parents in\n\t# filter-repo code are always represented by integers (or marks);\n\t# they sometimes are represented as hashes.\n\tsetup_path_rename &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/path_rename refs_and_replace_text &&\n\t\tcd refs_and_replace_text &&\n\t\tgit rev-parse --short=10 HEAD~1 >myparent &&\n\t\techo \"10==>TEN\" >input &&\n\t\tgit filter-repo --force --replace-text input --refs $(cat myparent)..master &&\n\t\tcat <<-EOF >expect &&\n\t\tTEN11\n\t\tEOF\n\t\ttest_cmp expect sequences/medium &&\n\t\tgit rev-list --count HEAD >actual &&\n\t\techo 4 >expect &&\n\t\ttest_cmp expect actual &&\n\t\tgit rev-parse --short=10 HEAD~1 >actual &&\n\t\ttest_cmp myparent actual\n\t)\n'\n\ntest_expect_success 'reset to specific refs' '\n\ttest_create_repo reset_to_specific_refs &&\n\t(\n\t\tcd reset_to_specific_refs &&\n\n\t\tgit commit --allow-empty -m initial &&\n\t\tINITIAL=$(git rev-parse HEAD) &&\n\t\techo \"$INITIAL refs/heads/develop\" >expect &&\n\n\t\tcat >input <<-INPUT_END &&\n\t\treset refs/heads/develop\n\t\tfrom $INITIAL\n\n\t\treset refs/heads/master\n\t\tfrom 0000000000000000000000000000000000000000\n\t\tINPUT_END\n\n\t\tcat input | git filter-repo --force --stdin &&\n\t\tgit show-ref >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\nsetup_handle_funny_characters() {\n\ttest -d funny_chars && return\n\ttest_create_repo funny_chars &&\n\t(\n\t\tcd funny_chars &&\n\n\t\tgit symbolic-ref HEAD refs/heads/españa &&\n\n\t\tprintf \"بتتكلم بالهندي؟\\n\" >señor &&\n\t\tprintf \"Αυτά μου φαίνονται αλαμπουρνέζικα.\\n\" >>señor &&\n\t\tprintf \"זה סינית בשבילי\\n\" >>señor &&\n\t\tprintf \"ちんぷんかんぷん\\n\" 
>>señor &&\n\t\tprintf \"За мене тоа е шпанско село\\n\" >>señor &&\n\t\tprintf \"看起来像天书。\\n\" >>señor &&\n\t\tprintf \"انگار ژاپنی حرف می زنه\\n\" >>señor &&\n\t\tprintf \"Это для меня китайская грамота.\\n\" >>señor &&\n\t\tprintf \"To mi je španska vas\\n\" >>señor &&\n\t\tprintf \"Konuya Fransız kaldım\\n\" >>señor &&\n\t\tprintf \"עס איז די שפּראַך פון גיבבעריש\\n\" >>señor &&\n\t\tprintf \"Not even UTF-8:\\xe0\\x80\\x80\\x00\\n\" >>señor &&\n\n\t\tcp señor señora &&\n\t\tcp señor señorita &&\n\t\tgit add . &&\n\n\t\texport GIT_AUTHOR_NAME=\"Nguyễn Arnfjörð Gábor\" &&\n\t\texport GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&\n\t\texport GIT_AUTHOR_EMAIL=\"emails@are.ascii\" &&\n\t\texport GIT_COMMITTER_EMAIL=\"$GIT_AUTHOR_EMAIL\" &&\n\t\tgit commit -m \"€$£₽₪\" &&\n\n\t\tgit tag -a -m \"₪₽£€$\" סְפָרַד\n\t)\n}\n\ntest_expect_success 'handle funny characters' '\n\tsetup_handle_funny_characters &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/funny_chars funny_chars_checks &&\n\t\tcd funny_chars_checks &&\n\n\t\tfile_sha=$(git rev-parse :0:señor) &&\n\t\tformer_head_sha=$(git rev-parse HEAD) &&\n\t\tgit filter-repo --replace-refs old-default --to-subdirectory-filter títulos &&\n\n\t\tcat <<-EOF >expect &&\n\t\t100644 $file_sha 0\t\"t\\303\\255tulos/se\\303\\261or\"\n\t\t100644 $file_sha 0\t\"t\\303\\255tulos/se\\303\\261ora\"\n\t\t100644 $file_sha 0\t\"t\\303\\255tulos/se\\303\\261orita\"\n\t\tEOF\n\n\t\tgit ls-files -s >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcommit_sha=$(git rev-parse HEAD) &&\n\t\ttag_sha=$(git rev-parse סְפָרַד) &&\n\t\tcat <<-EOF >expect &&\n\t\t$commit_sha refs/heads/españa\n\t\t$commit_sha refs/replace/$former_head_sha\n\t\t$tag_sha refs/tags/סְפָרַד\n\t\tEOF\n\n\t\tgit show-ref >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\techo \"€$£₽₪\" >expect &&\n\t\tgit cat-file -p HEAD | tail -n 1 >actual &&\n\n\t\techo \"₪₽£€$\" >expect &&\n\t\tgit cat-file -p סְפָרַד | tail -n 1 >actual\n        )\n'\n\ntest_expect_success '--state-branch with 
changing renames' '\n\ttest_create_repo state_branch_renames_export\n\ttest_create_repo state_branch_renames &&\n\t(\n\t\tcd state_branch_renames &&\n\t\tgit fast-import --quiet <$DATA/basic-numbers &&\n\t\tgit branch -d A &&\n\t\tgit branch -d B &&\n\t\tgit tag -d v1.0 &&\n\n\t\tORIG=$(git rev-parse master) &&\n\t\tgit reset --hard master~1 &&\n\t\tgit filter-repo --path-rename ten:zehn \\\n                                --state-branch state_info \\\n                                --target ../state_branch_renames_export &&\n\n\t\tcd ../state_branch_renames_export &&\n\t\tgit log --format=%s --name-status >actual &&\n\t\tcat <<-EOF >expect &&\n\t\t\tMerge branch ${SQ}A${SQ} into B\n\t\t\tadd twenty\n\n\t\t\tM\ttwenty\n\t\t\tadd ten\n\n\t\t\tM\tzehn\n\t\t\tInitial\n\n\t\t\tA\ttwenty\n\t\t\tA\tzehn\n\t\t\tEOF\n\t\ttest_cmp expect actual &&\n\n\t\tcd ../state_branch_renames &&\n\n\t\tgit reset --hard $ORIG &&\n\t\tgit filter-repo --path-rename twenty:veinte \\\n                                --state-branch state_info \\\n                                --target ../state_branch_renames_export &&\n\n\t\tcd ../state_branch_renames_export &&\n\t\tgit log --format=%s --name-status >actual &&\n\t\tcat <<-EOF >expect &&\n\t\t\twhatever\n\n\t\t\tA\tten\n\t\t\tA\tveinte\n\t\t\tMerge branch ${SQ}A${SQ} into B\n\t\t\tadd twenty\n\n\t\t\tM\ttwenty\n\t\t\tadd ten\n\n\t\t\tM\tzehn\n\t\t\tInitial\n\n\t\t\tA\ttwenty\n\t\t\tA\tzehn\n\t\t\tEOF\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success '--state-branch with expanding paths and refs' '\n\ttest_create_repo state_branch_more_paths_export\n\ttest_create_repo state_branch_more_paths &&\n\t(\n\t\tcd state_branch_more_paths &&\n\t\tgit fast-import --quiet <$DATA/basic-numbers &&\n\n\t\tgit reset --hard master~1 &&\n\t\tgit filter-repo --path ten --state-branch state_info \\\n                                --target ../state_branch_more_paths_export \\\n                                --refs master &&\n\n\t\tcd 
../state_branch_more_paths_export &&\n\t\techo 2 >expect &&\n\t\tgit rev-list --count master >actual &&\n\t\ttest_cmp expect actual &&\n\t\ttest_must_fail git rev-parse master~1:twenty &&\n\t\ttest_must_fail git rev-parse master:twenty &&\n\n\t\tcd ../state_branch_more_paths &&\n\n\t\tgit reset --hard v1.0 &&\n\t\tgit filter-repo --path ten --path twenty \\\n                                --state-branch state_info \\\n                                --target ../state_branch_more_paths_export &&\n\n\t\tcd ../state_branch_more_paths_export &&\n\t\techo 3 >expect &&\n\t\tgit rev-list --count master >actual &&\n\t\ttest_cmp expect actual &&\n\t\ttest_must_fail git rev-parse master~2:twenty &&\n\t\tgit rev-parse master:twenty\n\t)\n'\n\ntest_expect_success FUNNYNAMES 'degenerate merge with non-matching filenames' '\n\ttest_create_repo degenerate_merge_differing_filenames &&\n\t(\n\t\tcd degenerate_merge_differing_filenames &&\n\n\t\ttouch \"foo \\\"quote\\\" bar\" &&\n\t\tgit add \"foo \\\"quote\\\" bar\" &&\n\t\tgit commit -m \"Add foo \\\"quote\\\" bar\"\n\t\tgit branch A &&\n\n\t\tgit checkout --orphan B &&\n\t\tgit reset --hard &&\n\t\tmkdir -p pkg/list &&\n\t\ttest_commit pkg/list/whatever &&\n\t\ttest_commit unwanted_file &&\n\n\t\tgit checkout A &&\n\t\tgit merge --allow-unrelated-histories --no-commit B &&\n\t\t>pkg/list/wanted &&\n\t\tgit add pkg/list/wanted &&\n\t\tgit rm -f pkg/list/whatever.t &&\n\t\tgit commit &&\n\n\t\tgit filter-repo --force --path pkg/list &&\n\t\t! 
test_path_is_file pkg/list/whatever.t &&\n\t\tgit ls-files >actual &&\n\t\techo pkg/list/wanted >expect &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'degenerate merge with typechange' '\n\ttest_create_repo degenerate_merge_with_typechange &&\n\t(\n\t\tcd degenerate_merge_with_typechange &&\n\n\t\ttouch irrelevant_file &&\n\t\tgit add irrelevant_file &&\n\t\tgit commit -m \"Irrelevant, unwanted file\"\n\t\tgit branch A &&\n\n\t\tgit checkout --orphan B &&\n\t\tgit reset --hard &&\n\t\techo hello >world &&\n\t\tgit add world &&\n\t\tgit commit -m \"greeting\" &&\n\t\techo goodbye >planet &&\n\t\tgit add planet &&\n\t\tgit commit -m \"farewell\" &&\n\n\t\tgit checkout A &&\n\t\tgit merge --allow-unrelated-histories --no-commit B &&\n\t\trm world &&\n\t\tln -s planet world &&\n\t\tgit add world &&\n\t\tgit commit &&\n\n\t\tgit filter-repo --force --path world &&\n\t\ttest_path_is_missing irrelevant_file &&\n\t\ttest_path_is_missing planet &&\n\t\techo world >expect &&\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tgit log --oneline HEAD >input &&\n\t\ttest_line_count = 2 input\n\t)\n'\n\ntest_expect_success 'degenerate evil merge' '\n\ttest_create_repo degenerate_evil_merge &&\n\t(\n\t\tcd degenerate_evil_merge &&\n\n\t\tcat $DATA/degenerate-evil-merge | git fast-import --quiet &&\n\t\tgit filter-repo --force --subdirectory-filter module-of-interest &&\n\t\ttest_path_is_missing module-of-interest &&\n\t\ttest_path_is_missing other-module &&\n\t\ttest_path_is_missing irrelevant &&\n\t\ttest_path_is_file file1 &&\n\t\ttest_path_is_file file2 &&\n\t\ttest_path_is_file file3\n\t)\n'\n\ntest_lazy_prereq IN_FILTER_REPO_CLONE '\n\tgit -C ../../ rev-parse HEAD:git-filter-repo &&\n\tgrep @@LOCALEDIR@@ ../../../git-filter-repo &&\n\thead -n 1 ../../../git-filter-repo | grep \"/usr/bin/env python3$\"\n'\n\n# Next test depends on git-filter-repo coming from the git-filter-repo\n# not having been modified by e.g. normal installation.  
Skip the test\n# if we're in some kind of installation of filter-repo rather than in a\n# simple clone of the original repository.\ntest_expect_success IN_FILTER_REPO_CLONE '--version' '\n\tgit filter-repo --version >actual &&\n\tgit hash-object ../../git-filter-repo | cut -c 1-12 >expect &&\n\ttest_cmp expect actual\n'\n\ntest_expect_success 'empty author ident' '\n\ttest_create_repo empty_author_ident &&\n\t(\n\t\tcd empty_author_ident &&\n\n\t\tgit init &&\n\t\tcat <<-EOF | git fast-import --quiet &&\n\t\t\tfeature done\n\t\t\tblob\n\t\t\tmark :1\n\t\t\tdata 8\n\t\t\tinitial\n\n\t\t\treset refs/heads/develop\n\t\t\tcommit refs/heads/develop\n\t\t\tmark :2\n\t\t\tauthor <empty@ident.ity> 1535228562 -0700\n\t\t\tcommitter Full Name <email@add.ress> 1535228562 -0700\n\t\t\tdata 8\n\t\t\tInitial\n\t\t\tM 100644 :1 filename\n\n\t\t\tdone\n\t\t\tEOF\n\n\t\tgit filter-repo --force --path-rename filename:stuff &&\n\n\t\tgit log --format=%an develop >actual &&\n\t\techo >expect &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'rewrite stash' '\n\ttest_create_repo rewrite_stash &&\n\t(\n\t\tcd rewrite_stash &&\n\n\t\tgit init &&\n\t\ttest_write_lines 1 2 3 4 5 6 7 8 9 10 >numbers &&\n\t\tgit add numbers &&\n\t\tgit commit -qm initial &&\n\n\t\techo 11 >>numbers &&\n\t\tgit stash push -m \"add eleven\" &&\n\t\techo foobar >>numbers &&\n\t\tgit stash push -m \"add foobar\" &&\n\n\t\tgit filter-repo --force --path-rename numbers:values &&\n\n\t\tgit stash list >output &&\n\t\ttest 2 -eq $(cat output | wc -l)\n\t)\n'\n\ntest_expect_success 'rewrite stash and drop relevant entries' '\n\ttest_create_repo rewrite_stash_drop_entries &&\n\t(\n\t\tcd rewrite_stash_drop_entries &&\n\n\t\tgit init &&\n\t\ttest_write_lines 1 2 3 4 5 6 7 8 9 10 >numbers &&\n\t\tgit add numbers &&\n\t\tgit commit -qm numbers &&\n\n\t\techo 11 >>numbers &&\n\t\tgit stash push -m \"add eleven\" &&\n\n\t\ttest_write_lines a b c d e f g h i j >letters &&\n\t\ttest_write_lines hello hi welcome 
>greetings &&\n\t\tgit add letters greetings &&\n\t\tgit commit -qm \"letters and greetings\" &&\n\n\t\techo k >>letters &&\n\t\tgit stash push -m \"add k\" &&\n\t\techo hey >>greetings &&\n\t\tgit stash push -m \"add hey\" &&\n\t\techo 12 >>numbers &&\n\t\tgit stash push -m \"add twelve\" &&\n\n\t\ttest_line_count = 4 .git/logs/refs/stash &&\n\n\t\tgit filter-repo --force --path letters --path greetings &&\n\n\t\ttest_line_count = 3 .git/logs/refs/stash &&\n\t\t! grep add.eleven .git/logs/refs/stash &&\n\t\tgrep add.k .git/logs/refs/stash &&\n\t\tgrep add.hey .git/logs/refs/stash &&\n\t\tgrep add.twelve .git/logs/refs/stash\n\t)\n'\n\ntest_expect_success POSIXPERM 'failure to run cleanup' '\n\ttest_create_repo fail_to_cleanup &&\n\t(\n\t\tcd fail_to_cleanup &&\n\n\t\tgit init &&\n\t\ttest_write_lines 1 2 3 4 5 6 7 8 9 10 >numbers &&\n\t\tgit add numbers &&\n\t\tgit commit -qm initial &&\n\n\t\tchmod u-w .git/logs &&\n\t\ttest_must_fail git filter-repo --force \\\n\t\t                       --path-rename numbers:values 2> ../err &&\n\t\tchmod u+w .git/logs &&\n\t\tgrep fatal.*git.reflog.expire.*failed ../err\n\t)\n'\n\ntest_expect_success 'origin refs without origin remote does not die' '\n\ttest_create_repo origin_refs_with_origin_remote &&\n\t(\n\t\tcd origin_refs_with_origin_remote &&\n\n\t\ttest_commit numbers &&\n\t\tgit update-ref refs/remotes/origin/svnhead master &&\n\n\t\tgit filter-repo --force --path-rename numbers.t:values.t &&\n\n\t\tgit show svnhead:values.t >actual &&\n\t\techo numbers >expect &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'multi-line config value' '\n\ttest_create_repo multiline_config &&\n\t(\n\t\tcd multiline_config &&\n\n\t\tgit config set test.test \"test\ntest\" &&\n\t\tgit filter-repo --force\n\t)\n'\n\ntest_done\n"
  },
  {
    "path": "t/t9391/commit_info.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo\n\"\"\"\n\nimport re\nimport datetime\n\nimport git_filter_repo as fr\n\ndef change_up_them_commits(commit, metadata):\n  # Change the commit author\n  if commit.author_name == b\"Copy N. Paste\":\n    commit.author_name = b\"Ima L. Oser\"\n    commit.author_email = b\"aloser@my.corp\"\n\n  # Fix the author email\n  commit.author_email = re.sub(b\"@my.crp\", b\"@my.corp\", commit.author_email)\n\n  # Fix the committer date (bad timezone conversion in initial import)\n  oldtime = fr.string_to_date(commit.committer_date)\n  newtime = oldtime + datetime.timedelta(hours=-5)\n  commit.committer_date = fr.date_to_string(newtime)\n\n  # Fix the commit message\n  commit.message = re.sub(b\"Marketing is staffed with pansies\", b\"\",\n                          commit.message)\n\nargs = fr.FilteringOptions.parse_args(['--force'])\nfilter = fr.RepoFilter(args, commit_callback = change_up_them_commits)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/create_fast_export_output.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport git_filter_repo as fr\nfrom git_filter_repo import Blob, Reset, FileChange, Commit, Tag, FixedTimeZone\nfrom git_filter_repo import Progress, Checkpoint\n\nfrom datetime import datetime, timedelta\n\nargs = fr.FilteringOptions.default_options()\nout = fr.RepoFilter(args)\nout.importer_only()\n\nworld = Blob(b\"Hello\")\nout.insert(world)\n\nbar = Blob(b\"foo\\n\")\nout.insert(bar)\n\nmaster = Reset(b\"refs/heads/master\")\nout.insert(master)\n\nchanges = [FileChange(b'M', b'world', world.id, mode=b\"100644\"),\n           FileChange(b'M', b'bar',   bar.id,   mode=b\"100644\")]\nwhen = datetime(year=2005, month=4, day=7,\n                hour=15, minute=16, second=10,\n                tzinfo=FixedTimeZone(b\"-0700\"))\nwhen_string = fr.date_to_string(when)\ncommit1 = Commit(b\"refs/heads/master\",\n                 b\"A U Thor\", b\"au@thor.email\", when_string,\n                 b\"Com M. Iter\", b\"comm@iter.email\", when_string,\n                 b\"My first commit!  Wooot!\\n\\nLonger description\",\n                 changes,\n                 parents = [])\nout.insert(commit1)\n\nworld = Blob(b\"Hello\\nHi\")\nout.insert(world)\nworld_link = Blob(b\"world\")\nout.insert(world_link)\n\nchanges = [FileChange(b'M', b'world',  world.id,      mode=b\"100644\"),\n           FileChange(b'M', b'planet', world_link.id, mode=b\"120000\")]\nwhen += timedelta(days=3, hours=4, minutes=6)\nwhen_string = fr.date_to_string(when)\ncommit2 = Commit(b\"refs/heads/master\",\n                 b\"A U Thor\", b\"au@thor.email\", when_string,\n                 b\"Com M. 
Iter\", b\"comm@iter.email\", when_string,\n                 b\"Make a symlink to world called planet, modify world\",\n                 changes,\n                 parents = [commit1.id])\nout.insert(commit2)\n\nscript = Blob(b\"#!/bin/sh\\n\\necho Hello\")\nout.insert(script)\nchanges = [FileChange(b'M', b'runme', script.id, mode=b\"100755\"),\n           FileChange(b'D', b'bar')]\nwhen_string = b\"1234567890 -0700\"\ncommit3 = Commit(b\"refs/heads/master\",\n                 b\"A U Thor\", b\"au@thor.email\", when_string,\n                 b\"Com M. Iter\", b\"comm@iter.email\", when_string,\n                 b\"Add runme script, remove bar\",\n                 changes,\n                 parents = [commit2.id])\nout.insert(commit3)\n\nprogress = Progress(b\"Done with the master branch now...\")\nout.insert(progress)\ncheckpoint = Checkpoint()\nout.insert(checkpoint)\n\ndevel = Reset(b\"refs/heads/devel\", commit1.id)\nout.insert(devel)\n\nworld = Blob(b\"Hello\\nGoodbye\")\nout.insert(world)\n\nchanges = [FileChange(b'DELETEALL'),\n           FileChange(b'M', b'world', world.id, mode=b\"100644\"),\n           FileChange(b'M', b'bar',   bar.id,   mode=b\"100644\")]\nwhen = datetime(2006, 8, 17, tzinfo=FixedTimeZone(b\"+0200\"))\nwhen_string = fr.date_to_string(when)\ncommit4 = Commit(b\"refs/heads/devel\",\n                 b\"A U Thor\", b\"au@thor.email\", when_string,\n                 b\"Com M. Iter\", b\"comm@iter.email\", when_string,\n                 b\"Modify world\",\n                 changes,\n                 parents = [commit1.id])\nout.insert(commit4)\n\nworld = Blob(b\"Hello\\nHi\\nGoodbye\")\nout.insert(world)\nwhen = fr.string_to_date(commit3.author_date) + timedelta(days=47)\nwhen_string = fr.date_to_string(when)\n# git fast-import requires file changes to be listed in terms of differences\n# to the first parent.  
Thus, despite the fact that runme and planet have\n# not changed and bar was not modified in the devel side, we have to list them\n# all anyway.\nchanges = [FileChange(b'M', b'world', world.id, mode=b\"100644\"),\n           FileChange(b'D', b'bar'),\n           FileChange(b'M', b'runme', script.id, mode=b\"100755\"),\n           FileChange(b'M', b'planet', world_link.id, mode=b\"120000\")]\n\ncommit5 = Commit(b\"refs/heads/devel\",\n                 b\"A U Thor\", b\"au@thor.email\", when_string,\n                 b\"Com M. Iter\", b\"comm@iter.email\", when_string,\n                 b\"Merge branch 'master'\\n\",\n                 changes,\n                 parents = [commit4.id, commit3.id])\nout.insert(commit5)\n\n\nmytag = Tag(b\"refs/tags/v1.0\", commit5.id,\n            b\"His R. Highness\", b\"royalty@my.kingdom\", when_string,\n            b\"I bequeath to my peons this royal software\")\nout.insert(mytag)\nout.finish()\n"
  },
  {
    "path": "t/t9391/emoji-repo",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nblob\nmark :2\ndata 5\nlock\n\nblob\nmark :3\ndata 11\n*.bak\n🔒\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :4\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 10\nMy commit\nM 100644 :1 filename\nM 100644 :2 🔒\nM 100644 :3 .gitignore\n\ndone\n"
  },
  {
    "path": "t/t9391/erroneous.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo\n\"\"\"\n\nimport git_filter_repo as fr\n\ndef handle_tag(tag):\n  print(\"Tagger: \"+''.join(tag.tagger_name))\n\nargs = fr.FilteringOptions.parse_args(['--force', '--tag-callback', 'pass'])\nfilter = fr.RepoFilter(args, tag_callback = handle_tag)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/file_filter.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport sys\nimport git_filter_repo as fr\n\ndef drop_file_by_contents(blob, metadata):\n  bad_file_contents = b'The launch code is 1-2-3-4.'\n  if blob.data == bad_file_contents:\n    blob.skip()\n\ndef drop_files_by_name(commit, metadata):\n  new_file_changes = []\n  for change in commit.file_changes:\n    if not change.filename.endswith(b'.doc'):\n      new_file_changes.append(change)\n  commit.file_changes = new_file_changes\n\nsys.argv.append('--force')\nargs = fr.FilteringOptions.parse_args(sys.argv[1:])\n\nfilter = fr.RepoFilter(args,\n                       blob_callback   = drop_file_by_contents,\n                       commit_callback = drop_files_by_name)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/print_progress.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo\n\"\"\"\n\nimport sys\nimport git_filter_repo as fr\n\nif len(sys.argv) != 3:\n  raise SystemExit(\"Syntax:\\n  %s SOURCE_REPO TARGET_REPO\" % sys.argv[0])\nsource_repo = sys.argv[1].encode()\ntarget_repo = sys.argv[2].encode()\n\ntotal_objects = fr.GitUtils.get_total_objects(source_repo) # blobs+trees\ntotal_commits = fr.GitUtils.get_commit_count(source_repo)\nobject_count = 0\ncommit_count = 0\n\ndef print_progress():\n  global object_count, commit_count, total_objects, total_commits\n  print(\"\\rRewriting commits... %d/%d  (%d objects)\"\n        % (commit_count, total_commits, object_count), end='')\n\ndef my_blob_callback(blob, metadata):\n  global object_count\n  object_count += 1\n  print_progress()\n\ndef my_commit_callback(commit, metadata):\n  global commit_count\n  commit_count += 1\n  print_progress()\n\nargs = fr.FilteringOptions.parse_args(['--force', '--quiet'])\nfilter = fr.RepoFilter(args,\n                       blob_callback   = my_blob_callback,\n                       commit_callback = my_commit_callback)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/rename-master-to-develop.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport git_filter_repo as fr\n\ndef my_commit_callback(commit, metadata):\n  if commit.branch == b\"refs/heads/master\":\n    commit.branch = b\"refs/heads/develop\"\n\nargs = fr.FilteringOptions.default_options()\nargs.force = True\nfilter = fr.RepoFilter(args, commit_callback = my_commit_callback)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/splice_repos.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\nAlso, note that splicing repos may need some special care as fast-export\nonly shows the files that changed relative to the first parent, so there\nmay be gotchas if you are to splice near merge commits; this example does\nnot try to handle any such special cases.\n\"\"\"\n\nimport re\nimport sys\nimport git_filter_repo as fr\n\nclass InterleaveRepositories:\n  def __init__(self, repo1, repo2, output_dir):\n    self.repo1 = repo1\n    self.repo2 = repo2\n    self.output_dir = output_dir\n\n    self.commit_map = {}\n    self.last_commit = None\n\n  def skip_reset(self, reset, metadata):\n    reset.skip()\n\n  def hold_commit(self, commit, metadata):\n    commit.skip(new_id = commit.id)\n    letter = re.match(b'Commit (.)', commit.message).group(1)\n    self.commit_map[letter] = commit\n\n  def weave_commit(self, commit, metadata):\n    letter = re.match(b'Commit (.)', commit.message).group(1)\n    prev_letter = bytes([ord(letter)-1])\n\n    # Splice in any extra commits needed\n    if prev_letter in self.commit_map:\n      new_commit = self.commit_map[prev_letter]\n      new_commit.dumped = 0\n      new_commit.parents = [self.last_commit] if self.last_commit else []\n      # direct_insertion=True to avoid weave_commit being called recursively\n      # on the same commit\n      self.out.insert(new_commit, direct_insertion = True)\n      commit.parents = [new_commit.id]\n\n    # Dump our commit now\n    self.out.insert(commit, direct_insertion = True)\n\n    # Make sure that commits that depended on new_commit.id will now depend\n    # on commit.id\n    if prev_letter in self.commit_map:\n      self.last_commit = commit.id\n      fr.record_id_rename(new_commit.id, commit.id)\n\n  def run(self):\n    blob = fr.Blob(b'public gpg key contents')\n    tag = fr.Tag(b'gpg-pubkey', blob.id,\n                 b'Ima Tagger', 
b'ima@tagg.er', b'1136199845 +0300',\n                 b'Very important explanation and stuff')\n\n    args = fr.FilteringOptions.parse_args(['--target', self.output_dir])\n    out = fr.RepoFilter(args)\n    out.importer_only()\n    self.out = out\n\n    i1args = fr.FilteringOptions.parse_args(['--source', self.repo1])\n    i1 = fr.RepoFilter(i1args,\n                       reset_callback  = self.skip_reset,\n                       commit_callback = self.hold_commit)\n    i1.set_output(out)\n    i1.run()\n\n    i2args = fr.FilteringOptions.parse_args(['--source', self.repo2])\n    i2 = fr.RepoFilter(i2args,\n                       commit_callback = self.weave_commit)\n    i2.set_output(out)\n    i2.run()\n\n    out.insert(blob)\n    out.insert(tag)\n    out.finish()\n\nsplicer = InterleaveRepositories(sys.argv[1], sys.argv[2], sys.argv[3])\nsplicer.run()\n"
  },
  {
    "path": "t/t9391/strip-cvs-keywords.py",
    "content": "#!/usr/bin/env python3\n\n\"\"\"\nPlease see the\n  ***** API BACKWARD COMPATIBILITY CAVEAT *****\nnear the top of git-filter-repo.\n\"\"\"\n\nimport re\nimport git_filter_repo as fr\n\ndef strip_cvs_keywords(blob, metadata):\n  # FIXME: Should first check if blob is a text file to avoid ruining\n  # binaries.  Could use python.magic here, or just output blob.data to\n  # the unix 'file' command\n  pattern = br'\\$(Id|Date|Source|Header|CVSHeader|Author|Revision):.*\\$'\n  replacement = br'$\\1$'\n  blob.data = re.sub(pattern, replacement, blob.data)\n\nargs = fr.FilteringOptions.parse_args(['--force'])\nfilter = fr.RepoFilter(args, blob_callback = strip_cvs_keywords)\nfilter.run()\n"
  },
  {
    "path": "t/t9391/unusual.py",
    "content": "#!/usr/bin/env python3\n\n# Please: DO NOT USE THIS AS AN EXAMPLE.\n#\n# This file is NOT for demonstration of how to use git-filter-repo as a\n# library; it exists to test corner cases or otherwise unusual inputs, and\n# to verify some invariants that git-filter-repo currently aims to maintain\n# (these invariants might be different in future versions of\n# git-filter-repo).  As such, it reaches deep into the internals and does\n# weird things that you should probably avoid in your usage of\n# git-filter-repo.  Any code in this testcase is much more likely to have\n# API breaks than other files in t9391.\n\nimport collections\nimport os\nimport random\nimport io\nimport sys\nimport textwrap\n\nimport git_filter_repo as fr\n\ntotal_objects = {'common': 0, 'uncommon': 0}\ndef track_everything(obj, *_ignored):\n  if type(obj) == fr.Blob or type(obj) == fr.Commit:\n    total_objects['common'] += 1\n  else:\n    total_objects['uncommon'] += 1\n  if type(obj) == fr.Reset:\n    def assert_not_reached(x): raise SystemExit(\"should have been skipped!\")\n    obj.dump = assert_not_reached\n    obj.skip()\n  if hasattr(obj, 'id') and type(obj) != fr.Tag:\n    # The creation of myblob should cause objects in stream to get their ids\n    # increased by 1; this shouldn't be depended upon as API by external\n    # projects, I'm just verifying an invariant of the current code.\n    assert fr._IDS._reverse_translation[obj.id] == [obj.id - 1]\n\ndef handle_progress(progress):\n  print(b\"Decipher this: \"+bytes(reversed(progress.message)))\n  track_everything(progress)\n\ndef handle_checkpoint(checkpoint_object):\n  # Flip a coin; see if we want to pass the checkpoint through.\n  if random.randint(0,1) == 0:\n    checkpoint_object.dump(parser._output)\n  track_everything(checkpoint_object)\n\nmystr = b'This is the contents of the blob'\ncompare = b\"Blob:\\n  blob\\n  mark :1\\n  data %d\\n  %s\" % (len(mystr), mystr)\n# Next line's only purpose is testing code coverage of something that helps\n# debugging git-filter-repo; it is NOT something external folks should depend\n# upon.\nmyblob = fr.Blob(mystr)\nassert bytes(myblob) == compare\n# Everyone should be using RepoFilter objects, not FastExportParser.  But for\n# testing purposes...\nparser = fr.FastExportParser(blob_callback   = track_everything,\n                             reset_callback  = track_everything,\n                             commit_callback = track_everything,\n                             tag_callback    = track_everything,\n                             progress_callback = handle_progress,\n                             checkpoint_callback = handle_checkpoint)\n\nparser.run(input = sys.stdin.detach(),\n           output = open(os.devnull, 'bw'))\n# DO NOT depend upon or use _IDS directly you external script writers.  I'm\n# only testing here for code coverage; the capacity exists to help debug\n# git-filter-repo itself, not for external folks to use.\nassert str(fr._IDS).startswith(\"Current count: 5\")\nprint(\"Found {} blobs/commits and {} other objects\"\n      .format(total_objects['common'], total_objects['uncommon']))\n\n\nstream = io.BytesIO(textwrap.dedent('''\n  blob\n  mark :1\n  data 5\n  hello\n\n  commit refs/heads/A\n  mark :2\n  author Just Me <just@here.org> 1234567890 -0200\n  committer Just Me <just@here.org> 1234567890 -0200\n  data 2\n  A\n\n  commit refs/heads/B\n  mark :3\n  author Just Me <just@here.org> 1234567890 -0200\n  committer Just Me <just@here.org> 1234567890 -0200\n  data 2\n  B\n  from :2\n  M 100644 :1 greeting\n\n  reset refs/heads/B\n  from :3\n\n  commit refs/heads/C\n  mark :4\n  author Just Me <just@here.org> 1234567890 -0200\n  committer Just Me <just@here.org> 1234567890 -0200\n  data 2\n  C\n  from :3\n  M 100644 :1 salutation\n\n  '''[1:]).encode())\n\ncounts = collections.Counter()\ndef look_for_reset(obj, metadata):\n  print(\"Processing {}\".format(obj))\n  counts[type(obj)] += 1\n  if type(obj) == fr.Reset:\n    assert obj.ref == b'refs/heads/B'\n\n# Use all kinds of internals that external scripts should NOT use and which\n# are likely to break in the future, just to verify a few invariants...\nargs = fr.FilteringOptions.parse_args(['--stdin', '--dry-run',\n                                       '--path', 'salutation'])\nfilter = fr.RepoFilter(args,\n                       blob_callback   = look_for_reset,\n                       reset_callback  = look_for_reset,\n                       commit_callback = look_for_reset,\n                       tag_callback    = look_for_reset)\nfilter._input = stream\nfilter._setup_output()\nfilter._sanity_checks_handled = True\nfilter.run()\nassert counts == collections.Counter({fr.Blob: 1, fr.Commit: 3, fr.Reset: 1})\n"
  },
  {
    "path": "t/t9391-filter-repo-lib-usage.sh",
    "content": "#!/bin/bash\n\ntest_description='Usage of git-filter-repo as a library'\n. ./test-lib.sh\n\n# for git_filter_repo.py import\ncase \"$(uname -s)\" in\nMINGW*|MSYS)\n\texport PYTHONPATH=$(cygpath -am $TEST_DIRECTORY/..)\\;$PYTHONPATH\n\t;;\n*)\n\texport PYTHONPATH=$(dirname $TEST_DIRECTORY):$PYTHONPATH\n\t;;\nesac\n# Avoid writing git_filter_repo.pyc file\nexport PYTHONDONTWRITEBYTECODE=1\nexport CONTRIB_DIR=$TEST_DIRECTORY/../contrib/filter-repo-demos\n\nDATA=\"$TEST_DIRECTORY/t9391\"\n\nsetup()\n{\n\tgit init $1 &&\n\t(\n\t\tcd $1 &&\n\t\techo hello > world &&\n\t\tgit add world &&\n\t\ttest_tick &&\n\t\tgit commit -m initial &&\n\t\tprintf \"The launch code is 1-2-3-4.\" > secret &&\n\t\tgit add secret &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Sssh.  Dont tell no one\" &&\n\t\techo A file that you cant trust > file.doc &&\n\t\techo there >> world &&\n\t\tgit add file.doc world &&\n\t\ttest_tick &&\n\t\tprintf \"Random useless changes\\n\\nLet us be like the marketing group.  Marketing is staffed with pansies\" | git commit -F - &&\n\t\techo Do not use a preposition to end a setence with > advice &&\n\t\tgit add advice &&\n\t\ttest_tick &&\n\t\tGIT_AUTHOR_NAME=\"Copy N. 
Paste\" git commit -m \"hypocrisy is fun\" &&\n\t\techo Avoid cliches like the plague >> advice &&\n\t\ttest_tick &&\n\t\tGIT_AUTHOR_EMAIL=\"foo@my.crp\" git commit -m \"it is still fun\" advice &&\n\t\techo \"  \\$Id: A bunch of junk$\" > foobar.c &&\n\t\tgit add foobar.c &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Brain damage\"\n\t)\n}\n\ntest_expect_success 'commit_info.py' '\n\tsetup commit_info &&\n\t(\n\t\tcd commit_info &&\n\t\t$TEST_DIRECTORY/t9391/commit_info.py &&\n\t\ttest 0e5a1029 = $(git rev-parse --short=8 --verify refs/heads/master)\n\t)\n'\n\ntest_expect_success 'file_filter.py' '\n\tsetup file_filter &&\n\t(\n\t\tcd file_filter &&\n\t\t$TEST_DIRECTORY/t9391/file_filter.py &&\n\t\ttest ee59e2b4 = $(git rev-parse --short=8 --verify refs/heads/master)\n\t)\n'\n\ntest_expect_success 'print_progress.py' '\n\tsetup print_progress &&\n\t(\n\t\tcd print_progress &&\n\t\tMASTER=$(git rev-parse --verify master) &&\n\t\t$TEST_DIRECTORY/t9391/print_progress.py . new &&\n\t\ttest $MASTER = $(git rev-parse --verify refs/heads/master)\n\t)\n'\n\ntest_expect_success 'rename-master-to-develop.py' '\n\tsetup rename_master_to_develop &&\n\t(\n\t\tcd rename_master_to_develop &&\n\t\tMASTER=$(git rev-parse --verify master) &&\n\t\t$TEST_DIRECTORY/t9391/rename-master-to-develop.py &&\n\t\ttest $MASTER = $(git rev-parse --verify refs/heads/develop)\n\t)\n'\n\ntest_expect_success 'strip-cvs-keywords.py' '\n\tsetup strip_cvs_keywords &&\n\t(\n\t\tcd strip_cvs_keywords &&\n\t\t$TEST_DIRECTORY/t9391/strip-cvs-keywords.py &&\n\t\ttest 2306fc7c = $(git rev-parse --short=8 --verify refs/heads/master)\n\t)\n'\n\ntest_expect_success 'setup two extra repositories' '\n\tmkdir repo1 &&\n\tcd repo1 &&\n\tgit init &&\n\techo hello > world &&\n\tgit add world &&\n\ttest_tick &&\n\tgit commit -m \"Commit A\" &&\n\techo goodbye > world &&\n\tgit add world &&\n\ttest_tick &&\n\tgit commit -m \"Commit C\" &&\n\tcd .. 
&&\n\tmkdir repo2 &&\n\tcd repo2 &&\n\tgit init &&\n\techo foo > bar &&\n\tgit add bar &&\n\ttest_tick &&\n\tgit commit -m \"Commit B\" &&\n\techo fooey > bar &&\n\tgit add bar &&\n\ttest_tick &&\n\tgit commit -m \"Commit D\" &&\n\tcd ..\n'\n\ntest_expect_success 'splice_repos.py' '\n\tgit init splice_repos &&\n\t$TEST_DIRECTORY/t9391/splice_repos.py repo1 repo2 splice_repos &&\n\ttest 4 = $(git -C splice_repos rev-list master | wc -l)\n'\n\ntest_expect_success 'create_fast_export_output.py' '\n\tgit init create_fast_export_output &&\n\t(cd create_fast_export_output &&\n\t\t$TEST_DIRECTORY/t9391/create_fast_export_output.py &&\n\t\ttest e5e0569b = $(git rev-parse --short=8 --verify refs/heads/master) &&\n\t\ttest 122ead00 = $(git rev-parse --short=8 --verify refs/heads/devel) &&\n\t\ttest f36143f9 = $(git rev-parse --short=8 --verify refs/tags/v1.0))\n'\n\ntest_expect_success 'unusual.py' '\n\tsetup unusual &&\n\t(\n\t\tcd unusual &&\n\t\tcat $TEST_DIRECTORY/t9390/unusual | \\\n\t\t\t$TEST_DIRECTORY/t9391/unusual.py >output &&\n\n\t\tgrep \"Decipher this: .oy ,tropmi eht gnitrats ma I\" output &&\n\t\tgrep \"Found 2 blobs/commits and 4 other objects\" output\n\t)\n'\n\ntest_expect_success 'erroneous.py' '\n\tsetup erroneous &&\n\t(\n\t\tcd erroneous &&\n\t\ttest_must_fail $TEST_DIRECTORY/t9391/erroneous.py 2>../err &&\n\n\t\ttest_i18ngrep \"Error: Cannot pass a tag_callback to RepoFilter AND pass --tag-callback\" ../err\n\t)\n'\n\ntest_expect_success 'other error cases' '\n\tGIT_CEILING_DIRECTORIES=$(pwd) &&\n\texport GIT_CEILING_DIRECTORIES &&\n\t(\n\t\tmkdir other &&\n\t\tcd other &&\n\n\t\t! python3 -c \"import git_filter_repo as fr; fr.GitUtils.get_commit_count(b\\\".\\\", [\\\"HEAD\\\"])\" 2>err &&\n\t\ttest_i18ngrep \"\\. does not appear to be a valid git repository\" err\n\t)\n'\n\ntest_lazy_prereq DOS2UNIX '\n\tdos2unix -h\n\ttest $? 
-ne 127\n'\n\ntest_expect_success 'lint-history' '\n\ttest_create_repo lint-history &&\n\t(\n\t\tcd lint-history &&\n\t\tgit config core.autocrlf false &&\n\t\techo initial >content &&\n\t\tgit add content &&\n\t\tgit commit -m \"initial\" &&\n\n\t\tprintf \"CRLF is stupid\\r\\n\" >content &&\n\t\tgit add content &&\n\t\tgit commit -m \"make a statement\" &&\n\n\t\tprintf \"CRLF is stupid\\n\" >content &&\n\t\tgit add content &&\n\t\tgit commit -m \"oops, that was embarassing\" &&\n\n\t\tif test_have_prereq DOS2UNIX\n\t\tthen\n\t\t\t$CONTRIB_DIR/lint-history --filenames-important dos2unix &&\n\t\t\techo 2 >expect &&\n\t\t\tgit rev-list --count HEAD >actual &&\n\t\t\ttest_cmp expect actual\n\t\tfi\n\t)\n'\n\ntest_expect_success !WINDOWS 'lint-history --refs' '\n\ttest_create_repo lint-history-only-some-refs &&\n\t(\n\t\tcd lint-history-only-some-refs &&\n\t\ttest_commit a somefile bad &&\n\t\ttest_commit b notherfile baaad &&\n\t\ttest_commit c whatever baaaaaad &&\n\t\tgit checkout -b mybranch HEAD~1 &&\n\t\ttest_commit d somefile baaaaaaaad &&\n\t\ttest_commit e whatever \"baaaaaaaaaad to the bone\" &&\n\n\t\tcat <<-EOF >linter.sh &&\n\t\t#!/bin/bash\n\t\tcat \\$1 | tr -d a >tmp\n\t\tmv tmp \\$1\n\t\tEOF\n\t\tchmod u+x linter.sh &&\n\n\t\tPATH=$PATH:. $CONTRIB_DIR/lint-history --refs master..mybranch  -- linter.sh &&\n\n\t\techo bd >expect &&\n\t\techo bd to the bone >long-expect &&\n\n\t\t# Verify master is untouched\n\t\tgit checkout master &&\n\t\t! test_cmp somefile expect &&\n\t\t! test_cmp notherfile expect &&\n\t\t! test_cmp whatever expect &&\n\n\t\t# Verify that files touched on the branch are tweaked\n\t\tgit checkout mybranch &&\n\t\ttest_cmp somefile expect &&\n\t\t! 
test_cmp notherfile expect &&\n\t\ttest_cmp whatever long-expect\n\n\t)\n'\n\ntest_expect_success 'clean-ignore with emoji in filenames' '\n\ttest_create_repo clean-ignore &&\n\t(\n\t\tcd clean-ignore &&\n\t\tgit fast-import --quiet <$DATA/emoji-repo &&\n\t\tgit reset --hard &&\n\t\t$CONTRIB_DIR/clean-ignore --force &&\n\t\tprintf \".gitignore\\nfilename\\n\" >expect &&\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_done\n"
  },
  {
    "path": "t/t9392-filter-repo-python-callback.sh",
    "content": "#!/bin/bash\n\ntest_description='Usage of git-filter-repo with python callbacks'\n. ./test-lib.sh\n\nexport PATH=$(dirname $TEST_DIRECTORY):$PATH  # Put git-filter-repo in PATH\n\nsetup()\n{\n\tgit init $1 &&\n\t(\n\t\tcd $1 &&\n\t\techo hello > world &&\n\t\tgit add world &&\n\t\ttest_tick &&\n\t\tgit commit -m initial &&\n\t\tprintf \"The launch code is 1-2-3-4.\" > secret &&\n\t\tgit add secret &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Sssh.  Dont tell no one\" &&\n\t\techo A file that you cant trust > file.doc &&\n\t\techo there >> world &&\n\t\tgit add file.doc world &&\n\t\ttest_tick &&\n\t\tprintf \"Random useless changes\\n\\nLet us be like the marketing group.  Marketing is staffed with pansies\" | git commit -F - &&\n\t\techo Do not use a preposition to end a setence with > advice &&\n\t\tgit add advice &&\n\t\ttest_tick &&\n\t\tGIT_AUTHOR_NAME=\"Copy N. Paste\" git commit -m \"hypocrisy is fun\" &&\n\t\techo Avoid cliches like the plague >> advice &&\n\t\ttest_tick &&\n\t\tGIT_AUTHOR_EMAIL=\"foo@my.crp\" git commit -m \"it is still fun\" advice &&\n\t\techo \"  \\$Id: A bunch of junk$\" > foobar.c &&\n\t\tgit add foobar.c &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Brain damage\" &&\n\n                git tag v1.0 HEAD~3 &&\n                git tag -a -m 'Super duper snazzy release' v2.0 HEAD~1 &&\n                git branch testing master &&\n\n\t\t# Make it look like a fresh clone (avoid need for --force)\n\t\tgit gc &&\n\t\tgit remote add origin . &&\n\t\tgit update-ref refs/remotes/origin/master refs/heads/master\n\t\tgit update-ref refs/remotes/origin/testing refs/heads/testing\n\t)\n}\n\ntest_expect_success '--filename-callback' '\n\tsetup filename-callback &&\n\t(\n\t\tcd filename-callback &&\n\t\tgit filter-repo --filename-callback \"return None if filename.endswith(b\\\".doc\\\") else b\\\"src/\\\"+filename\" &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\t! 
grep file.doc f &&\n\t\tCOMPARE=$(wc -l <f) &&\n\t\tgrep src/ f >filtered_f &&\n\t\ttest_line_count = $COMPARE filtered_f\n\t)\n'\n\ntest_expect_success '--file-info-callback acting like --filename-callback' '\n\tsetup fileinfo-as-filename-callback &&\n\t(\n\t\tcd fileinfo-as-filename-callback &&\n\t\tgit filter-repo --file-info-callback \"return (None if filename.endswith(b\\\".doc\\\") else b\\\"src/\\\"+filename, mode, blob_id)\" &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\t! grep file.doc f &&\n\t\tCOMPARE=$(wc -l <f) &&\n\t\tgrep src/ f >filtered_f &&\n\t\ttest_line_count = $COMPARE filtered_f\n\t)\n'\n\ntest_expect_success '--message-callback' '\n\tsetup message-callback &&\n\t(\n\t\tcd message-callback &&\n\t\tgit filter-repo --message-callback \"return b\\\"TLDR: \\\"+message[0:5]\" &&\n\t\tgit log --format=%s >log-messages &&\n\t\tgrep TLDR:...... log-messages >modified-messages &&\n\t\ttest_line_count = 6 modified-messages\n\t)\n'\n\ntest_expect_success '--name-callback' '\n\tsetup name-callback &&\n\t(\n\t\tcd name-callback &&\n\t\tgit filter-repo --name-callback \"return name.replace(b\\\"N.\\\", b\\\"And\\\")\" &&\n\t\tgit log --format=%an >log-person-names &&\n\t\tgrep Copy.And.Paste log-person-names\n\t)\n'\n\ntest_expect_success '--email-callback' '\n\tsetup email-callback &&\n\t(\n\t\tcd email-callback &&\n\t\tgit filter-repo --email-callback \"return email.replace(b\\\".com\\\", b\\\".org\\\")\" &&\n\t\tgit log --format=%ae%n%ce >log-emails &&\n\t\t! 
grep .com log-emails &&\n\t\tgrep .org log-emails\n\t)\n'\n\ntest_expect_success '--refname-callback' '\n\tsetup refname-callback &&\n\t(\n\t\tcd refname-callback &&\n\t\tgit filter-repo --refname-callback \"\n                    dir,path = os.path.split(refname)\n                    return dir+b\\\"/prefix-\\\"+path\" &&\n\t\tgit show-ref | grep refs/heads/prefix-master &&\n\t\tgit show-ref | grep refs/tags/prefix-v1.0 &&\n\t\tgit show-ref | grep refs/tags/prefix-v2.0\n\t)\n'\n\ntest_expect_success '--refname-callback sanity check' '\n\tsetup refname-sanity-check &&\n\t(\n\t\tcd refname-sanity-check &&\n\n\t\ttest_must_fail git filter-repo --refname-callback \"return re.sub(b\\\"tags\\\", b\\\"other-tags\\\", refname)\" 2>../err &&\n\t\ttest_i18ngrep \"fast-import requires tags to be in refs/tags/ namespace\" ../err &&\n\t\trm ../err\n\t)\n'\n\ntest_expect_success '--blob-callback' '\n\tsetup blob-callback &&\n\t(\n\t\tcd blob-callback &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\ttest_line_count = 5 f &&\n\t\trm f &&\n\t\tgit filter-repo --blob-callback \"if len(blob.data) > 25: blob.skip()\" &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\ttest_line_count = 2 f\n\t)\n'\n\ntest_expect_success '--file-info-callback acting like --blob-callback' '\n\tsetup fileinfo-as-blob-callback &&\n\t(\n\t\tcd fileinfo-as-blob-callback &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\ttest_line_count = 5 f &&\n\t\trm f &&\n\t\tgit filter-repo --file-info-callback \"\n\t\t    size = value.get_size_by_identifier(blob_id)\n\t\t    return (None if size > 25 else filename, mode, blob_id)\" &&\n\t\tgit log --format=%n --name-only | sort | uniq | grep -v ^$ > f &&\n\t\ttest_line_count = 2 f\n\t)\n'\n\ntest_expect_success '--commit-callback' '\n\tsetup commit-callback &&\n\t(\n\t\tcd commit-callback &&\n\t\tgit filter-repo --commit-callback \"\n                    commit.committer_name  = 
commit.author_name\n                    commit.committer_email = commit.author_email\n                    commit.committer_date  = commit.author_date\n                    for change in commit.file_changes:\n                      change.mode = b\\\"100755\\\"\n                    \" &&\n\t\tgit log --format=%ae%n%ce >log-emails &&\n\t\t! grep committer@example.com log-emails &&\n\t\tgit log --raw | grep ^: >file-changes &&\n\t\t! grep 100644 file-changes &&\n\t\tgrep 100755 file-changes\n\t)\n'\n\ntest_expect_success '--tag-callback' '\n\tsetup tag-callback &&\n\t(\n\t\tcd tag-callback &&\n\t\tgit filter-repo --tag-callback \"\n                    tag.tagger_name = b\\\"Dr. \\\"+tag.tagger_name\n                    tag.message = b\\\"Awesome sauce \\\"+tag.message\n                    \" &&\n\t\tgit cat-file -p v2.0 | grep ^tagger.Dr\\\\. &&\n\t\tgit cat-file -p v2.0 | grep ^Awesome.sauce.Super\n\t)\n'\n\ntest_expect_success '--reset-callback' '\n\tsetup reset-callback &&\n\t(\n\t\tcd reset-callback &&\n\t\tgit filter-repo --reset-callback \"reset.from_ref = 3\" &&\n\t\ttest $(git rev-parse testing) = $(git rev-parse master~3)\n\t)\n'\n\ntest_expect_success 'callback has return statement sanity check' '\n\tsetup callback_return_sanity &&\n\t(\n\t\tcd callback_return_sanity &&\n\n\t\ttest_must_fail git filter-repo --filename-callback \"filename + b\\\".txt\\\"\" 2>../err&&\n\t\ttest_i18ngrep \"Error: --filename-callback should have a return statement\" ../err &&\n\t\trm ../err\n\t)\n'\n\ntest_expect_success 'Callback read from a file' '\n\tsetup name-callback-from-file &&\n\t(\n\t\tcd name-callback-from-file &&\n\t\techo \"return name.replace(b\\\"N.\\\", b\\\"And\\\")\" >../name-func &&\n\t\tgit filter-repo --name-callback ../name-func &&\n\t\tgit log --format=%an >log-person-names &&\n\t\tgrep Copy.And.Paste log-person-names\n\t)\n'\n\ntest_expect_success 'Filtering a blob to make it match previous version' '\n\ttest_create_repo remove_unique_bits_of_blob 
&&\n\t(\n\t\tcd remove_unique_bits_of_blob &&\n\n\t\ttest_write_lines foo baz >metasyntactic_names &&\n\t\tgit add metasyntactic_names &&\n\t\tgit commit -m init &&\n\n\t\ttest_write_lines foo bar baz >metasyntactic_names &&\n\t\tgit add metasyntactic_names &&\n\t\tgit commit -m second &&\n\n\t\tgit filter-repo --force --blob-callback \"blob.data = blob.data.replace(b\\\"\\\\nbar\\\", b\\\"\\\")\" &&\n\n\t\techo 1 >expect &&\n\t\tgit rev-list --count HEAD >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success 'tweaking just a tag' '\n\ttest_create_repo tweaking_just_a_tag &&\n\t(\n\t\tcd tweaking_just_a_tag &&\n\n\t\ttest_commit foo &&\n\t\tgit tag -a -m \"Here is a tag\" mytag &&\n\n\t\tgit filter-repo --force --refs mytag ^mytag^{commit} --name-callback \"return name.replace(b\\\"Mitter\\\", b\\\"L D\\\")\" &&\n\n\t\tgit cat-file -p mytag | grep C.O.L.D\n\t)\n'\n\ntest_expect_success '--file-info-callback messing with history' '\n\tsetup messing_with_files &&\n\t(\n\t\tcd messing_with_files &&\n\n\t\techo \"1-2-3-4==>1-2-3-4-5\" >replacement &&\n\t\t# Trying to count the levels of backslash escaping is not fun.\n\t\techo \"regex:\\\\\\$[^\\$]*\\\\\\$==>cvs is lame\" >>replacement &&\n\t\tgit filter-repo --force --file-info-callback \"\n\t\t    size = value.get_size_by_identifier(blob_id)\n\t\t    contents = value.get_contents_by_identifier(blob_id)\n\t\t    if not value.is_binary(contents):\n\t\t      contents = value.apply_replace_text(contents)\n\t\t    if contents[-1] != 10:\n\t\t      contents += bytes([10])\n\t\t    blob_id = value.insert_file_with_contents(contents)\n\t\t    newname = bytes(reversed(filename))\n\t\t    if size == 27 and len(contents) == 27:\n\t\t      newname = None\n\t\t    return (newname, mode, blob_id)\n                    \" --replace-text replacement &&\n\n\t\tcat <<-EOF >expect &&\n\t\tc.raboof\n\t\tdlrow\n\t\tecivda\n\t\tterces\n\t\tEOF\n\n\t\tgit ls-files >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\techo \"The launch code is 1-2-3-4-5.\" >expect &&\n\t\ttest_cmp expect terces &&\n\n\t\techo \"  cvs is lame\" >expect &&\n\t\ttest_cmp expect c.raboof\n\t)\n'\n\ntest_expect_success '--file-info-callback and deletes and drops' '\n\tsetup file_info_deletes_drops &&\n\t(\n\t\tcd file_info_deletes_drops &&\n\n\t\tgit rm file.doc &&\n\t\tgit commit -m \"Nuke doc file\" &&\n\n\t\tgit filter-repo --force --file-info-callback \"\n\t\t    size = value.get_size_by_identifier(blob_id)\n\t\t    (newname, newmode) = (filename, mode)\n\t\t    if filename == b\\\"world\\\" and size == 12:\n\t\t      newname = None\n\t\t    if filename == b\\\"advice\\\" and size == 77:\n\t\t      newmode = None\n\t\t    return (newname, newmode, blob_id)\n                    \" &&\n\n\t\tcat <<-EOF >expect &&\n\t\tfoobar.c\n\t\tsecret\n\t\tworld\n\t\tEOF\n\n\t\techo 1 >expect &&\n\t\tgit rev-list --count HEAD -- world >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\techo 2 >expect &&\n\t\tgit rev-list --count HEAD -- advice >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\techo hello >expect &&\n\t\ttest_cmp expect world\n\t)\n'\n\ntest_lazy_prereq UNIX2DOS '\n        unix2dos -h\n        test $? -ne 127\n'\n\ntest_expect_success UNIX2DOS '--file-info-callback acting like lint-history' '\n\tsetup lint_history_replacement &&\n\t(\n\t\tcd lint_history_replacement &&\n\t\tgit ls-files -s | grep -v file.doc >expect &&\n\n\t\tgit filter-repo --force --file-info-callback \"\n\t\t    if not filename.endswith(b\\\".doc\\\"):\n\t\t      return (filename, mode, blob_id)\n\n\t\t    if blob_id in value.data:\n\t\t      return (filename, mode, value.data[blob_id])\n\n\t\t    contents = value.get_contents_by_identifier(blob_id)\n\t\t    tmpfile = os.path.basename(filename)\n\t\t    with open(tmpfile, \\\"wb\\\") as f:\n\t\t      f.write(contents)\n\t\t    subprocess.check_call([\\\"unix2dos\\\", filename])\n\t\t    with open(filename, \\\"rb\\\") as f:\n\t\t      contents = f.read()\n\t\t    new_blob_id = value.insert_file_with_contents(contents)\n\n\t\t    value.data[blob_id] = new_blob_id\n\t\t    return (filename, mode, new_blob_id)\n                    \" &&\n\n\t\tgit ls-files -s | grep -v file.doc >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tprintf \"A file that you cant trust\\r\\n\" >expect &&\n\t\ttest_cmp expect file.doc\n\t)\n'\n\ntest_done\n"
  },
  {
    "path": "t/t9393/lfs",
    "content": "feature done\n# Simple repo with a few files, some of them lfs.  Note that the lfs object\n# ids and the original-oid directives are very fake, but make it easy to\n# track things.\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 39\nL* filter=lfs diff=lfs merge=lfs -text\n\nblob\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ndata 126\nversion https://git-lfs.github.com/spec/v1\noid sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nsize 1\n\nblob\nmark :3\noriginal-oid 0000000000000000000000000000000000000003\ndata 126\nversion https://git-lfs.github.com/spec/v1\noid sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\nsize 2\n\nblob\nmark :4\noriginal-oid 0000000000000000000000000000000000000004\ndata 126\nversion https://git-lfs.github.com/spec/v1\noid sha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\nsize 3\n\nblob\nmark :5\noriginal-oid 0000000000000000000000000000000000000005\ndata 126\nversion https://git-lfs.github.com/spec/v1\noid sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\nsize 4\n\nblob\nmark :6\noriginal-oid 0000000000000000000000000000000000000006\ndata 6\nstuff\n\nblob\nmark :7\noriginal-oid 0000000000000000000000000000000000000007\ndata 
1040\n0000000000000000000000000000000000000000000000000000000000000000\n1111111111111111111111111111111111111111111111111111111111111111\n2222222222222222222222222222222222222222222222222222222222222222\n3333333333333333333333333333333333333333333333333333333333333333\n4444444444444444444444444444444444444444444444444444444444444444\n5555555555555555555555555555555555555555555555555555555555555555\n6666666666666666666666666666666666666666666666666666666666666666\n7777777777777777777777777777777777777777777777777777777777777777\n8888888888888888888888888888888888888888888888888888888888888888\n9999999999999999999999999999999999999999999999999999999999999999\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\ncccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\ndddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\neeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee\nffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\n\nreset refs/heads/main\ncommit refs/heads/main\nmark :10\noriginal-oid 0000000000000000000000000000000000000010\nauthor Little O. Me <me@little.net> 1234567890 -0700\ncommitter Little O. Me <me@little.net> 1234567890 -0700\ndata 13\nFirst commit\nM 100644 :1 .gitattributes\nM 100644 :4 LB\n\ncommit refs/heads/main\nmark :11\noriginal-oid 0000000000000000000000000000000000000011\nauthor Little O. Me <me@little.net> 1234567891 -0700\ncommitter Little O. Me <me@little.net> 1234567891 -0700\ndata 14\nSecond commit\nM 100644 :2 LA\n\ncommit refs/heads/main\nmark :12\noriginal-oid 0000000000000000000000000000000000000012\nauthor Little O. Me <me@little.net> 1234567892 -0700\ncommitter Little O. Me <me@little.net> 1234567892 -0700\ndata 13\nThird commit\nM 100644 :6 Z\n\ncommit refs/heads/main\nmark :13\noriginal-oid 0000000000000000000000000000000000000013\nauthor Little O. 
Me <me@little.net> 1234567893 -0700\ncommitter Little O. Me <me@little.net> 1234567893 -0700\ndata 14\nFourth commit\nM 100644 :4 LC\n\ncommit refs/heads/main\nmark :14\noriginal-oid 0000000000000000000000000000000000000014\nauthor Little O. Me <me@little.net> 1234567894 -0700\ncommitter Little O. Me <me@little.net> 1234567894 -0700\ndata 13\nFifth commit\nM 100644 :3 LA\n\ncommit refs/heads/main\nmark :15\noriginal-oid 0000000000000000000000000000000000000015\nauthor Little O. Me <me@little.net> 1234567895 -0700\ncommitter Little O. Me <me@little.net> 1234567895 -0700\ndata 13\nSixth commit\nM 100644 :7 Y\nD Z\n\ncommit refs/heads/main\nmark :16\noriginal-oid 0000000000000000000000000000000000000016\nauthor Little O. Me <me@little.net> 1234567896 -0700\ncommitter Little O. Me <me@little.net> 1234567896 -0700\ndata 15\nSeventh commit\nM 100644 :5 LD\n\ncommit refs/heads/main\nmark :17\noriginal-oid 0000000000000000000000000000000000000017\nauthor Little O. Me <me@little.net> 1234567897 -0700\ncommitter Little O. Me <me@little.net> 1234567897 -0700\ndata 14\nEighth commit\nM 100644 :2 LA\n\ndone\n"
  },
  {
    "path": "t/t9393/simple",
    "content": "feature done\n# Simple repo with a few files, and two branches with no common history.\n# Note that the original-oid directives are very fake, but make it easy to\n# track things.\nblob\nmark :1\noriginal-oid 0000000000000000000000000000000000000001\ndata 16\nfile 1 contents\n\nblob\nmark :2\noriginal-oid 0000000000000000000000000000000000000002\ndata 16\nfile 2 contents\n\nblob\nmark :3\noriginal-oid 0000000000000000000000000000000000000003\ndata 16\nfile 3 contents\n\nblob\nmark :4\noriginal-oid 0000000000000000000000000000000000000004\ndata 16\nfile 4 contents\n\nreset refs/heads/orphan-me\ncommit refs/heads/orphan-me\nmark :5\noriginal-oid 0000000000000000000000000000000000000009\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 nuke-me\n\ncommit refs/heads/orphan-me\nmark :6\noriginal-oid 000000000000000000000000000000000000000A\nauthor Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ncommitter Little 'ol Me <me@laptop.(none)> 1535229544 -0700\ndata 9\nTweak it\nfrom :5\nM 100644 :4 nuke-me\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :7\noriginal-oid 000000000000000000000000000000000000000B\nauthor Little O. Me <me@machine52.little.net> 1535229523 -0700\ncommitter Little O. 
Me <me@machine52.little.net> 1535229523 -0700\ndata 15\nInitial commit\nM 100644 :1 fileA\n\ncommit refs/heads/master\nmark :8\noriginal-oid 000000000000000000000000000000000000000C\nauthor Lit.e Me <me@fire.com> 1535229559 -0700\ncommitter Lit.e Me <me@fire.com> 1535229580 -0700\ndata 10\nAdd fileB\nfrom :7\nM 100644 :2 fileB\n\ncommit refs/heads/master\nmark :9\noriginal-oid 000000000000000000000000000000000000000D\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 10\nAdd fileC\nfrom :8\nM 100644 :3 fileC\n\ncommit refs/heads/master\nmark :10\noriginal-oid 000000000000000000000000000000000000000E\nauthor Little Me <me@bigcompany.com> 1535229618 -0700\ncommitter Little Me <me@bigcompany.com> 1535229618 -0700\ndata 10\nAdd fileD\nfrom :9\nM 100644 :4 fileD\n\ntag v1.0\nfrom :10\noriginal-oid 000000000000000000000000000000000000000F\ntagger Little John <second@merry.men> 1535229637 -0700\ndata 5\nv1.0\n\ndone\n"
  },
  {
    "path": "t/t9393-filter-repo-rerun.sh",
    "content": "#!/bin/bash\n\ntest_description='filter-repo tests with reruns'\n\n. ./test-lib.sh\n\nexport PATH=$(dirname $TEST_DIRECTORY):$PATH  # Put git-filter-repo in PATH\n\nDATA=\"$TEST_DIRECTORY/t9393\"\nDELETED_SHA=\"0000000000000000000000000000000000000000\" # FIXME: sha256 support\n\ntest_expect_success 'a re-run that is treated as a clean slate' '\n\ttest_create_repo clean_slate_rerun &&\n\t(\n\t\tcd clean_slate_rerun &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tgit filter-repo --invert-paths --path fileB --force &&\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tNEW_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_B_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits 
&&\n\n\t\ttouch -t 197001010000 .git/filter-repo/already_ran &&\n\t\techo no | git filter-repo --invert-paths --path fileC --force &&\n\t\tFINAL_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tREALLY_FINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${NEW_FILE_C_CHANGE} ${DELETED_SHA}\n\t\t${NEW_FILE_D_CHANGE} ${FINAL_FILE_D_CHANGE}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${NEW_FILE_D_CHANGE} ${FINAL_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN} refs/heads/orphan-me\n\t\t${FINAL_TAG} ${REALLY_FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${NEW_FILE_C_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits\n\t)\n'\n\ntest_expect_success 'remove two files, no re-run' '\n\ttest_create_repo simple_two_files &&\n\t(\n\t\tcd simple_two_files &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tgit filter-repo --invert-paths --path nuke-me --path fileC \\\n\t\t                --force &&\n\n\t\tNEW_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tNEW_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FINAL_ORPHAN} 
${DELETED_SHA}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${FILE_B_CHANGE}\n\t\t${FILE_C_CHANGE} ${DELETED_SHA}\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${DELETED_SHA} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${NEW_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${FILE_B_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits\n\t)\n'\n\ntest_expect_success 'remove two files, then remove a later file' '\n\ttest_create_repo remove_two_file_then_remove_later &&\n\t(\n\t\tcd remove_two_file_then_remove_later &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tgit filter-repo --invert-paths --path nuke-me --path fileC \\\n\t\t                --force &&\n\n\t\tNEW_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tNEW_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${FILE_B_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${DELETED_SHA} 
refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${NEW_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map &&\n\n\t\tgit filter-repo --invert-paths --path fileD &&\n\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${FILE_B_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FINAL_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${FILE_B_CHANGE}\n\t\t${FILE_C_CHANGE} ${DELETED_SHA}\n\t\t${FILE_D_CHANGE} ${DELETED_SHA}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${FILE_B_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${DELETED_SHA} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'remove two files, then remove a later file via --refs' '\n\ttest_create_repo remove_two_files_remove_later_via_refs &&\n\t(\n\t\tcd remove_two_files_remove_later_via_refs &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tgit filter-repo --invert-paths --path nuke-me --path fileB \\\n\t\t                --force 
&&\n\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_B_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tgit filter-repo --invert-paths --path fileD --refs HEAD~1..HEAD &&\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_B_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FINAL_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${DELETED_SHA}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_C_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${DELETED_SHA} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'remove two files, then remove an earlier file' '\n\ttest_create_repo remove_two_files_then_remove_earlier &&\n\t(\n\t\tcd remove_two_files_then_remove_earlier &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tgit filter-repo --invert-paths --path nuke-me --path fileC \\\n\t\t        
        --force &&\n\n\t\tgit filter-repo --invert-paths --path fileB &&\n\n\t\tNEW_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_B_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${DELETED_SHA}\n\t\t${FINAL_ORPHAN} ${DELETED_SHA}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${DELETED_SHA}\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${DELETED_SHA} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'modify a file, then remove a later file' '\n\ttest_create_repo modify_file_later_remove &&\n\t(\n\t\tcd modify_file_later_remove &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\techo \"file 3 contents==>Alternate C\" >changes &&\n\t\tgit filter-repo --force --replace-text changes &&\n\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect 
.git/filter-repo/first-changed-commits &&\n\n\t\tgit filter-repo --invert-paths --path fileD &&\n\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\t# Make sure the fileD commit was indeed removed\n\t\techo $NEW_FILE_C_CHANGE >expect &&\n\t\tgit rev-parse HEAD >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${FILE_B_CHANGE}\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${DELETED_SHA}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_C_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\n\ntest_expect_success 'modify a file, then remove a later file via --refs' '\n\ttest_create_repo modify_file_later_remove_with_refs &&\n\t(\n\t\tcd modify_file_later_remove_with_refs &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\techo \"file 2 contents==>Alternate B\" >changes &&\n\t\tgit filter-repo --force --replace-text changes &&\n\n\t\tNEW_FILE_B_CHANGE=$(git rev-list -1 HEAD -- 
fileB) &&\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_B_CHANGE} ${NEW_FILE_B_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tgit filter-repo --invert-paths --path fileD \\\n\t\t                --refs HEAD~1..HEAD &&\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_B_CHANGE} ${NEW_FILE_B_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\t# Make sure the fileD commit was indeed removed\n\t\tgit rev-parse HEAD^ >expect &&\n\t\techo ${NEW_FILE_B_CHANGE} >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${NEW_FILE_B_CHANGE}\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${DELETED_SHA}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_C_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'modify a file, then remove an earlier file' '\n\ttest_create_repo modify_file_earlier_remove &&\n\t(\n\t\tcd modify_file_earlier_remove &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) 
&&\n\t\tORIGINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\techo \"file 3 contents==>Alternate C\" >changes &&\n\t\tgit filter-repo --force --replace-text changes &&\n\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tgit filter-repo --invert-paths --path fileB &&\n\n\t\tNEW_FILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) &&\n\t\tNEW_FILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\t\tFINAL_TAG=$(git rev-parse v1.0) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FILE_B_CHANGE} ${FILE_A_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${DELETED_SHA}\n\t\t${FILE_C_CHANGE} ${NEW_FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${NEW_FILE_D_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${FINAL_ORPHAN} refs/heads/orphan-me\n\t\t${ORIGINAL_TAG} ${FINAL_TAG} refs/tags/v1.0\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'use --refs heavily with a rerun' '\n\ttest_create_repo rerun_on_targetted_branches &&\n\t(\n\t\tcd rerun_on_targetted_branches &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tFIRST_ORPHAN=$(git rev-parse orphan-me~1) &&\n\t\tFINAL_ORPHAN=$(git rev-parse orphan-me) &&\n\t\tFILE_A_CHANGE=$(git rev-list -1 HEAD -- fileA) &&\n\t\tFILE_B_CHANGE=$(git rev-list -1 HEAD -- fileB) &&\n\t\tFILE_C_CHANGE=$(git rev-list -1 HEAD -- fileC) 
&&\n\t\tFILE_D_CHANGE=$(git rev-list -1 HEAD -- fileD) &&\n\n\t\techo \"Tweak it==>Modify it\" >changes &&\n\t\tgit filter-repo --force --refs orphan-me \\\n\t\t    --replace-message changes &&\n\n\t\tNEW_FINAL_ORPHAN=$(git rev-list -1 orphan-me) &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FINAL_ORPHAN} ${NEW_FINAL_ORPHAN}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tgit filter-repo --refs $(git symbolic-ref HEAD) \\\n\t\t    --invert-paths --path fileD &&\n\n\t\tcat <<-EOF | sort >expect &&\n\t\t${FINAL_ORPHAN} ${NEW_FINAL_ORPHAN}\n\t\t${FILE_D_CHANGE} ${FILE_C_CHANGE}\n\t\tEOF\n\t\ttest_cmp expect .git/filter-repo/first-changed-commits &&\n\n\t\tcat <<-EOF | sort >sha-expect &&\n\t\t${FIRST_ORPHAN} ${FIRST_ORPHAN}\n\t\t${FINAL_ORPHAN} ${NEW_FINAL_ORPHAN}\n\t\t${FILE_A_CHANGE} ${FILE_A_CHANGE}\n\t\t${FILE_B_CHANGE} ${FILE_B_CHANGE}\n\t\t${FILE_C_CHANGE} ${FILE_C_CHANGE}\n\t\t${FILE_D_CHANGE} ${DELETED_SHA}\n\t\tEOF\n\t\tprintf \"%-40s %s\\n\" old new >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/commit-map &&\n\n\t\tcat <<-EOF | sort -k 3 >sha-expect &&\n\t\t${FILE_D_CHANGE} ${FILE_C_CHANGE} $(git symbolic-ref HEAD)\n\t\t${FINAL_ORPHAN} ${NEW_FINAL_ORPHAN} refs/heads/orphan-me\n\t\tEOF\n\t\tprintf \"%-40s %-40s %s\\n\" old new ref >expect &&\n\t\tcat sha-expect >>expect &&\n\t\ttest_cmp expect .git/filter-repo/ref-map\n\t)\n'\n\ntest_expect_success 'sdr: basic usage' '\n\ttest_create_repo use_sdr &&\n\t(\n\t\tcd use_sdr &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tgit filter-repo --invert-paths --path nuke-me --force \\\n\t\t                --sensitive-data-removal >output &&\n\n\t\tgrep \"You rewrote.*commits\" output &&\n\t\tgrep \"First Changed Commit(s) is/are:\" output\n\t)\n'\n\ntest_expect_success 'sdr: must use consistently' '\n\ttest_create_repo use_sdr_consistently &&\n\t(\n\t\tcd use_sdr_consistently &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tgit filter-repo 
--path nuke-me --force &&\n\n\t\ttest_must_fail git filter-repo --sensitive-data-removal \\\n\t\t                   --path nuke-me 2>err &&\n\n\t\tgrep \"Cannot specify --sensitive-data-removal\" err\n\t)\n'\n\ntest_expect_success 'sdr: interaction with fetch and notes and stashes' '\n\ttest_create_repo sdr_with_fetch_and_notes &&\n\t(\n\t\tcd sdr_with_fetch_and_notes &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\t\tgit notes add -m \"Here is a note\" HEAD~1 &&\n\t\tgit notes add -m \"Here is another note\" HEAD &&\n\t\tgit clone \"file://$(pwd)\" fresh_clone &&\n\n\t\tcd fresh_clone &&\n\n\t\techo stuff >>fileA &&\n\t\tgit stash save stuff &&\n\t\techo things >>fileB &&\n\t\tgit stash save things &&\n\n\t\ttest_line_count = 2 .git/logs/refs/stash &&\n\n\t\tgit show-ref | grep refs/remotes/origin &&\n\t\tgit filter-repo --sdr --path fileB --force >../output &&\n\n\t\tgrep \"Fetching all refs from origin\" ../output &&\n\n\t\tgit show-ref >ref-output &&\n\t\t! grep refs/remotes/origin/ ref-output &&\n\n\t\t# Only keeping path \"nuke-me\" would wipe out refs/notes/commits\n\t\t# (meaning both its commits would be pruned and thus cause the\n\t\t# ref itself to get pruned), if we did not have a special case\n\t\t# for it.  Verify the special casing works.\n\t\techo 2 >expect &&\n\t\tgit rev-list --count refs/notes/commits >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\t! 
grep refs/remotes/origin .git/filter-repo/ref-map &&\n\n\t\ttest_line_count = 1 .git/logs/refs/stash\n\t)\n'\n\ntest_expect_success 'sdr: handling local-only changes' '\n\ttest_create_repo sdr_with_local_only_changes &&\n\t(\n\t\tcd sdr_with_local_only_changes &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\t\tgit clone \"file://$(pwd)\" fresh_clone &&\n\n\t\tcd fresh_clone &&\n\n\t\techo stuff >>fileB &&\n\t\tgit commit -m \"random changes\" fileB &&\n\n\t\techo n | git filter-repo --sdr --path fileB --force >../output &&\n\n\t\tgrep \"You have refs modified from upstream\" ../output &&\n\n\t\tgit log -1 --format=%s fileB >actual &&\n\t\techo \"random changes\" >expect &&\n\t\ttest_cmp expect actual\n\t)\n'\n\n# I use LFS pointer files to fake LFS objects below; prevent git-lfs from\n# attempting to smudge them, which would just result in an error.\nexport GIT_LFS_SKIP_SMUDGE=1\n\ntest_expect_success 'lfs: not in use, no files to process' '\n\ttest_create_repo no_lfs_files_to_process &&\n\t(\n\t\tcd no_lfs_files_to_process &&\n\t\tgit fast-import --quiet <$DATA/simple &&\n\n\t\tgit filter-repo --sensitive-data-removal --force \\\n\t\t                --invert-paths --path nuke-me >output &&\n\n\t\tgrep \"NOTE: LFS object orphaning not checked (LFS not in use)\" output &&\n\n\t\ttest_path_is_missing .git/filter-repo/original_lfs_objects &&\n\t\ttest_path_is_missing .git/filter-repo/orphaned_lfs_objects &&\n\n\t\tgit filter-repo --sensitive-data-removal --path fileC >output &&\n\n\t\tgrep \"NOTE: LFS object orphaning not checked (LFS not in use)\" output &&\n\n\t\ttest_path_is_missing .git/filter-repo/original_lfs_objects &&\n\t\ttest_path_is_missing .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'lfs: no files orphaned' '\n\ttest_create_repo no_lfs_files_orphaned &&\n\t(\n\t\tcd no_lfs_files_orphaned &&\n\t\tgit symbolic-ref HEAD refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit filter-repo --sensitive-data-removal 
--path Z \\\n\t\t                --invert-paths --force >output &&\n\n\t\t! grep \"NOTE:.*LFS not in use\" output &&\n\t\t! grep \"NOTE:.*LFS Objects Orphaned by this rewrite\" output &&\n\n\t\tcat <<-EOF >expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/original_lfs_objects &&\n\t\ttest_must_be_empty .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'lfs: orphaning across multiple runs' '\n\ttest_create_repo lfs_multiple_runs &&\n\t(\n\t\tcd lfs_multiple_runs &&\n\t\tgit symbolic-ref HEAD refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit filter-repo --sensitive-data-removal --path LB --path LD \\\n\t\t                --invert-paths --force >output &&\n\n\t\tgrep \"NOTE:.*LFS Objects Orphaned by this rewrite\" output &&\n\n\t\tcat <<-EOF >orig_expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp orig_expect .git/filter-repo/original_lfs_objects &&\n\n\t\techo \"sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\" >expect &&\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects &&\n\n\t\tgit filter-repo --path LA --invert-paths &&\n\n\t\tcat <<-EOF >expect 
&&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'lfs: orphaning across multiple runs with blob callback' '\n\ttest_create_repo lfs_multiple_runs_blob_callback &&\n\t(\n\t\tcd lfs_multiple_runs_blob_callback &&\n\t\tgit symbolic-ref HEAD refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit filter-repo --sensitive-data-removal --path LB --path LD \\\n\t\t                --invert-paths --blob-callback pass \\\n\t\t                --force >output &&\n\n\t\tgrep \"NOTE:.*LFS Objects Orphaned by this rewrite\" output &&\n\n\t\tcat <<-EOF >orig_expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp orig_expect .git/filter-repo/original_lfs_objects &&\n\n\t\techo \"sha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\" >expect &&\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects &&\n\n\t\tgit filter-repo --path LA --invert-paths --blob-callback pass &&\n\n\t\tcat <<-EOF >expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'lfs: partial history rewrite affecting orphaning' '\n\ttest_create_repo lfs_partial_history &&\n\t(\n\t\tcd lfs_partial_history &&\n\t\tgit symbolic-ref HEAD 
refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit filter-repo --sensitive-data-removal --path LA \\\n\t\t                --refs HEAD~2..HEAD --force &&\n\n\t\tcat <<-EOF >orig_expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp orig_expect .git/filter-repo/original_lfs_objects &&\n\t\ttest_must_be_empty .git/filter-repo/orphaned_lfs_objects\n\n\t\tgit filter-repo --path LA --invert-paths &&\n\n\t\tcat <<-EOF >expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'lfs: full rewrite then partial' '\n\ttest_create_repo lfs_full_then_partial &&\n\t(\n\t\tcd lfs_full_then_partial &&\n\t\tgit symbolic-ref HEAD refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit filter-repo --sensitive-data-removal \\\n\t\t                --invert-paths --path LB --force &&\n\n\t\tcat <<-EOF >orig_expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp orig_expect .git/filter-repo/original_lfs_objects &&\n\t\ttest_must_be_empty .git/filter-repo/orphaned_lfs_objects\n\n\t\tgit filter-repo --path LA --path LD --invert-paths \\\n\t\t                --refs HEAD~2..HEAD &&\n\n\t\tcat <<-EOF >expect 
&&\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_expect_success 'sdr: lfs + submodules' '\n\ttest_create_repo lfs_plus_submodules &&\n\t(\n\t\tcd lfs_plus_submodules &&\n\t\tgit symbolic-ref HEAD refs/heads/main &&\n\t\tgit fast-import --quiet <$DATA/lfs &&\n\n\t\tgit reset --hard &&\n\t\tgit init subdir &&\n\t\t>subdir/empty &&\n\t\tgit -C subdir add . &&\n\t\tgit -C subdir commit -m initial &&\n\n\t\tgit submodule add ./subdir &&\n\t\tgit commit -m \"Add submodule\" &&\n\n\t\tgit filter-repo --sensitive-data-removal \\\n\t\t                --invert-paths --path LD --force &&\n\n\t\tcat <<-EOF >orig_expect &&\n\t\tsha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\t\tsha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\t\tsha256:cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp orig_expect .git/filter-repo/original_lfs_objects &&\n\n\t\tcat <<-EOF >expect &&\n\t\tsha256:dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd\n\t\tEOF\n\n\t\ttest_cmp expect .git/filter-repo/orphaned_lfs_objects\n\t)\n'\n\ntest_done\n"
  },
  {
    "path": "t/t9394/date-order",
    "content": "feature done\nblob\nmark :1\ndata 8\ninitial\n\nreset refs/heads/master\ncommit refs/heads/master\nmark :2\nauthor Little O. Me <me@little.net> 1535228562 -0700\ncommitter Little O. Me <me@little.net> 1535228562 -0700\ndata 8\nInitial\nM 100644 :1 filename\n\ncommit refs/heads/master\nmark :3\nauthor Little Me <me@bigcompany.com> 1535229601 -0700\ncommitter Little Me <me@bigcompany.com> 1535229601 -0700\ndata 2\nA\nfrom :2\n\ncommit refs/heads/master\nmark :4\nauthor Little Me <me@bigcompany.com> 1535229602 -0700\ncommitter Little Me <me@bigcompany.com> 1535229602 -0700\ndata 2\nB\nfrom :2\n\ncommit refs/heads/master\nmark :5\nauthor Little Me <me@bigcompany.com> 1535229603 -0700\ncommitter Little Me <me@bigcompany.com> 1535229603 -0700\ndata 2\nC\nfrom :3\n\ncommit refs/heads/master\nmark :6\nauthor Little Me <me@bigcompany.com> 1535229604 -0700\ncommitter Little Me <me@bigcompany.com> 1535229604 -0700\ndata 2\nD\nfrom :4\n\ncommit refs/heads/master\nmark :7\nauthor Little Me <me@bigcompany.com> 1535229605 -0700\ncommitter Little Me <me@bigcompany.com> 1535229605 -0700\ndata 6\nmerge\nfrom :5\nmerge :6\n\ndone\n"
  },
  {
    "path": "t/t9394-filter-repo-sanity-checks-and-bigger-repo-setup.sh",
    "content": "#!/bin/bash\n\ntest_description='Basic filter-repo tests'\n\n. ./test-lib.sh\n\nexport PATH=$(dirname $TEST_DIRECTORY):$PATH  # Put git-filter-repo in PATH\n\nDATA=\"$TEST_DIRECTORY/t9394\"\n\nsetup_metasyntactic_repo() {\n\ttest -d metasyntactic && return\n\ttest_create_repo metasyntactic &&\n\t(\n\t\tcd metasyntactic &&\n\t\tweird_name=$(printf \"file\\tna\\nme\") &&\n\t\techo \"funny\" >\"$weird_name\" &&\n\t\tmkdir numbers &&\n\t\ttest_seq 1 10 >numbers/small &&\n\t\ttest_seq 100 110 >numbers/medium &&\n\t\tgit add \"$weird_name\" numbers &&\n\t\tgit commit -m initial &&\n\t\tgit tag v1.0 &&\n\t\tgit tag -a -m v1.1 v1.1 &&\n\n\t\tmkdir words &&\n\t\techo foo >words/important &&\n\t\techo bar >words/whimsical &&\n\t\techo baz >words/sequences &&\n\t\tgit add words &&\n\t\tgit commit -m some.words &&\n\t\tgit branch another_branch &&\n\t\tgit tag v2.0 &&\n\n\t\techo spam >words/to &&\n\t\techo eggs >words/know &&\n\t\tgit add words\n\t\tgit rm \"$weird_name\" &&\n\t\tgit commit -m more.words &&\n\t\tgit tag -a -m \"Look, ma, I made a tag\" v3.0\n\t)\n}\n\ntest_expect_success FUNNYNAMES '--tag-rename' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic tag_rename &&\n\t\tcd tag_rename &&\n\t\tgit filter-repo \\\n\t\t\t--tag-rename \"\":\"myrepo-\" \\\n\t\t\t--path words &&\n\t\ttest_must_fail git cat-file -t v1.0 &&\n\t\ttest_must_fail git cat-file -t v1.1 &&\n\t\ttest_must_fail git cat-file -t v2.0 &&\n\t\ttest_must_fail git cat-file -t v3.0 &&\n\t\ttest_must_fail git cat-file -t myrepo-v1.0 &&\n\t\ttest_must_fail git cat-file -t myrepo-v1.1 &&\n\t\ttest $(git cat-file -t myrepo-v2.0) = commit &&\n\t\ttest $(git cat-file -t myrepo-v3.0) = tag\n\t)\n'\n\ntest_expect_success 'tag of tag before relevant portion of history' '\n\ttest_create_repo filtered_tag_of_tag &&\n\t(\n\t\tcd filtered_tag_of_tag &&\n\t\techo contents >file &&\n\t\tgit add file &&\n\t\tgit commit -m \"Initial\" &&\n\n\t\tgit tag -a -m \"Inner Tag\" 
inner_tag HEAD &&\n\t\tgit tag -a -m \"Outer Tag\" outer_tag inner_tag &&\n\n\t\tmkdir subdir &&\n\t\techo stuff >subdir/whatever &&\n\t\tgit add subdir &&\n\t\tgit commit -m \"Add file in subdir\" &&\n\n\t\tgit filter-repo --force --subdirectory-filter subdir &&\n\n\t\tgit show-ref >refs &&\n\t\t! grep refs/tags refs &&\n\t\tgit log --all --oneline >commits &&\n\t\ttest_line_count = 1 commits\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--subdirectory-filter' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic subdir_filter &&\n\t\tcd subdir_filter &&\n\t\tgit filter-repo \\\n\t\t\t--subdirectory-filter words &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 10 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 6 filenames &&\n\t\tgrep ^important$ filenames &&\n\t\ttest_must_fail git cat-file -t v1.0 &&\n\t\ttest_must_fail git cat-file -t v1.1 &&\n\t\ttest $(git cat-file -t v2.0) = commit &&\n\t\ttest $(git cat-file -t v3.0) = tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--subdirectory-filter with trailing slash' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic subdir_filter_2 &&\n\t\tcd subdir_filter_2 &&\n\t\tgit filter-repo \\\n\t\t\t--subdirectory-filter words/ &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 10 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 6 filenames &&\n\t\tgrep ^important$ filenames &&\n\t\ttest_must_fail git cat-file -t v1.0 &&\n\t\ttest_must_fail git cat-file -t v1.1 &&\n\t\ttest $(git cat-file -t v2.0) = commit &&\n\t\ttest $(git cat-file -t v3.0) = tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--to-subdirectory-filter' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic to_subdir_filter &&\n\t\tcd to_subdir_filter &&\n\t\tgit filter-repo 
\\\n\t\t\t--to-subdirectory-filter mysubdir/ &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 22 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\t\tgrep \"^\\\"mysubdir/file\\\\\\\\tna\\\\\\\\nme\\\"$\" filenames &&\n\t\tgrep ^mysubdir/words/important$ filenames &&\n\t\ttest $(git cat-file -t v1.0) = commit &&\n\t\ttest $(git cat-file -t v1.1) = tag &&\n\t\ttest $(git cat-file -t v2.0) = commit &&\n\t\ttest $(git cat-file -t v3.0) = tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--use-base-name' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic use_base_name &&\n\t\tcd use_base_name &&\n\t\tgit filter-repo --path small --path important --use-base-name &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 10 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 3 filenames &&\n\t\tgrep ^numbers/small$ filenames &&\n\t\tgrep ^words/important$ filenames &&\n\t\ttest $(git cat-file -t v1.0) = commit &&\n\t\ttest $(git cat-file -t v1.1) = tag &&\n\t\ttest $(git cat-file -t v2.0) = commit &&\n\t\ttest $(git cat-file -t v3.0) = tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES 'refs/replace/ to skip a parent' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic replace_skip_ref &&\n\t\tcd replace_skip_ref &&\n\n\t\tgit tag -d v2.0 &&\n\t\tgit replace HEAD~1 HEAD~2 &&\n\n\t\tgit filter-repo --proceed &&\n\t\ttest $(git rev-list --count HEAD) = 2 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 16 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\t\ttest $(git cat-file -t v1.0) = commit &&\n\t\ttest $(git cat-file -t v1.1) = tag &&\n\t\ttest_must_fail git cat-file -t v2.0 &&\n\t\ttest $(git cat-file -t v3.0) = 
tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES 'refs/replace/ to add more initial history' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic replace_add_refs &&\n\t\tcd replace_add_refs &&\n\n\t\tgit checkout --orphan new_root &&\n\t\trm .git/index &&\n\t\tgit add numbers/small &&\n\t\tgit clean -fd &&\n\t\tgit commit -m new.root &&\n\t\tNEW_ROOT=$(git rev-parse HEAD) &&\n\t\tgit checkout master &&\n\n\t\t# Make it look like a fresh clone...\n\t\tgit gc &&\n\t\tgit reflog expire --expire=now HEAD &&\n\t\tgit branch -D new_root &&\n\n\t\t# ...but add a replace object to give us a new root commit\n\t\tgit replace --graft master~2 $NEW_ROOT &&\n\n\t\tgit --no-replace-objects cat-file -p master~2 >grandparent &&\n\t\t! grep parent grandparent &&\n\t\trm grandparent &&\n\n\t\tgit filter-repo --proceed &&\n\n\t\tgit --no-replace-objects cat-file -p master~2 >new-grandparent &&\n\t\tgrep parent new-grandparent &&\n\n\t\ttest $(git rev-list --count HEAD) = 4 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 22 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\t\ttest $(git cat-file -t v1.0) = commit &&\n\t\ttest $(git cat-file -t v1.1) = tag &&\n\t\ttest $(git cat-file -t v2.0) = commit &&\n\t\ttest $(git cat-file -t v3.0) = tag\n\t)\n'\n\ntest_expect_success FUNNYNAMES 'creation/deletion/updating of replace refs' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic replace_handling &&\n\n\t\t# Same setup as \"refs/replace/ to skip a parent\", so we\n\t\t# do not have to check that replacement refs were used\n\t\t# correctly in the rewrite, just that replacement refs were\n\t\t# deleted, added, or updated correctly.\n\t\tcd replace_handling &&\n\t\tgit tag -d v2.0 &&\n\t\tmaster=$(git rev-parse master) &&\n\t\tmaster_1=$(git rev-parse master~1) &&\n\t\tmaster_2=$(git rev-parse master~2) &&\n\t\tgit 
replace HEAD~1 HEAD~2 &&\n\t\tcd .. &&\n\n\t\tmkdir -p test_replace_refs &&\n\t\tcd test_replace_refs &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs delete-no-add --path-rename numbers:counting &&\n\t\tgit show-ref >output &&\n\t\t! grep refs/replace/ output &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs delete-and-add --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master) refs/replace/$master\" >out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_2\" >>out &&\n\t\tsort -k 2 out >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs update-no-add --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs update-or-add --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master) refs/replace/$master\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >>out &&\n\t\tsort -k 2 out >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs update-and-add --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master) refs/replace/$master\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_2\" >>out &&\n\t\tsort -k 2 out >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs 
old-default --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master) refs/replace/$master\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >>out &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_2\" >>out &&\n\t\tsort -k 2 out >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect &&\n\n\t\trsync -a --delete ../replace_handling/ ./ &&\n\t\tgit filter-repo --replace-refs update-no-add --path-rename numbers:counting &&\n\t\techo \"$(git rev-parse master~1) refs/replace/$master_1\" >expect &&\n\t\tgit show-ref | grep refs/replace/ >output &&\n\t\ttest_cmp output expect\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--debug' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic debug &&\n\t\tcd debug &&\n\n\t\tgit filter-repo --path words --debug &&\n\n\t\ttest $(git rev-list --count HEAD) = 2 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 12 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 6 filenames &&\n\n\t\ttest_path_is_file .git/filter-repo/fast-export.original &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.original >out &&\n\t\ttest_line_count = 3 out &&\n\t\ttest_path_is_file .git/filter-repo/fast-export.filtered &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.filtered >out &&\n\t\ttest_line_count = 2 out\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--dry-run' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic dry_run &&\n\t\tcd dry_run &&\n\n\t\tgit filter-repo --path words --dry-run &&\n\n\t\tgit show-ref | grep master >out &&\n\t\ttest_line_count = 2 out &&\n\t\tawk \"{print \\$1}\" out | uniq >out2 &&\n\t\ttest_line_count = 1 out2 &&\n\n\t\ttest $(git rev-list --count HEAD) = 3 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 19 all-objs &&\n\t\tgit log --format=%n 
--name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\n\t\ttest_path_is_file .git/filter-repo/fast-export.original &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.original >out &&\n\t\ttest_line_count = 3 out &&\n\t\ttest_path_is_file .git/filter-repo/fast-export.filtered &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.filtered >out &&\n\t\ttest_line_count = 2 out\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--dry-run --debug' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic dry_run_debug &&\n\t\tcd dry_run_debug &&\n\n\t\tgit filter-repo --path words --dry-run --debug &&\n\n\t\tgit show-ref | grep master >out &&\n\t\ttest_line_count = 2 out &&\n\t\tawk \"{print \\$1}\" out | uniq >out2 &&\n\t\ttest_line_count = 1 out2 &&\n\n\t\ttest $(git rev-list --count HEAD) = 3 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 19 all-objs &&\n\t\tgit log --format=%n --name-only | sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\n\t\ttest_path_is_file .git/filter-repo/fast-export.original &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.original >out &&\n\t\ttest_line_count = 3 out &&\n\t\ttest_path_is_file .git/filter-repo/fast-export.filtered &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.filtered >out &&\n\t\ttest_line_count = 2 out\n\t)\n'\n\ntest_expect_success FUNNYNAMES '--dry-run --stdin' '\n\tsetup_metasyntactic_repo &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/metasyntactic dry_run_stdin &&\n\t\tcd dry_run_stdin &&\n\n\t\tgit fast-export --all | git filter-repo --path words --dry-run --stdin &&\n\n\t\tgit show-ref | grep master >out &&\n\t\ttest_line_count = 2 out &&\n\t\tawk \"{print \\$1}\" out | uniq >out2 &&\n\t\ttest_line_count = 1 out2 &&\n\n\t\ttest $(git rev-list --count HEAD) = 3 &&\n\t\tgit cat-file --batch-check --batch-all-objects >all-objs &&\n\t\ttest_line_count = 19 all-objs &&\n\t\tgit log --format=%n --name-only 
| sort | uniq >filenames &&\n\t\ttest_line_count = 9 filenames &&\n\n\t\ttest_path_is_missing .git/filter-repo/fast-export.original &&\n\t\ttest_path_is_file .git/filter-repo/fast-export.filtered &&\n\t\tgrep \"^commit \" .git/filter-repo/fast-export.filtered >out &&\n\t\ttest_line_count = 2 out\n\t)\n'\n\nsetup_analyze_me() {\n\ttest -d analyze_me && return\n\ttest_create_repo analyze_me &&\n\t(\n\t\tcd analyze_me &&\n\t\tmkdir numbers words &&\n\t\ttest_seq 1 10 >numbers/small.num &&\n\t\ttest_seq 100 110 >numbers/medium.num &&\n\t\techo spam >words/to &&\n\t\techo eggs >words/know &&\n\t\techo rename a lot >fickle &&\n\t\tgit add numbers words fickle &&\n\t\ttest_tick &&\n\t\tgit commit -m initial &&\n\n\t\tgit branch modify-fickle &&\n\t\tgit branch other &&\n\t\tgit mv fickle capricious &&\n\t\ttest_tick &&\n\t\tgit commit -m \"rename on main branch\" &&\n\n\t\tgit checkout other &&\n\t\techo random other change >whatever &&\n\t\tgit add whatever &&\n\t\tgit mv fickle capricious &&\n\t\ttest_tick &&\n\t\tgit commit -m \"rename on other branch\" &&\n\n\t\tgit checkout master &&\n\t\tgit merge --no-commit other &&\n\t\tgit mv capricious mercurial &&\n\t\ttest_tick &&\n\t\tgit commit &&\n\n\t\tgit mv words sequence &&\n\t\ttest_tick &&\n\t\tgit commit -m now.sequence &&\n\n\t\tgit rm -rf numbers &&\n\t\ttest_tick &&\n\t\tgit commit -m remove.words &&\n\n\t\tmkdir words &&\n\t\techo no >words/know &&\n\t\tgit add words/know &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Recreated file previously renamed\" &&\n\n\t\techo \"160000 deadbeefdeadbeefdeadbeefdeadbeefdeadbeefQfake_submodule\" | q_to_tab | git update-index --index-info &&\n\t\ttest_tick &&\n\t\tgit commit -m \"Add a fake submodule\" &&\n\n\t\ttest_tick &&\n\t\tgit commit --allow-empty -m \"Final commit, empty\" &&\n\n\t\tgit checkout modify-fickle &&\n\t\techo \"more stuff\" >>fickle &&\n\t\ttest_tick &&\n\t\tgit commit -am \"another more stuff commit\" &&\n\n\t\tgit checkout modify-fickle &&\n\t\techo \"more 
stuff\" >>fickle &&\n\t\ttest_tick &&\n\t\tgit commit -am \"another more stuff commit\" &&\n\n\t\ttest_tick &&\n\t\tgit commit --allow-empty -m \"Final commit, empty\" &&\n\n\t\tgit checkout master &&\n\n\t\t# Add a random extra unreferenced object\n\t\techo foobar | git hash-object --stdin -w\n\t)\n}\n\ntest_expect_success C_LOCALE_OUTPUT '--analyze' '\n\tsetup_analyze_me &&\n\t(\n\t\tcd analyze_me &&\n\n\t\t# Do the analysis, mask compressed size away to avoid different\n\t\t# values with different zlib libraries.\n\t\tgit filter-repo --analyze &&\n\n\t\t# It should not work again without a --force\n\t\ttest_must_fail git filter-repo --analyze &&\n\n\t\t# With a --force, another run should succeed\n\t\tgit filter-repo --analyze --force &&\n\n\t\ttest -d .git/filter-repo/analysis &&\n\t\tcd .git/filter-repo/analysis &&\n\n\t\tcat >expect <<-EOF &&\n\t\tfickle ->\n\t\t    capricious\n\t\t    mercurial\n\t\twords/to ->\n\t\t    sequence/to\n\t\tEOF\n\t\ttest_cmp expect renames.txt &&\n\n\t\tcat >expect <<-EOF &&\n\t\t== Overall Statistics ==\n\t\t  Number of commits: 12\n\t\t  Number of filenames: 10\n\t\t  Number of directories: 4\n\t\t  Number of file extensions: 2\n\n\t\t  Total unpacked size (bytes): 206\n\t\t  Total packed size (bytes): XX\n\n\t\tEOF\n\t\thead -n 9 README | sed -E \"s@(Total packed size .bytes.: )[0-9]+@\\1XX@\"  >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== Files by sha and associated pathnames in reverse size ===\n\t\tFormat: sha, unpacked size, packed size, filename(s) object stored as\n\t\t  a89c82a2d4b713a125a4323d25adda062cc0013d         44 XX numbers/medium.num\n\t\t  c58ae2ffaf8352bd9860bf4bbb6ea78238dca846         35 XX fickle\n\t\t  ccff62141ec7bae42e01a3dcb7615b38aa9fa5b3         24 XX fickle\n\t\t  f00c965d8307308469e537302baa73048488f162         21 XX numbers/small.num\n\t\t  2aa69a2a708eed00cb390e30f6bcc3eed773f390         20 XX whatever\n\t\t  51b95456de9274c9a95f756742808dfd480b9b35         13 
XX [capricious, fickle, mercurial]\n\t\t  732c85a1b3d7ce40ec8f78fd9ffea32e9f45fae0          5 XX [sequence/know, words/know]\n\t\t  34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373          5 XX [sequence/to, words/to]\n\t\t  7ecb56eb3fa3fa6f19dd48bca9f971950b119ede          3 XX words/know\n\t\tEOF\n\t\tsed -E < blob-shas-and-paths.txt \"s@([0-9a-f]+[[:space:]]+[0-9]+)[[:space:]]+[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== All directories by reverse size ===\n\t\tFormat: unpacked size, packed size, date deleted, directory name\n\t\t         206 XX <present>  <toplevel>\n\t\t          65 XX 2005-04-07 numbers\n\t\t          13 XX <present>  words\n\t\t          10 XX <present>  sequence\n\t\tEOF\n\t\tsed -E < directories-all-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== Deleted directories by reverse size ===\n\t\tFormat: unpacked size, packed size, date deleted, directory name\n\t\t          65 XX 2005-04-07 numbers\n\t\tEOF\n\t\tsed -E < directories-deleted-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== All extensions by reverse size ===\n\t\tFormat: unpacked size, packed size, date deleted, extension name\n\t\t         141 XX <present>  <no extension>\n\t\t          65 XX 2005-04-07 .num\n\t\tEOF\n\t\tsed -E < extensions-all-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== Deleted extensions by reverse size ===\n\t\tFormat: unpacked size, packed size, date deleted, extension name\n\t\t          65 XX 2005-04-07 .num\n\t\tEOF\n\t\tsed -E < extensions-deleted-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== All paths by reverse 
accumulated size ===\n\t\tFormat: unpacked size, packed size, date deleted, path name\n\t\t          72 XX <present>  fickle\n\t\t          44 XX 2005-04-07 numbers/medium.num\n\t\t           8 XX <present>  words/know\n\t\t          21 XX 2005-04-07 numbers/small.num\n\t\t          20 XX <present>  whatever\n\t\t          13 XX <present>  mercurial\n\t\t          13 XX <present>  capricious\n\t\t           5 XX <present>  words/to\n\t\t           5 XX <present>  sequence/to\n\t\t           5 XX <present>  sequence/know\n\t\tEOF\n\t\tsed -E < path-all-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\tcat >expect <<-EOF &&\n\t\t=== Deleted paths by reverse accumulated size ===\n\t\tFormat: unpacked size, packed size, date deleted, path name(s)\n\t\t          44 XX 2005-04-07 numbers/medium.num\n\t\t          21 XX 2005-04-07 numbers/small.num\n\t\tEOF\n\t\tsed -E < path-deleted-sizes.txt \"s@(^[[:space:]]+[0-9]+)([[:space:]]+)[0-9]+@\\1 XX@\" >actual &&\n\t\ttest_cmp expect actual\n\t)\n'\n\ntest_expect_success C_LOCALE_OUTPUT '--analyze --report-dir' '\n\tsetup_analyze_me &&\n\t(\n\t\tcd analyze_me &&\n\n\t\trm -rf .git/filter-repo &&\n\t\tgit filter-repo --analyze --report-dir foobar &&\n\n\t\t# It should not work again without a --force\n\t\ttest_must_fail git filter-repo --analyze --report-dir foobar &&\n\n\t\t# With a --force, though, it should overwrite\n\t\tgit filter-repo --analyze --report-dir foobar --force &&\n\n\t\ttest ! 
-d .git/filter-repo/analysis &&\n\t\ttest -d foobar &&\n\n\t\tcd foobar &&\n\n\t\t# Very simple tests because already tested above.\n\t\ttest_path_is_file renames.txt &&\n\t\ttest_path_is_file README &&\n\t\ttest_path_is_file blob-shas-and-paths.txt &&\n\t\ttest_path_is_file directories-all-sizes.txt &&\n\t\ttest_path_is_file directories-deleted-sizes.txt &&\n\t\ttest_path_is_file extensions-all-sizes.txt &&\n\t\ttest_path_is_file extensions-deleted-sizes.txt &&\n\t\ttest_path_is_file path-all-sizes.txt &&\n\t\ttest_path_is_file path-deleted-sizes.txt\n\t)\n'\n\ntest_expect_success '--replace-text all options' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me replace_text &&\n\t\tcd replace_text &&\n\n\t\tcat >../replace-rules <<-\\EOF &&\n\t\tother\n\t\tchange==>variation\n\n\t\tliteral:spam==>foodstuff\n\t\tglob:ran*m==>haphazard\n\t\tregex:1(.[0-9])==>2\\1\n\t\tEOF\n\t\tgit filter-repo --replace-text ../replace-rules &&\n\n\t\ttest_seq 200 210 >expect &&\n\t\tgit show HEAD~4:numbers/medium.num >actual &&\n\t\ttest_cmp expect actual &&\n\n\t\techo \"foodstuff\" >expect &&\n\t\ttest_cmp expect sequence/to &&\n\n\t\techo \"haphazard ***REMOVED*** variation\" >expect &&\n\t\ttest_cmp expect whatever\n\t)\n'\n\ntest_expect_success '--replace-text binary zero_byte-0_char' '\n\t(\n\t\tset -e\n\t\tset -u\n\t\tREPO=replace-text-detect-binary\n\t\tFILE=mangle.bin\n\t\tOLD_STR=replace-from\n\t\tNEW_STR=replace-with\n\t\t# used with printf, contains a zero byte and a \"0\" character, binary\n\t\tOLD_CONTENT_FORMAT=\"${OLD_STR}\\\\0${OLD_STR}\\\\n0\\\\n\"\n\t\t# expect content unchanged due to binary\n\t\tNEW_CONTENT_FORMAT=\"${OLD_CONTENT_FORMAT}\"\n\n\t\trm -rf \"${REPO}\"\n\t\tgit init \"${REPO}\"\n\t\tcd \"${REPO}\"\n\t\techo \"${OLD_STR}==>${NEW_STR}\" >../replace-rules\n\t\tprintf \"${NEW_CONTENT_FORMAT}\" > ../expect\n\t\tprintf \"${OLD_CONTENT_FORMAT}\" > \"${FILE}\"\n\t\tgit add \"${FILE}\"\n\t\tgit commit -m 'test'\n\t\tgit filter-repo 
--force --replace-text ../replace-rules\n\n\t\ttest_cmp ../expect \"${FILE}\"\n\t)\n'\n\ntest_expect_success '--replace-text binary zero_byte-no_0_char' '\n\t(\n\t\tset -e\n\t\tset -u\n\t\tREPO=replace-text-detect-binary\n\t\tFILE=mangle.bin\n\t\tOLD_STR=replace-from\n\t\tNEW_STR=replace-with\n\t\t# used with printf, contains a zero byte but no \"0\" character, binary\n\t\tOLD_CONTENT_FORMAT=\"${OLD_STR}\\\\0${OLD_STR}\\\\n\"\n\t\t# expect content unchanged due to binary\n\t\tNEW_CONTENT_FORMAT=\"${OLD_CONTENT_FORMAT}\"\n\n\t\trm -rf \"${REPO}\"\n\t\tgit init \"${REPO}\"\n\t\tcd \"${REPO}\"\n\t\techo \"${OLD_STR}==>${NEW_STR}\" >../replace-rules\n\t\tprintf \"${NEW_CONTENT_FORMAT}\" > ../expect\n\t\tprintf \"${OLD_CONTENT_FORMAT}\" > \"${FILE}\"\n\t\tgit add \"${FILE}\"\n\t\tgit commit -m 'test'\n\t\tgit filter-repo --force --replace-text ../replace-rules\n\n\t\ttest_cmp ../expect \"${FILE}\"\n\t)\n'\n\ntest_expect_success '--replace-text text-file no_zero_byte-zero_char' '\n\t(\n\t\tset -e\n\t\tset -u\n\t\tREPO=replace-text-detect-binary\n\t\tFILE=mangle.bin\n\t\tOLD_STR=replace-from\n\t\tNEW_STR=replace-with\n\t\t# used with printf, contains no zero byte but contains a \"0\" character, text\n\t\tOLD_CONTENT_FORMAT=\"${OLD_STR}0\\\\n0${OLD_STR}\\\\n0\\\\n\"\n\t\t# expect content changed due to text\n\t\tNEW_CONTENT_FORMAT=\"${NEW_STR}0\\\\n0${NEW_STR}\\\\n0\\\\n\"\n\n\t\trm -rf \"${REPO}\"\n\t\tgit init \"${REPO}\"\n\t\tcd \"${REPO}\"\n\t\techo \"${OLD_STR}==>${NEW_STR}\" >../replace-rules\n\t\tprintf \"${NEW_CONTENT_FORMAT}\" > ../expect\n\t\tprintf \"${OLD_CONTENT_FORMAT}\" > \"${FILE}\"\n\t\tgit add \"${FILE}\"\n\t\tgit commit -m 'test'\n\t\tgit filter-repo --force --replace-text ../replace-rules\n\n\t\ttest_cmp ../expect \"${FILE}\"\n\t)\n'\n\ntest_expect_success '--strip-blobs-bigger-than' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me strip_big_blobs &&\n\t\tcd strip_big_blobs &&\n\n\t\t# Verify certain files are present 
initially\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 11 ../filenames &&\n\t\tgit rev-parse HEAD~7:numbers/medium.num &&\n\t\tgit rev-parse HEAD~7:numbers/small.num &&\n\t\tgit rev-parse HEAD~4:mercurial &&\n\t\ttest -f mercurial &&\n\n\t\t# Make one of the current files be \"really big\"\n\t\ttest_seq 1 1000 >mercurial &&\n\t\tgit add mercurial &&\n\t\tgit commit --amend &&\n\n\t\t# Strip \"really big\" files\n\t\tgit filter-repo --force --strip-blobs-bigger-than 3K --prune-empty never &&\n\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 11 ../filenames &&\n\t\t# The \"mercurial\" file should still be around...\n\t\tgit rev-parse HEAD~4:mercurial &&\n\t\tgit rev-parse HEAD:mercurial &&\n\t\t# ...but only with its old, smaller contents\n\t\ttest_line_count = 1 mercurial &&\n\n\t\t# Strip files that are too big, verify they are gone\n\t\tgit filter-repo --strip-blobs-bigger-than 40 &&\n\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 10 ../filenames &&\n\t\ttest_must_fail git rev-parse HEAD~7:numbers/medium.num &&\n\n\t\t# Do it again, this time with --replace-text since that means\n\t\t# we are operating without --no-data and have to go through\n\t\t# a different codepath.  
(The search/replace terms are bogus)\n\t\tcat >../replace-rules <<-\\EOF &&\n\t\tnot found==>was found\n\t\tEOF\n\t\tgit filter-repo --strip-blobs-bigger-than 20 --replace-text ../replace-rules &&\n\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 9 ../filenames &&\n\t\ttest_must_fail git rev-parse HEAD~7:numbers/medium.num &&\n\t\ttest_must_fail git rev-parse HEAD~7:numbers/small.num &&\n\n\t\t# Remove the temporary auxiliary files\n\t\trm ../replace-rules &&\n\t\trm ../filenames\n\t)\n'\n\ntest_expect_success '--strip-blobs-with-ids' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me strip_blobs_with_ids &&\n\t\tcd strip_blobs_with_ids &&\n\n\t\t# Verify certain files are present initially\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 11 ../filenames &&\n\t\tgrep fake_submodule ../filenames &&\n\n\t\t# Strip \"a certain file\" files\n\t\techo deadbeefdeadbeefdeadbeefdeadbeefdeadbeef >../input &&\n\t\tgit filter-repo --strip-blobs-with-ids ../input &&\n\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 10 ../filenames &&\n\t\t# Make sure fake_submodule was removed\n\t\t! grep fake_submodule ../filenames &&\n\n\t\t# Do it again, this time with --replace-text since that means\n\t\t# we are operating without --no-data and have to go through\n\t\t# a different codepath.  (The search/replace terms are bogus)\n\t\tcat >../bad-ids <<-\\EOF &&\n\t\t34b6a0c9d02cb6ef7f409f248c0c1224ce9dd373\n\t\t51b95456de9274c9a95f756742808dfd480b9b35\n\t\tEOF\n\t\tcat >../replace-rules <<-\\EOF &&\n\t\tnot found==>was found\n\t\tEOF\n\t\tgit filter-repo --strip-blobs-with-ids ../bad-ids --replace-text ../replace-rules &&\n\n\t\tgit log --format=%n --name-only | sort | uniq >../filenames &&\n\t\ttest_line_count = 6 ../filenames &&\n\t\t! grep sequence/to ../filenames &&\n\t\t! grep words/to ../filenames &&\n\t\t! 
grep capricious ../filenames &&\n\t\t! grep fickle ../filenames &&\n\t\t! grep mercurial ../filenames &&\n\n\t\t# Remove the temporary auxiliary files\n\t\trm ../bad-ids &&\n\t\trm ../replace-rules &&\n\t\trm ../filenames\n\t)\n'\n\ntest_expect_success 'startup sanity checks' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me startup_sanity_checks &&\n\t\tcd startup_sanity_checks &&\n\n\t\techo foobar | git hash-object -w --stdin &&\n\t\tgit count-objects -v &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"expected freshly packed repo\" ../err &&\n\t\tgit prune &&\n\n\t\tgit remote add another_remote /dev/null &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"expected one remote, origin\" ../err &&\n\t\tgit remote rm another_remote &&\n\n\t\tgit remote rename origin another_remote &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"expected one remote, origin\" ../err &&\n\t\tgit remote rename another_remote origin &&\n\n\t\tcd words &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../../err &&\n\t\ttest_i18ngrep \"GIT_DIR must be .git\" ../../err &&\n\t\trm ../../err &&\n\t\tcd .. 
&&\n\n\t\tgit config core.bare true &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"GIT_DIR must be .\" ../err &&\n\t\tgit config core.bare false &&\n\n\t\tgit update-ref -m \"Just Testing\" refs/heads/master HEAD &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"expected at most one entry in the reflog\" ../err &&\n\t\tgit reflog expire --expire=now &&\n\n\t\techo yes >>words/know &&\n\t\tgit stash save random change &&\n\t\trm -rf .git/logs/ &&\n\t\tgit gc &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"has stashed changes\" ../err &&\n\t\tgit update-ref -d refs/stash &&\n\n\t\techo yes >>words/know &&\n\t\tgit add words/know &&\n\t\tgit gc --prune=now &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"you have uncommitted changes\" ../err &&\n\t\tgit checkout HEAD words/know &&\n\n\t\techo yes >>words/know &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"you have unstaged changes\" ../err &&\n\t\tgit checkout -- words/know &&\n\n\t\t>untracked &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"you have untracked changes\" ../err &&\n\t\trm ../err &&\n\t\trm untracked &&\n\n\t\tgit worktree add ../other-worktree HEAD &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"you have multiple worktrees\" ../err &&\n\t\trm -rf ../err &&\n\t\tgit worktree remove ../other-worktree &&\n\n\t\tgit update-ref -d refs/remotes/origin/master &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"refs/heads/master exists, but refs/remotes/origin/master not found\" ../err &&\n\t\tgit update-ref -m restoring refs/remotes/origin/master refs/heads/master &&\n\t\trm ../err &&\n\n\t\trm .git/logs/refs/remotes/origin/master &&\n\t\tgit update-ref -m funsies refs/remotes/origin/master 
refs/heads/master~1 &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"refs/heads/master does not match refs/remotes/origin/master\" ../err &&\n\t\trm ../err &&\n\n\t\tcd ../ &&\n\t\tgit -C analyze_me gc &&\n\t\techo foobar | git -C analyze_me hash-object -w --stdin &&\n\t\tgit clone analyze_me startup_sanity_checks2 &&\n\t\tcd startup_sanity_checks2 &&\n\n\t\techo foobar | git hash-object -w --stdin &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err &&\n\t\ttest_i18ngrep \"expected freshly packed repo\" ../err &&\n\t\ttest_i18ngrep \"when cloning local repositories\" ../err &&\n\t\trm ../err &&\n\n\t\tcd ../startup_sanity_checks &&\n\t\tgit config core.ignoreCase true &&\n\t\trev=$(git rev-parse refs/remotes/origin/other) &&\n\t\techo \"$rev refs/remotes/origin/zcase\" >>.git/packed-refs &&\n\t\techo \"$rev refs/remotes/origin/zCASE\" >>.git/packed-refs &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err\n\t\ttest_i18ngrep \"Cannot rewrite history on a case insensitive\" ../err &&\n\t\tgit update-ref -d refs/remotes/origin/zCASE &&\n\t\tgit config --unset core.ignoreCase &&\n\n\t\tgit config core.precomposeUnicode true &&\n\t\trev=$(git rev-parse refs/heads/master) &&\n\t\techo \"$rev refs/remotes/origin/zlamé\" >>.git/packed-refs &&\n\t\techo \"$rev refs/remotes/origin/zlamé\" >>.git/packed-refs &&\n\t\ttest_must_fail git filter-repo --path numbers 2>../err\n\t\ttest_i18ngrep \"Cannot rewrite history on a character normalizing\" ../err &&\n\t\tgit update-ref -d refs/remotes/origin/zlamé &&\n\t\tgit config --unset core.precomposeUnicode &&\n\t\tcd ..\n\t)\n'\n\ntest_expect_success 'other startup error cases and requests for help' '\n\t(\n\t\t# prevent MSYS2 (Git for Windows) from converting the colon to\n\t\t# a semicolon when encountering parameters that look like\n\t\t# Unix-style, colon-separated path lists (such as `foo:.`)\n\t\tMSYS_NO_PATHCONV=1 &&\n\t\texport MSYS_NO_PATHCONV\n\n\t\tgit init 
startup_errors &&\n\t\tcd startup_errors &&\n\n\t\tgit filter-repo -h >out &&\n\t\ttest_i18ngrep \"filter-repo destructively rewrites history\" out &&\n\n\t\ttest_must_fail git filter-repo 2>err &&\n\t\ttest_i18ngrep \"No arguments specified.\" err &&\n\n\t\ttest_must_fail git filter-repo --analyze 2>err &&\n\t\ttest_i18ngrep \"Nothing to analyze; repository is empty\" err &&\n\n\t\t(\n\t\t\tGIT_CEILING_DIRECTORIES=$(pwd) &&\n\t\t\texport GIT_CEILING_DIRECTORIES &&\n\t\t\tmkdir not_a_repo &&\n\t\t\tcd not_a_repo &&\n\t\t\ttest_must_fail git filter-repo --dry-run 2>err &&\n\t\t\ttest_i18ngrep \"returned non-zero exit status\" err &&\n\t\t\trm err &&\n\t\t\tcd .. &&\n\t\t\trmdir not_a_repo\n\t\t) &&\n\n\t\ttest_must_fail git filter-repo --analyze --path foobar 2>err &&\n\t\ttest_i18ngrep \": --analyze is incompatible with --path\" err &&\n\n\t\ttest_must_fail git filter-repo --analyze --stdin 2>err &&\n\t\ttest_i18ngrep \": --analyze is incompatible with --stdin\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename foo:bar --use-base-name 2>err &&\n\t\ttest_i18ngrep \": --use-base-name and --path-rename are incompatible\" err &&\n\n\t\ttest_must_fail git filter-repo --file-info-callback \"None\" --stdin 2>err &&\n\t\ttest_i18ngrep \": --file-info-callback is incompatible with\" err &&\n\n\t\ttest_must_fail git filter-repo --file-info-callback \"None\" --blob-callback \"None\" 2>err &&\n\t\ttest_i18ngrep \": --file-info-callback is incompatible with\" err &&\n\n\t\ttest_must_fail git filter-repo --file-info-callback \"None\" --filename-callback \"None\" 2>err &&\n\t\ttest_i18ngrep \": --file-info-callback is incompatible with\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename foo:bar/ 2>err &&\n\t\ttest_i18ngrep \"either ends with a slash then both must.\" err &&\n\n\t\techo \"foo==>bar/\" >input &&\n\t\ttest_must_fail git filter-repo --paths-from-file input 2>err &&\n\t\ttest_i18ngrep \"either ends with a slash then both must.\" err &&\n\n\t\techo 
\"glob:*.py==>newname\" >input &&\n\t\ttest_must_fail git filter-repo --paths-from-file input 2>err &&\n\t\ttest_i18ngrep \"renaming globs makes no sense\" err &&\n\n\t\ttest_must_fail git filter-repo --strip-blobs-bigger-than 3GiB 2>err &&\n\t\ttest_i18ngrep \"could not parse.*3GiB\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename foo/bar:. 2>err &&\n\t\ttest_i18ngrep \"Invalid path component .\\.. found in .foo/bar:\\.\" err &&\n\n\t\ttest_must_fail git filter-repo --path /foo/bar 2>err &&\n\t\ttest_i18ngrep \"Pathnames cannot begin with a ./\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename foo:/bar 2>err &&\n\t\ttest_i18ngrep \"Pathnames cannot begin with a ./\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename /foo:bar 2>err &&\n\t\ttest_i18ngrep \"Pathnames cannot begin with a ./\" err &&\n\n\t\ttest_must_fail git filter-repo --path-rename foo 2>err &&\n\t\ttest_i18ngrep \"Error: --path-rename expects one colon in its argument\" err &&\n\n\t\ttest_must_fail git filter-repo --subdirectory-filter /foo 2>err &&\n\t\ttest_i18ngrep \"Pathnames cannot begin with a ./\" err &&\n\n\t\ttest_must_fail git filter-repo --subdirectory-filter /foo 2>err &&\n\t\ttest_i18ngrep \"Pathnames cannot begin with a ./\" err &&\n\n\t\ttest_must_fail git filter-repo --sdr --stdin </dev/null 2>err &&\n\t\ttest_i18ngrep \": sensitive data removal is incompatible with --stdin\" err &&\n\n\t\ttest_must_fail git filter-repo --sdr --source . 2>err &&\n\t\ttest_i18ngrep \": sensitive data removal is incompatible with --source and --target\" err &&\n\n\t\ttest_must_fail git filter-repo --sdr --target . 
2>err &&\n\t\ttest_i18ngrep \": sensitive data removal is incompatible with --source and --target\" err\n\t)\n'\n\ntest_expect_success 'invalid fast-import directives' '\n\t(\n\t\tgit init invalid_directives &&\n\t\tcd invalid_directives &&\n\n\t\techo \"get-mark :15\" | \\\n\t\t\ttest_must_fail git filter-repo --stdin --force 2>err &&\n\t\ttest_i18ngrep \"Unsupported command\" err &&\n\n\t\techo \"invalid-directive\" | \\\n\t\t\ttest_must_fail git filter-repo --stdin --force 2>err &&\n\t\ttest_i18ngrep \"Could not parse line\" err\n\t)\n'\n\ntest_expect_success 'mailmap sanity checks' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me mailmap_sanity_checks &&\n\t\tcd mailmap_sanity_checks &&\n\n\t\tfake=$(pwd)/fake &&\n\t\ttest_must_fail git filter-repo --mailmap \"$fake\"/path 2>../err &&\n\t\ttest_i18ngrep \"Cannot read $fake/path\" ../err &&\n\n\t\techo \"Total Bogus\" >../whoopsies &&\n\t\ttest_must_fail git filter-repo --mailmap ../whoopsies 2>../err &&\n\t\ttest_i18ngrep \"Unparseable mailmap file\" ../err &&\n\t\trm ../err &&\n\t\trm ../whoopsies &&\n\n\t\techo \"Me <me@site.com> Myself <yo@email.com> Extraneous\" >../whoopsies &&\n\t\ttest_must_fail git filter-repo --mailmap ../whoopsies 2>../err &&\n\t\ttest_i18ngrep \"Unparseable mailmap file\" ../err &&\n\t\trm ../err &&\n\t\trm ../whoopsies\n\t)\n'\n\ntest_expect_success 'incremental import' '\n\tsetup_analyze_me &&\n\t(\n\t\tgit clone file://\"$(pwd)\"/analyze_me incremental &&\n\t\tcd incremental &&\n\n\t\toriginal=$(git rev-parse master) &&\n\t\tgit fast-export --reference-excluded-parents master~2..master \\\n\t\t\t| git filter-repo --stdin --refname-callback \"return b\\\"develop\\\"\" &&\n\t\ttest \"$(git rev-parse develop)\" = \"$original\"\n\t)\n'\n\ntest_expect_success '--target' '\n\tsetup_analyze_me &&\n\tgit init target &&\n\t(\n\t\tcd target &&\n\t\tgit checkout -b other &&\n\t\techo hello >world &&\n\t\tgit add world &&\n\t\tgit commit -m init &&\n\t\tgit checkout 
-b unique\n\t) &&\n\tgit -C target rev-parse unique >target/expect &&\n\tgit filter-repo --source analyze_me --target target --path fake_submodule --force --debug &&\n\ttest 2 = $(git -C target rev-list --count master) &&\n\ttest_must_fail git -C target rev-parse other &&\n\tgit -C target rev-parse unique >target/actual &&\n\ttest_cmp target/expect target/actual\n'\n\ntest_expect_success '--date-order' '\n\ttest_create_repo date_order &&\n\t(\n\t\tcd date_order &&\n\t\tgit fast-import --quiet <$DATA/date-order &&\n\t\t# First, verify that without date-order, C is before B\n\t\tcat <<-EOF >expect-normal &&\n\t\tInitial\n\t\tA\n\t\tC\n\t\tB\n\t\tD\n\t\tmerge\n\t\tEOF\n\t\tgit filter-repo --force --message-callback \"\n\t\t\twith open(\\\"messages.txt\\\", \\\"ab\\\") as f:\n\t\t\t\tf.write(message)\n\t\t\treturn message\n\t\t\t\" &&\n\t\ttest_cmp expect-normal messages.txt &&\n\n\t\t# Next, verify that with date-order, C and B are reversed\n\t\trm messages.txt &&\n\t\tcat <<-EOF >expect &&\n\t\tInitial\n\t\tA\n\t\tB\n\t\tC\n\t\tD\n\t\tmerge\n\t\tEOF\n\t\tgit filter-repo --date-order --force --message-callback \"\n\t\t\twith open(\\\"messages.txt\\\", \\\"ab\\\") as f:\n\t\t\t\tf.write(message)\n\t\t\treturn message\n\t\t\t\" &&\n\t\ttest_cmp expect messages.txt\n\t)\n'\n\ntest_expect_success '--refs' '\n\tsetup_analyze_me &&\n\tgit init refs &&\n\t(\n\t\tcd refs &&\n\t\tgit checkout -b other &&\n\t\techo hello >world &&\n\t\tgit add world &&\n\t\tgit commit -m init\n\t) &&\n\tgit -C refs rev-parse other >refs/expect &&\n\tgit -C analyze_me rev-parse master >refs/expect &&\n\tgit filter-repo --source analyze_me --target refs --refs master --force &&\n\tgit -C refs rev-parse other >refs/actual &&\n\tgit -C refs rev-parse master >refs/actual &&\n\ttest_cmp refs/expect refs/actual\n'\n\ntest_done\n"
  },
  {
    "path": "t/test-lib-functions.sh",
    "content": "# Library of functions shared by all tests scripts, included by\n# test-lib.sh.\n#\n# Copyright (c) 2005 Junio C Hamano\n#\n# This program is free software: you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation, either version 2 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program.  If not, see http://www.gnu.org/licenses/ .\n\n# The semantics of the editor variables are that of invoking\n# sh -c \"$EDITOR \\\"$@\\\"\" files ...\n#\n# If our trash directory contains shell metacharacters, they will be\n# interpreted if we just set $EDITOR directly, so do a little dance with\n# environment variables to work around this.\n#\n# In particular, quoting isn't enough, as the path may contain the same quote\n# that we're using.\ntest_set_editor () {\n\tFAKE_EDITOR=\"$1\"\n\texport FAKE_EDITOR\n\tEDITOR='\"$FAKE_EDITOR\"'\n\texport EDITOR\n}\n\ntest_set_index_version () {\n    GIT_INDEX_VERSION=\"$1\"\n    export GIT_INDEX_VERSION\n}\n\ntest_decode_color () {\n\tawk '\n\t\tfunction name(n) {\n\t\t\tif (n == 0) return \"RESET\";\n\t\t\tif (n == 1) return \"BOLD\";\n\t\t\tif (n == 2) return \"FAINT\";\n\t\t\tif (n == 3) return \"ITALIC\";\n\t\t\tif (n == 7) return \"REVERSE\";\n\t\t\tif (n == 30) return \"BLACK\";\n\t\t\tif (n == 31) return \"RED\";\n\t\t\tif (n == 32) return \"GREEN\";\n\t\t\tif (n == 33) return \"YELLOW\";\n\t\t\tif (n == 34) return \"BLUE\";\n\t\t\tif (n == 35) return \"MAGENTA\";\n\t\t\tif (n == 36) return \"CYAN\";\n\t\t\tif (n == 37) return \"WHITE\";\n\t\t\tif (n == 40) return \"BLACK\";\n\t\t\tif (n == 
41) return \"BRED\";\n\t\t\tif (n == 42) return \"BGREEN\";\n\t\t\tif (n == 43) return \"BYELLOW\";\n\t\t\tif (n == 44) return \"BBLUE\";\n\t\t\tif (n == 45) return \"BMAGENTA\";\n\t\t\tif (n == 46) return \"BCYAN\";\n\t\t\tif (n == 47) return \"BWHITE\";\n\t\t}\n\t\t{\n\t\t\twhile (match($0, /\\033\\[[0-9;]*m/) != 0) {\n\t\t\t\tprintf \"%s<\", substr($0, 1, RSTART-1);\n\t\t\t\tcodes = substr($0, RSTART+2, RLENGTH-3);\n\t\t\t\tif (length(codes) == 0)\n\t\t\t\t\tprintf \"%s\", name(0)\n\t\t\t\telse {\n\t\t\t\t\tn = split(codes, ary, \";\");\n\t\t\t\t\tsep = \"\";\n\t\t\t\t\tfor (i = 1; i <= n; i++) {\n\t\t\t\t\t\tprintf \"%s%s\", sep, name(ary[i]);\n\t\t\t\t\t\tsep = \";\"\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\tprintf \">\";\n\t\t\t\t$0 = substr($0, RSTART + RLENGTH, length($0) - RSTART - RLENGTH + 1);\n\t\t\t}\n\t\t\tprint\n\t\t}\n\t'\n}\n\nlf_to_nul () {\n\tperl -pe 'y/\\012/\\000/'\n}\n\nnul_to_q () {\n\tperl -pe 'y/\\000/Q/'\n}\n\nq_to_nul () {\n\tperl -pe 'y/Q/\\000/'\n}\n\nq_to_cr () {\n\ttr Q '\\015'\n}\n\nq_to_tab () {\n\ttr Q '\\011'\n}\n\nqz_to_tab_space () {\n\ttr QZ '\\011\\040'\n}\n\nappend_cr () {\n\tsed -e 's/$/Q/' | tr Q '\\015'\n}\n\nremove_cr () {\n\ttr '\\015' Q | sed -e 's/Q$//'\n}\n\n# Generate an output of $1 bytes of all zeroes (NULs, not ASCII zeroes).\n# If $1 is 'infinity', output forever or until the receiving pipe stops reading,\n# whichever comes first.\ngenerate_zero_bytes () {\n\ttest-tool genzeros \"$@\"\n}\n\n# In some bourne shell implementations, the \"unset\" builtin returns\n# nonzero status when a variable to be unset was not set in the first\n# place.\n#\n# Use sane_unset when that should not be considered an error.\n\nsane_unset () {\n\tunset \"$@\"\n\treturn 0\n}\n\ntest_tick () {\n\tif test -z \"${test_tick+set}\"\n\tthen\n\t\ttest_tick=1112911993\n\telse\n\t\ttest_tick=$(($test_tick + 60))\n\tfi\n\tGIT_COMMITTER_DATE=\"$test_tick -0700\"\n\tGIT_AUTHOR_DATE=\"$test_tick -0700\"\n\texport GIT_COMMITTER_DATE GIT_AUTHOR_DATE\n}\n\n# 
Stop execution and start a shell. This is useful for debugging tests.\n#\n# Be sure to remove all invocations of this command before submitting.\n\ntest_pause () {\n\t\"$SHELL_PATH\" <&6 >&5 2>&7\n}\n\n# Wrap git with a debugger. Adding this to a command can make it easier\n# to understand what is going on in a failing test.\n#\n# Examples:\n#     debug git checkout master\n#     debug --debugger=nemiver git $ARGS\n#     debug -d \"valgrind --tool=memcheck --track-origins=yes\" git $ARGS\ndebug () {\n\tcase \"$1\" in\n\t-d)\n\t\tGIT_DEBUGGER=\"$2\" &&\n\t\tshift 2\n\t\t;;\n\t--debugger=*)\n\t\tGIT_DEBUGGER=\"${1#*=}\" &&\n\t\tshift 1\n\t\t;;\n\t*)\n\t\tGIT_DEBUGGER=1\n\t\t;;\n\tesac &&\n\tGIT_DEBUGGER=\"${GIT_DEBUGGER}\" \"$@\" <&6 >&5 2>&7\n}\n\n# Call test_commit with the arguments\n# [-C <directory>] <message> [<file> [<contents> [<tag>]]]\"\n#\n# This will commit a file with the given contents and the given commit\n# message, and tag the resulting commit with the given tag name.\n#\n# <file>, <contents>, and <tag> all default to <message>.\n#\n# If the first argument is \"-C\", the second argument is used as a path for\n# the git invocations.\n\ntest_commit () {\n\tnotick= &&\n\tsignoff= &&\n\tindir= &&\n\twhile test $# != 0\n\tdo\n\t\tcase \"$1\" in\n\t\t--notick)\n\t\t\tnotick=yes\n\t\t\t;;\n\t\t--signoff)\n\t\t\tsignoff=\"$1\"\n\t\t\t;;\n\t\t-C)\n\t\t\tindir=\"$2\"\n\t\t\tshift\n\t\t\t;;\n\t\t*)\n\t\t\tbreak\n\t\t\t;;\n\t\tesac\n\t\tshift\n\tdone &&\n\tindir=${indir:+\"$indir\"/} &&\n\tfile=${2:-\"$1.t\"} &&\n\techo \"${3-$1}\" > \"$indir$file\" &&\n\tgit ${indir:+ -C \"$indir\"} add \"$file\" &&\n\tif test -z \"$notick\"\n\tthen\n\t\ttest_tick\n\tfi &&\n\tgit ${indir:+ -C \"$indir\"} commit $signoff -m \"$1\" &&\n\tgit ${indir:+ -C \"$indir\"} tag \"${4:-$1}\"\n}\n\n# Call test_merge with the arguments \"<message> <commit>\", where <commit>\n# can be a tag pointing to the commit-to-merge.\n\ntest_merge () {\n\ttest_tick &&\n\tgit merge -m \"$1\" \"$2\" 
&&\n\tgit tag \"$1\"\n}\n\n# This function helps systems where core.filemode=false is set.\n# Use it instead of plain 'chmod +x' to set or unset the executable bit\n# of a file in the working directory and add it to the index.\n\ntest_chmod () {\n\tchmod \"$@\" &&\n\tgit update-index --add \"--chmod=$@\"\n}\n\n# Get the modebits from a file.\ntest_modebits () {\n\tls -l \"$1\" | sed -e 's|^\\(..........\\).*|\\1|'\n}\n\n# Unset a configuration variable, but don't fail if it doesn't exist.\ntest_unconfig () {\n\tconfig_dir=\n\tif test \"$1\" = -C\n\tthen\n\t\tshift\n\t\tconfig_dir=$1\n\t\tshift\n\tfi\n\tgit ${config_dir:+-C \"$config_dir\"} config --unset-all \"$@\"\n\tconfig_status=$?\n\tcase \"$config_status\" in\n\t5) # ok, nothing to unset\n\t\tconfig_status=0\n\t\t;;\n\tesac\n\treturn $config_status\n}\n\n# Set git config, automatically unsetting it after the test is over.\ntest_config () {\n\tconfig_dir=\n\tif test \"$1\" = -C\n\tthen\n\t\tshift\n\t\tconfig_dir=$1\n\t\tshift\n\tfi\n\ttest_when_finished \"test_unconfig ${config_dir:+-C '$config_dir'} '$1'\" &&\n\tgit ${config_dir:+-C \"$config_dir\"} config \"$@\"\n}\n\ntest_config_global () {\n\ttest_when_finished \"test_unconfig --global '$1'\" &&\n\tgit config --global \"$@\"\n}\n\nwrite_script () {\n\t{\n\t\techo \"#!${2-\"$SHELL_PATH\"}\" &&\n\t\tcat\n\t} >\"$1\" &&\n\tchmod +x \"$1\"\n}\n\n# Use test_set_prereq to tell that a particular prerequisite is available.\n# The prerequisite can later be checked for in two ways:\n#\n# - Explicitly using test_have_prereq.\n#\n# - Implicitly by specifying the prerequisite tag in the calls to\n#   test_expect_{success,failure,code}.\n#\n# The single parameter is the prerequisite tag (a simple word, in all\n# capital letters by convention).\n\ntest_unset_prereq () {\n\t! 
test_have_prereq \"$1\" ||\n\tsatisfied_prereq=\"${satisfied_prereq% $1 *} ${satisfied_prereq#* $1 }\"\n}\n\ntest_set_prereq () {\n\tcase \"$1\" in\n\t!*)\n\t\ttest_unset_prereq \"${1#!}\"\n\t\t;;\n\t*)\n\t\tsatisfied_prereq=\"$satisfied_prereq$1 \"\n\t\t;;\n\tesac\n}\nsatisfied_prereq=\" \"\nlazily_testable_prereq= lazily_tested_prereq=\n\n# Usage: test_lazy_prereq PREREQ 'script'\ntest_lazy_prereq () {\n\tlazily_testable_prereq=\"$lazily_testable_prereq$1 \"\n\teval test_prereq_lazily_$1=\\$2\n}\n\ntest_run_lazy_prereq_ () {\n\tscript='\nmkdir -p \"$TRASH_DIRECTORY/prereq-test-dir\" &&\n(\n\tcd \"$TRASH_DIRECTORY/prereq-test-dir\" &&'\"$2\"'\n)'\n\tsay >&3 \"checking prerequisite: $1\"\n\tsay >&3 \"$script\"\n\ttest_eval_ \"$script\"\n\teval_ret=$?\n\trm -rf \"$TRASH_DIRECTORY/prereq-test-dir\"\n\tif test \"$eval_ret\" = 0; then\n\t\tsay >&3 \"prerequisite $1 ok\"\n\telse\n\t\tsay >&3 \"prerequisite $1 not satisfied\"\n\tfi\n\treturn $eval_ret\n}\n\ntest_have_prereq () {\n\t# prerequisites can be concatenated with ','\n\tsave_IFS=$IFS\n\tIFS=,\n\tset -- $*\n\tIFS=$save_IFS\n\n\ttotal_prereq=0\n\tok_prereq=0\n\tmissing_prereq=\n\n\tfor prerequisite\n\tdo\n\t\tcase \"$prerequisite\" in\n\t\t!*)\n\t\t\tnegative_prereq=t\n\t\t\tprerequisite=${prerequisite#!}\n\t\t\t;;\n\t\t*)\n\t\t\tnegative_prereq=\n\t\tesac\n\n\t\tcase \" $lazily_tested_prereq \" in\n\t\t*\" $prerequisite \"*)\n\t\t\t;;\n\t\t*)\n\t\t\tcase \" $lazily_testable_prereq \" in\n\t\t\t*\" $prerequisite \"*)\n\t\t\t\teval \"script=\\$test_prereq_lazily_$prerequisite\" &&\n\t\t\t\tif test_run_lazy_prereq_ \"$prerequisite\" \"$script\"\n\t\t\t\tthen\n\t\t\t\t\ttest_set_prereq $prerequisite\n\t\t\t\tfi\n\t\t\t\tlazily_tested_prereq=\"$lazily_tested_prereq$prerequisite \"\n\t\t\tesac\n\t\t\t;;\n\t\tesac\n\n\t\ttotal_prereq=$(($total_prereq + 1))\n\t\tcase \"$satisfied_prereq\" in\n\t\t*\" $prerequisite \"*)\n\t\t\tsatisfied_this_prereq=t\n\t\t\t;;\n\t\t*)\n\t\t\tsatisfied_this_prereq=\n\t\tesac\n\n\t\tcase 
\"$satisfied_this_prereq,$negative_prereq\" in\n\t\tt,|,t)\n\t\t\tok_prereq=$(($ok_prereq + 1))\n\t\t\t;;\n\t\t*)\n\t\t\t# Keep a list of missing prerequisites; restore\n\t\t\t# the negative marker if necessary.\n\t\t\tprerequisite=${negative_prereq:+!}$prerequisite\n\t\t\tif test -z \"$missing_prereq\"\n\t\t\tthen\n\t\t\t\tmissing_prereq=$prerequisite\n\t\t\telse\n\t\t\t\tmissing_prereq=\"$prerequisite,$missing_prereq\"\n\t\t\tfi\n\t\tesac\n\tdone\n\n\ttest $total_prereq = $ok_prereq\n}\n\ntest_declared_prereq () {\n\tcase \",$test_prereq,\" in\n\t*,$1,*)\n\t\treturn 0\n\t\t;;\n\tesac\n\treturn 1\n}\n\ntest_verify_prereq () {\n\ttest -z \"$test_prereq\" ||\n\texpr >/dev/null \"$test_prereq\" : '[A-Z0-9_,!]*$' ||\n\tBUG \"'$test_prereq' does not look like a prereq\"\n}\n\ntest_expect_failure () {\n\ttest_start_\n\ttest \"$#\" = 3 && { test_prereq=$1; shift; } || test_prereq=\n\ttest \"$#\" = 2 ||\n\tBUG \"not 2 or 3 parameters to test-expect-failure\"\n\ttest_verify_prereq\n\texport test_prereq\n\tif ! test_skip \"$@\"\n\tthen\n\t\tsay >&3 \"checking known breakage: $2\"\n\t\tif test_run_ \"$2\" expecting_failure\n\t\tthen\n\t\t\ttest_known_broken_ok_ \"$1\"\n\t\telse\n\t\t\ttest_known_broken_failure_ \"$1\"\n\t\tfi\n\tfi\n\ttest_finish_\n}\n\ntest_expect_success () {\n\ttest_start_\n\ttest \"$#\" = 3 && { test_prereq=$1; shift; } || test_prereq=\n\ttest \"$#\" = 2 ||\n\tBUG \"not 2 or 3 parameters to test-expect-success\"\n\ttest_verify_prereq\n\texport test_prereq\n\tif ! test_skip \"$@\"\n\tthen\n\t\tsay >&3 \"expecting success: $2\"\n\t\tif test_run_ \"$2\"\n\t\tthen\n\t\t\ttest_ok_ \"$1\"\n\t\telse\n\t\t\ttest_failure_ \"$@\"\n\t\tfi\n\tfi\n\ttest_finish_\n}\n\n# test_external runs external test scripts that provide continuous\n# test output about their progress, and succeeds/fails on\n# zero/non-zero exit code.  It outputs the test output on stdout even\n# in non-verbose mode, and announces the external script with \"# run\n# <n>: ...\" before running it.  
When providing relative paths, keep in\n# mind that all scripts run in \"trash directory\".\n# Usage: test_external description command arguments...\n# Example: test_external 'Perl API' perl ../path/to/test.pl\ntest_external () {\n\ttest \"$#\" = 4 && { test_prereq=$1; shift; } || test_prereq=\n\ttest \"$#\" = 3 ||\n\tBUG \"not 3 or 4 parameters to test_external\"\n\tdescr=\"$1\"\n\tshift\n\ttest_verify_prereq\n\texport test_prereq\n\tif ! test_skip \"$descr\" \"$@\"\n\tthen\n\t\t# Announce the script to reduce confusion about the\n\t\t# test output that follows.\n\t\tsay_color \"\" \"# run $test_count: $descr ($*)\"\n\t\t# Export TEST_DIRECTORY, TRASH_DIRECTORY and GIT_TEST_LONG\n\t\t# to be able to use them in script\n\t\texport TEST_DIRECTORY TRASH_DIRECTORY GIT_TEST_LONG\n\t\t# Run command; redirect its stderr to &4 as in\n\t\t# test_run_, but keep its stdout on our stdout even in\n\t\t# non-verbose mode.\n\t\t\"$@\" 2>&4\n\t\tif test \"$?\" = 0\n\t\tthen\n\t\t\tif test $test_external_has_tap -eq 0; then\n\t\t\t\ttest_ok_ \"$descr\"\n\t\t\telse\n\t\t\t\tsay_color \"\" \"# test_external test $descr was ok\"\n\t\t\t\ttest_success=$(($test_success + 1))\n\t\t\tfi\n\t\telse\n\t\t\tif test $test_external_has_tap -eq 0; then\n\t\t\t\ttest_failure_ \"$descr\" \"$@\"\n\t\t\telse\n\t\t\t\tsay_color error \"# test_external test $descr failed: $@\"\n\t\t\t\ttest_failure=$(($test_failure + 1))\n\t\t\tfi\n\t\tfi\n\tfi\n}\n\n# Like test_external, but in addition tests that the command generated\n# no output on stderr.\ntest_external_without_stderr () {\n\t# The temporary file has no (and must have no) security\n\t# implications.\n\ttmp=${TMPDIR:-/tmp}\n\tstderr=\"$tmp/git-external-stderr.$$.tmp\"\n\ttest_external \"$@\" 4> \"$stderr\"\n\ttest -f \"$stderr\" || error \"Internal error: $stderr disappeared.\"\n\tdescr=\"no stderr: $1\"\n\tshift\n\tsay >&3 \"# expecting no stderr from previous command\"\n\tif test ! 
-s \"$stderr\"\n\tthen\n\t\trm \"$stderr\"\n\n\t\tif test $test_external_has_tap -eq 0; then\n\t\t\ttest_ok_ \"$descr\"\n\t\telse\n\t\t\tsay_color \"\" \"# test_external_without_stderr test $descr was ok\"\n\t\t\ttest_success=$(($test_success + 1))\n\t\tfi\n\telse\n\t\tif test \"$verbose\" = t\n\t\tthen\n\t\t\toutput=$(echo; echo \"# Stderr is:\"; cat \"$stderr\")\n\t\telse\n\t\t\toutput=\n\t\tfi\n\t\t# rm first in case test_failure exits.\n\t\trm \"$stderr\"\n\t\tif test $test_external_has_tap -eq 0; then\n\t\t\ttest_failure_ \"$descr\" \"$@\" \"$output\"\n\t\telse\n\t\t\tsay_color error \"# test_external_without_stderr test $descr failed: $@: $output\"\n\t\t\ttest_failure=$(($test_failure + 1))\n\t\tfi\n\tfi\n}\n\n# debugging-friendly alternatives to \"test [-f|-d|-e]\"\n# The commands test the existence or non-existence of $1. $2 can be\n# given to provide a more precise diagnosis.\ntest_path_is_file () {\n\tif ! test -f \"$1\"\n\tthen\n\t\techo \"File $1 doesn't exist. $2\"\n\t\tfalse\n\tfi\n}\n\ntest_path_is_dir () {\n\tif ! test -d \"$1\"\n\tthen\n\t\techo \"Directory $1 doesn't exist. $2\"\n\t\tfalse\n\tfi\n}\n\ntest_path_exists () {\n\tif ! test -e \"$1\"\n\tthen\n\t\techo \"Path $1 doesn't exist. $2\"\n\t\tfalse\n\tfi\n}\n\n# Check if the directory exists and is empty as expected, barf otherwise.\ntest_dir_is_empty () {\n\ttest_path_is_dir \"$1\" &&\n\tif test -n \"$(ls -a1 \"$1\" | egrep -v '^\\.\\.?$')\"\n\tthen\n\t\techo \"Directory '$1' is not empty, it contains:\"\n\t\tls -la \"$1\"\n\t\treturn 1\n\tfi\n}\n\ntest_path_is_missing () {\n\tif test -e \"$1\"\n\tthen\n\t\techo \"Path exists:\"\n\t\tls -ld \"$1\"\n\t\tif test $# -ge 1\n\t\tthen\n\t\t\techo \"$*\"\n\t\tfi\n\t\tfalse\n\tfi\n}\n\n# test_line_count checks that a file has the number of lines it\n# ought to. 
For example:\n#\n#\ttest_expect_success 'produce exactly one line of output' '\n#\t\tdo something >output &&\n#\t\ttest_line_count = 1 output\n#\t'\n#\n# is like \"test $(wc -l <output) = 1\" except that it passes the\n# output through when the number of lines is wrong.\n\ntest_line_count () {\n\tif test $# != 3\n\tthen\n\t\tBUG \"not 3 parameters to test_line_count\"\n\telif ! test $(wc -l <\"$3\") \"$1\" \"$2\"\n\tthen\n\t\techo \"test_line_count: line count for $3 !$1 $2\"\n\t\tcat \"$3\"\n\t\treturn 1\n\tfi\n}\n\n# Returns success if a comma separated string of keywords ($1) contains a\n# given keyword ($2).\n# Examples:\n# `list_contains \"foo,bar\" bar` returns 0\n# `list_contains \"foo\" bar` returns 1\n\nlist_contains () {\n\tcase \",$1,\" in\n\t*,$2,*)\n\t\treturn 0\n\t\t;;\n\tesac\n\treturn 1\n}\n\n# This is not among top-level (test_expect_success | test_expect_failure)\n# but is a prefix that can be used in the test script, like:\n#\n#\ttest_expect_success 'complain and die' '\n#           do something &&\n#           do something else &&\n#\t    test_must_fail git checkout ../outerspace\n#\t'\n#\n# Writing this as \"! git checkout ../outerspace\" is wrong, because\n# the failure could be due to a segv.  We want a controlled failure.\n#\n# Accepts the following options:\n#\n#   ok=<signal-name>[,<...>]:\n#     Don't treat an exit caused by the given signal as error.\n#     Multiple signals can be specified as a comma separated list.\n#     Currently recognized signal names are: sigpipe, success.\n#     (Don't use 'success', use 'test_might_fail' instead.)\n\ntest_must_fail () {\n\tcase \"$1\" in\n\tok=*)\n\t\t_test_ok=${1#ok=}\n\t\tshift\n\t\t;;\n\t*)\n\t\t_test_ok=\n\t\t;;\n\tesac\n\t\"$@\" 2>&7\n\texit_code=$?\n\tif test $exit_code -eq 0 && ! 
list_contains \"$_test_ok\" success\n\tthen\n\t\techo >&4 \"test_must_fail: command succeeded: $*\"\n\t\treturn 1\n\telif test_match_signal 13 $exit_code && list_contains \"$_test_ok\" sigpipe\n\tthen\n\t\treturn 0\n\telif test $exit_code -gt 129 && test $exit_code -le 192\n\tthen\n\t\techo >&4 \"test_must_fail: died by signal $(($exit_code - 128)): $*\"\n\t\treturn 1\n\telif test $exit_code -eq 127\n\tthen\n\t\techo >&4 \"test_must_fail: command not found: $*\"\n\t\treturn 1\n\telif test $exit_code -eq 126\n\tthen\n\t\techo >&4 \"test_must_fail: valgrind error: $*\"\n\t\treturn 1\n\tfi\n\treturn 0\n} 7>&2 2>&4\n\n# Similar to test_must_fail, but tolerates success, too.  This is\n# meant to be used in contexts like:\n#\n#\ttest_expect_success 'some command works without configuration' '\n#\t\ttest_might_fail git config --unset all.configuration &&\n#\t\tdo something\n#\t'\n#\n# Writing \"git config --unset all.configuration || :\" would be wrong,\n# because we want to notice if it fails due to segv.\n#\n# Accepts the same options as test_must_fail.\n\ntest_might_fail () {\n\ttest_must_fail ok=success \"$@\" 2>&7\n} 7>&2 2>&4\n\n# Similar to test_must_fail and test_might_fail, but check that a\n# given command exited with a given exit code. 
Meant to be used as:\n#\n#\ttest_expect_success 'Merge with d/f conflicts' '\n#\t\ttest_expect_code 1 git merge \"merge msg\" B master\n#\t'\n\ntest_expect_code () {\n\twant_code=$1\n\tshift\n\t\"$@\" 2>&7\n\texit_code=$?\n\tif test $exit_code = $want_code\n\tthen\n\t\treturn 0\n\tfi\n\n\techo >&4 \"test_expect_code: command exited with $exit_code, we wanted $want_code $*\"\n\treturn 1\n} 7>&2 2>&4\n\n# test_cmp is a helper function to compare actual and expected output.\n# You can use it like:\n#\n#\ttest_expect_success 'foo works' '\n#\t\techo expected >expected &&\n#\t\tfoo >actual &&\n#\t\ttest_cmp expected actual\n#\t'\n#\n# This could be written as either \"cmp\" or \"diff -u\", but:\n# - cmp's output is not nearly as easy to read as diff -u\n# - not all diff versions understand \"-u\"\n\ntest_cmp() {\n\t$GIT_TEST_CMP \"$@\"\n}\n\n# Check that the given config key has the expected value.\n#\n#    test_cmp_config [-C <dir>] <expected-value>\n#                    [<git-config-options>...] <config-key>\n#\n# for example to check that the value of core.bar is foo\n#\n#    test_cmp_config foo core.bar\n#\ntest_cmp_config() {\n\tlocal GD &&\n\tif test \"$1\" = \"-C\"\n\tthen\n\t\tshift &&\n\t\tGD=\"-C $1\" &&\n\t\tshift\n\tfi &&\n\tprintf \"%s\\n\" \"$1\" >expect.config &&\n\tshift &&\n\tgit $GD config \"$@\" >actual.config &&\n\ttest_cmp expect.config actual.config\n}\n\n# test_cmp_bin - helper to compare binary files\n\ntest_cmp_bin() {\n\tcmp \"$@\"\n}\n\n# Use this instead of test_cmp to compare files that contain expected and\n# actual output from git commands that can be translated.  When running\n# under GIT_TEST_GETTEXT_POISON this pretends that the command produced expected\n# results.\ntest_i18ncmp () {\n\t! 
test_have_prereq C_LOCALE_OUTPUT || test_cmp \"$@\"\n}\n\n# Use this instead of \"grep expected-string actual\" to see if the\n# output from a git command that can be translated either contains an\n# expected string, or does not contain an unwanted one.  When running\n# under GIT_TEST_GETTEXT_POISON this pretends that the command produced expected\n# results.\ntest_i18ngrep () {\n\teval \"last_arg=\\${$#}\"\n\n\ttest -f \"$last_arg\" ||\n\tBUG \"test_i18ngrep requires a file to read as the last parameter\"\n\n\tif test $# -lt 2 ||\n\t   { test \"x!\" = \"x$1\" && test $# -lt 3 ; }\n\tthen\n\t\tBUG \"too few parameters to test_i18ngrep\"\n\tfi\n\n\tif test_have_prereq !C_LOCALE_OUTPUT\n\tthen\n\t\t# pretend success\n\t\treturn 0\n\tfi\n\n\tif test \"x!\" = \"x$1\"\n\tthen\n\t\tshift\n\t\t! grep \"$@\" && return 0\n\n\t\techo >&4 \"error: '! grep $@' did find a match in:\"\n\telse\n\t\tgrep \"$@\" && return 0\n\n\t\techo >&4 \"error: 'grep $@' didn't find a match in:\"\n\tfi\n\n\tif test -s \"$last_arg\"\n\tthen\n\t\tcat >&4 \"$last_arg\"\n\telse\n\t\techo >&4 \"<File '$last_arg' is empty>\"\n\tfi\n\n\treturn 1\n}\n\n# Call any command \"$@\" but be more verbose about its\n# failure. 
This is handy for commands like \"test\" which do\n# not output anything when they fail.\nverbose () {\n\t\"$@\" && return 0\n\techo >&4 \"command failed: $(git rev-parse --sq-quote \"$@\")\"\n\treturn 1\n}\n\n# Check if the file expected to be empty is indeed empty, and barfs\n# otherwise.\n\ntest_must_be_empty () {\n\ttest_path_is_file \"$1\" &&\n\tif test -s \"$1\"\n\tthen\n\t\techo \"'$1' is not empty, it contains:\"\n\t\tcat \"$1\"\n\t\treturn 1\n\tfi\n}\n\n# Tests that its two parameters refer to the same revision\ntest_cmp_rev () {\n\tif test $# != 2\n\tthen\n\t\terror \"bug in the test script: test_cmp_rev requires two revisions, but got $#\"\n\telse\n\t\tlocal r1 r2\n\t\tr1=$(git rev-parse --verify \"$1\") &&\n\t\tr2=$(git rev-parse --verify \"$2\") &&\n\t\tif test \"$r1\" != \"$r2\"\n\t\tthen\n\t\t\tcat >&4 <<-EOF\n\t\t\terror: two revisions point to different objects:\n\t\t\t  '$1': $r1\n\t\t\t  '$2': $r2\n\t\t\tEOF\n\t\t\treturn 1\n\t\tfi\n\tfi\n}\n\n# Print a sequence of integers in increasing order, either with\n# two arguments (start and end):\n#\n#     test_seq 1 5 -- outputs 1 2 3 4 5 one line at a time\n#\n# or with one argument (end), in which case it starts counting\n# from 1.\n\ntest_seq () {\n\tcase $# in\n\t1)\tset 1 \"$@\" ;;\n\t2)\t;;\n\t*)\tBUG \"not 1 or 2 parameters to test_seq\" ;;\n\tesac\n\ttest_seq_counter__=$1\n\twhile test \"$test_seq_counter__\" -le \"$2\"\n\tdo\n\t\techo \"$test_seq_counter__\"\n\t\ttest_seq_counter__=$(( $test_seq_counter__ + 1 ))\n\tdone\n}\n\n# This function can be used to schedule some commands to be run\n# unconditionally at the end of the test to restore sanity:\n#\n#\ttest_expect_success 'test core.capslock' '\n#\t\tgit config core.capslock true &&\n#\t\ttest_when_finished \"git config --unset core.capslock\" &&\n#\t\thello world\n#\t'\n#\n# That would be roughly equivalent to\n#\n#\ttest_expect_success 'test core.capslock' '\n#\t\tgit config core.capslock true &&\n#\t\thello world\n#\t\tgit config --unset 
core.capslock\n#\t'\n#\n# except that the greeting and config --unset must both succeed for\n# the test to pass.\n#\n# Note that under --immediate mode, no clean-up is done to help diagnose\n# what went wrong.\n\ntest_when_finished () {\n\t# We cannot detect when we are in a subshell in general, but by\n\t# doing so on Bash is better than nothing (the test will\n\t# silently pass on other shells).\n\ttest \"${BASH_SUBSHELL-0}\" = 0 ||\n\tBUG \"test_when_finished does nothing in a subshell\"\n\ttest_cleanup=\"{ $*\n\t\t} && (exit \\\"\\$eval_ret\\\"); eval_ret=\\$?; $test_cleanup\"\n}\n\n# Most tests can use the created repository, but some may need to create more.\n# Usage: test_create_repo <directory>\ntest_create_repo () {\n\ttest \"$#\" = 1 ||\n\tBUG \"not 1 parameter to test-create-repo\"\n\trepo=\"$1\"\n\tmkdir -p \"$repo\"\n\t(\n\t\tcd \"$repo\" || error \"Cannot setup test environment\"\n\t\t\"${GIT_TEST_INSTALLED:-$GIT_EXEC_PATH}/git$X\" init \\\n\t\t\t\"--template=$GIT_BUILD_DIR/templates/blt/\" >&3 2>&4 ||\n\t\terror \"cannot run git init -- have you built things yet?\"\n\t\ttest -d .git/hooks && mv .git/hooks .git/hooks-disabled || true\n\t) || exit\n}\n\n# This function helps on symlink challenged file systems when it is not\n# important that the file system entry is a symbolic link.\n# Use test_ln_s_add instead of \"ln -s x y && git add y\" to add a\n# symbolic link entry y to the index.\n\ntest_ln_s_add () {\n\tif test_have_prereq SYMLINKS\n\tthen\n\t\tln -s \"$1\" \"$2\" &&\n\t\tgit update-index --add \"$2\"\n\telse\n\t\tprintf '%s' \"$1\" >\"$2\" &&\n\t\tln_s_obj=$(git hash-object -w \"$2\") &&\n\t\tgit update-index --add --cacheinfo 120000 $ln_s_obj \"$2\" &&\n\t\t# pick up stat info from the file\n\t\tgit update-index \"$2\"\n\tfi\n}\n\n# This function writes out its parameters, one per line\ntest_write_lines () {\n\tprintf \"%s\\n\" \"$@\"\n}\n\nperl () {\n\tcommand \"$PERL_PATH\" \"$@\" 2>&7\n} 7>&2 2>&4\n\n# Is the value one of the various ways 
to spell a boolean true/false?\ntest_normalize_bool () {\n\tgit -c magic.variable=\"$1\" config --bool magic.variable 2>/dev/null\n}\n\n# Given a variable $1, normalize the value of it to one of \"true\",\n# \"false\", or \"auto\" and store the result to it.\n#\n#     test_tristate GIT_TEST_HTTPD\n#\n# A variable set to an empty string is set to 'false'.\n# A variable set to 'false' or 'auto' keeps its value.\n# Anything else is set to 'true'.\n# An unset variable defaults to 'auto'.\n#\n# The last rule is to allow people to set the variable to an empty\n# string and export it to decline testing the particular feature\n# for versions both before and after this change.  We used to treat\n# both unset and empty variable as a signal for \"do not test\" and\n# took any non-empty string as \"please test\".\n\ntest_tristate () {\n\tif eval \"test x\\\"\\${$1+isset}\\\" = xisset\"\n\tthen\n\t\t# explicitly set\n\t\teval \"\n\t\t\tcase \\\"\\$$1\\\" in\n\t\t\t'')\t$1=false ;;\n\t\t\tauto)\t;;\n\t\t\t*)\t$1=\\$(test_normalize_bool \\$$1 || echo true) ;;\n\t\t\tesac\n\t\t\"\n\telse\n\t\teval \"$1=auto\"\n\tfi\n}\n\n# Exit the test suite, either by skipping all remaining tests or by\n# exiting with an error. If \"$1\" is \"auto\", then we assume we were\n# opportunistically trying to set up some tests and we skip. If it is\n# \"true\", then we report a failure.\n#\n# The error/skip message should be given by $2.\n#\ntest_skip_or_die () {\n\tcase \"$1\" in\n\tauto)\n\t\tskip_all=$2\n\t\ttest_done\n\t\t;;\n\ttrue)\n\t\terror \"$2\"\n\t\t;;\n\t*)\n\t\terror \"BUG: test tristate is '$1' (real error: $2)\"\n\tesac\n}\n\n# The following mingw_* functions obey POSIX shell syntax, but are actually\n# bash scripts, and are meant to be used only with bash on Windows.\n\n# A test_cmp function that treats LF and CRLF equal and avoids to fork\n# diff when possible.\nmingw_test_cmp () {\n\t# Read text into shell variables and compare them. 
If the results\n\t# are different, use regular diff to report the difference.\n\tlocal test_cmp_a= test_cmp_b=\n\n\t# When text came from stdin (one argument is '-') we must feed it\n\t# to diff.\n\tlocal stdin_for_diff=\n\n\t# Since it is difficult to detect the difference between an\n\t# empty input file and a failure to read the files, we go straight\n\t# to diff if one of the inputs is empty.\n\tif test -s \"$1\" && test -s \"$2\"\n\tthen\n\t\t# regular case: both files non-empty\n\t\tmingw_read_file_strip_cr_ test_cmp_a <\"$1\"\n\t\tmingw_read_file_strip_cr_ test_cmp_b <\"$2\"\n\telif test -s \"$1\" && test \"$2\" = -\n\tthen\n\t\t# read 2nd file from stdin\n\t\tmingw_read_file_strip_cr_ test_cmp_a <\"$1\"\n\t\tmingw_read_file_strip_cr_ test_cmp_b\n\t\tstdin_for_diff='<<<\"$test_cmp_b\"'\n\telif test \"$1\" = - && test -s \"$2\"\n\tthen\n\t\t# read 1st file from stdin\n\t\tmingw_read_file_strip_cr_ test_cmp_a\n\t\tmingw_read_file_strip_cr_ test_cmp_b <\"$2\"\n\t\tstdin_for_diff='<<<\"$test_cmp_a\"'\n\tfi\n\ttest -n \"$test_cmp_a\" &&\n\ttest -n \"$test_cmp_b\" &&\n\ttest \"$test_cmp_a\" = \"$test_cmp_b\" ||\n\teval \"diff -u \\\"\\$@\\\" $stdin_for_diff\"\n}\n\n# $1 is the name of the shell variable to fill in\nmingw_read_file_strip_cr_ () {\n\t# Read line-wise using LF as the line separator\n\t# and use IFS to strip CR.\n\tlocal line\n\twhile :\n\tdo\n\t\tif IFS=$'\\r' read -r -d $'\\n' line\n\t\tthen\n\t\t\t# good\n\t\t\tline=$line$'\\n'\n\t\telse\n\t\t\t# we get here at EOF, but also if the last line\n\t\t\t# was not terminated by LF; in the latter case,\n\t\t\t# some text was read\n\t\t\tif test -z \"$line\"\n\t\t\tthen\n\t\t\t\t# EOF, really\n\t\t\t\tbreak\n\t\t\tfi\n\t\tfi\n\t\teval \"$1=\\$$1\\$line\"\n\tdone\n}\n\n# Like \"env FOO=BAR some-program\", but run inside a subshell, which means\n# it also works for shell functions (though those functions cannot impact\n# the environment outside of the test_env invocation).\ntest_env () {\n\t(\n\t\twhile test 
$# -gt 0\n\t\tdo\n\t\t\tcase \"$1\" in\n\t\t\t*=*)\n\t\t\t\teval \"${1%%=*}=\\${1#*=}\"\n\t\t\t\teval \"export ${1%%=*}\"\n\t\t\t\tshift\n\t\t\t\t;;\n\t\t\t*)\n\t\t\t\t\"$@\" 2>&7\n\t\t\t\texit\n\t\t\t\t;;\n\t\t\tesac\n\t\tdone\n\t)\n} 7>&2 2>&4\n\n# Returns true if the numeric exit code in \"$2\" represents the expected signal\n# in \"$1\". Signals should be given numerically.\ntest_match_signal () {\n\tif test \"$2\" = \"$((128 + $1))\"\n\tthen\n\t\t# POSIX\n\t\treturn 0\n\telif test \"$2\" = \"$((256 + $1))\"\n\tthen\n\t\t# ksh\n\t\treturn 0\n\tfi\n\treturn 1\n}\n\n# Read up to \"$1\" bytes (or to EOF) from stdin and write them to stdout.\ntest_copy_bytes () {\n\tperl -e '\n\t\tmy $len = $ARGV[1];\n\t\twhile ($len > 0) {\n\t\t\tmy $s;\n\t\t\tmy $nread = sysread(STDIN, $s, $len);\n\t\t\tdie \"cannot read: $!\" unless defined($nread);\n\t\t\tlast unless $nread;\n\t\t\tprint $s;\n\t\t\t$len -= $nread;\n\t\t}\n\t' - \"$1\"\n}\n\n# run \"$@\" inside a non-git directory\nnongit () {\n\ttest -d non-repo ||\n\tmkdir non-repo ||\n\treturn 1\n\n\t(\n\t\tGIT_CEILING_DIRECTORIES=$(pwd) &&\n\t\texport GIT_CEILING_DIRECTORIES &&\n\t\tcd non-repo &&\n\t\t\"$@\" 2>&7\n\t)\n} 7>&2 2>&4\n\n# convert stdin to pktline representation; note that empty input becomes an\n# empty packet, not a flush packet (for that you can just print 0000 yourself).\npacketize() {\n\tcat >packetize.tmp &&\n\tlen=$(wc -c <packetize.tmp) &&\n\tprintf '%04x%s' \"$(($len + 4))\" &&\n\tcat packetize.tmp &&\n\trm -f packetize.tmp\n}\n\n# Parse the input as a series of pktlines, writing the result to stdout.\n# Sideband markers are removed automatically, and the output is routed to\n# stderr if appropriate.\n#\n# NUL bytes are converted to \"\\\\0\" for ease of parsing with text tools.\ndepacketize () {\n\tperl -e '\n\t\twhile (read(STDIN, $len, 4) == 4) {\n\t\t\tif ($len eq \"0000\") {\n\t\t\t\tprint \"FLUSH\\n\";\n\t\t\t} else {\n\t\t\t\tread(STDIN, $buf, hex($len) - 4);\n\t\t\t\t$buf =~ 
s/\\0/\\\\0/g;\n\t\t\t\tif ($buf =~ s/^[\\x2\\x3]//) {\n\t\t\t\t\tprint STDERR $buf;\n\t\t\t\t} else {\n\t\t\t\t\t$buf =~ s/^\\x1//;\n\t\t\t\t\tprint $buf;\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t'\n}\n\n# Set the hash algorithm in use to $1.  Only useful when testing the testsuite.\ntest_set_hash () {\n\ttest_hash_algo=\"$1\"\n}\n\n# Detect the hash algorithm in use.\ntest_detect_hash () {\n\t# Currently we only support SHA-1, but in the future this function will\n\t# actually detect the algorithm in use.\n\ttest_hash_algo='sha1'\n}\n\n# Load common hash metadata and common placeholder object IDs for use with\n# test_oid.\ntest_oid_init () {\n\ttest -n \"$test_hash_algo\" || test_detect_hash &&\n\ttest_oid_cache <\"$TEST_DIRECTORY/oid-info/hash-info\" &&\n\ttest_oid_cache <\"$TEST_DIRECTORY/oid-info/oid\"\n}\n\n# Load key-value pairs from stdin suitable for use with test_oid.  Blank lines\n# and lines starting with \"#\" are ignored.  Keys must be shell identifier\n# characters.\n#\n# Examples:\n# rawsz sha1:20\n# rawsz sha256:32\ntest_oid_cache () {\n\tlocal tag rest k v &&\n\n\t{ test -n \"$test_hash_algo\" || test_detect_hash; } &&\n\twhile read tag rest\n\tdo\n\t\tcase $tag in\n\t\t\\#*)\n\t\t\tcontinue;;\n\t\t?*)\n\t\t\t# non-empty\n\t\t\t;;\n\t\t*)\n\t\t\t# blank line\n\t\t\tcontinue;;\n\t\tesac &&\n\n\t\tk=\"${rest%:*}\" &&\n\t\tv=\"${rest#*:}\" &&\n\n\t\tif ! expr \"$k\" : '[a-z0-9][a-z0-9]*$' >/dev/null\n\t\tthen\n\t\t\tBUG 'bad hash algorithm'\n\t\tfi &&\n\t\teval \"test_oid_${k}_$tag=\\\"\\$v\\\"\"\n\tdone\n}\n\n# Look up a per-hash value based on a key ($1).  
The value must have been loaded\n# by test_oid_init or test_oid_cache.\ntest_oid () {\n\tlocal var=\"test_oid_${test_hash_algo}_$1\" &&\n\n\t# If the variable is unset, we must be missing an entry for this\n\t# key-hash pair, so exit with an error.\n\tif eval \"test -z \\\"\\${$var+set}\\\"\"\n\tthen\n\t\tBUG \"undefined key '$1'\"\n\tfi &&\n\teval \"printf '%s' \\\"\\${$var}\\\"\"\n}\n\n# Choose a port number based on the test script's number and store it in\n# the given variable name, unless that variable already contains a number.\ntest_set_port () {\n\tlocal var=$1 port\n\n\tif test $# -ne 1 || test -z \"$var\"\n\tthen\n\t\tBUG \"test_set_port requires a variable name\"\n\tfi\n\n\teval port=\\$$var\n\tcase \"$port\" in\n\t\"\")\n\t\t# No port is set in the given env var, use the test\n\t\t# number as port number instead.\n\t\t# Remove not only the leading 't', but all leading zeros\n\t\t# as well, so the arithmetic below won't (mis)interpret\n\t\t# a test number like '0123' as an octal value.\n\t\tport=${this_test#${this_test%%[1-9]*}}\n\t\tif test \"${port:-0}\" -lt 1024\n\t\tthen\n\t\t\t# root-only port, use a larger one instead.\n\t\t\tport=$(($port + 10000))\n\t\tfi\n\t\t;;\n\t*[!0-9]*|0*)\n\t\terror >&7 \"invalid port number: $port\"\n\t\t;;\n\t*)\n\t\t# The user has specified the port.\n\t\t;;\n\tesac\n\n\t# Make sure that parallel '--stress' test jobs get different\n\t# ports.\n\tport=$(($port + ${GIT_TEST_STRESS_JOB_NR:-0}))\n\teval $var=$port\n}\n"
  },
  {
    "path": "t/test-lib.sh",
    "content": "# Test framework for git.  See t/README for usage.\n#\n# Copyright (c) 2005 Junio C Hamano\n#\n# This program is free software: you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation, either version 2 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program.  If not, see http://www.gnu.org/licenses/ .\n\n# Test the binaries we have just built.  The tests are kept in\n# t/ subdirectory and are run in 'trash directory' subdirectory.\nif test -z \"$TEST_DIRECTORY\"\nthen\n\t# We allow tests to override this, in case they want to run tests\n\t# outside of t/, e.g. for running tests on the test library\n\t# itself.\n\tTEST_DIRECTORY=$(pwd)\nelse\n\t# ensure that TEST_DIRECTORY is an absolute path so that it\n\t# is valid even if the current working directory is changed\n\tTEST_DIRECTORY=$(cd \"$TEST_DIRECTORY\" && pwd) || exit 1\nfi\nif test -z \"$TEST_OUTPUT_DIRECTORY\"\nthen\n\t# Similarly, override this to store the test-results subdir\n\t# elsewhere\n\tTEST_OUTPUT_DIRECTORY=$TEST_DIRECTORY\nfi\nGIT_BUILD_DIR=\"$TEST_DIRECTORY\"/..\n\n# If we were built with ASAN, it may complain about leaks\n# of program-lifetime variables. Disable it by default to lower\n# the noise level. 
This needs to happen at the start of the script,\n# before we even do our \"did we build git yet\" check (since we don't\n# want that one to complain to stderr).\n: ${ASAN_OPTIONS=detect_leaks=0:abort_on_error=1}\nexport ASAN_OPTIONS\n\n# If LSAN is in effect we _do_ want leak checking, but we still\n# want to abort so that we notice the problems.\n: ${LSAN_OPTIONS=abort_on_error=1}\nexport LSAN_OPTIONS\n\nPERL_PATH=${PERL_PATH:-perl}\nexport PERL_PATH SHELL_PATH\n\nGIT_TEST_INSTALLED=${GIT_TEST_INSTALLED:-$(dirname $(type -p git))}\nGIT_TEST_CHAIN_LINT=0\nDIFF='diff'\n\n# Parse options while taking care to leave $@ intact, so we will still\n# have all the original command line options when executing the test\n# script again for '--tee' and '--verbose-log' below.\nstore_arg_to=\nprev_opt=\nfor opt\ndo\n\tif test -n \"$store_arg_to\"\n\tthen\n\t\teval $store_arg_to=\\$opt\n\t\tstore_arg_to=\n\t\tprev_opt=\n\t\tcontinue\n\tfi\n\n\tcase \"$opt\" in\n\t-d|--d|--de|--deb|--debu|--debug)\n\t\tdebug=t ;;\n\t-i|--i|--im|--imm|--imme|--immed|--immedi|--immedia|--immediat|--immediate)\n\t\timmediate=t ;;\n\t-l|--l|--lo|--lon|--long|--long-|--long-t|--long-te|--long-tes|--long-test|--long-tests)\n\t\tGIT_TEST_LONG=t; export GIT_TEST_LONG ;;\n\t-r)\n\t\tstore_arg_to=run_list\n\t\t;;\n\t--run=*)\n\t\trun_list=${opt#--*=} ;;\n\t-h|--h|--he|--hel|--help)\n\t\thelp=t ;;\n\t-v|--v|--ve|--ver|--verb|--verbo|--verbos|--verbose)\n\t\tverbose=t ;;\n\t--verbose-only=*)\n\t\tverbose_only=${opt#--*=}\n\t\t;;\n\t-q|--q|--qu|--qui|--quie|--quiet)\n\t\t# Ignore --quiet under a TAP::Harness. 
Saying how many tests\n\t\t# passed without the ok/not ok details is always an error.\n\t\ttest -z \"$HARNESS_ACTIVE\" && quiet=t ;;\n\t--with-dashes)\n\t\twith_dashes=t ;;\n\t--no-bin-wrappers)\n\t\tno_bin_wrappers=t ;;\n\t--no-color)\n\t\tcolor= ;;\n\t--va|--val|--valg|--valgr|--valgri|--valgrin|--valgrind)\n\t\tvalgrind=memcheck\n\t\ttee=t\n\t\t;;\n\t--valgrind=*)\n\t\tvalgrind=${opt#--*=}\n\t\ttee=t\n\t\t;;\n\t--valgrind-only=*)\n\t\tvalgrind_only=${opt#--*=}\n\t\ttee=t\n\t\t;;\n\t--tee)\n\t\ttee=t ;;\n\t--root=*)\n\t\troot=${opt#--*=} ;;\n\t--chain-lint)\n\t\tGIT_TEST_CHAIN_LINT=1 ;;\n\t--no-chain-lint)\n\t\tGIT_TEST_CHAIN_LINT=0 ;;\n\t-x)\n\t\ttrace=t ;;\n\t-V|--verbose-log)\n\t\tverbose_log=t\n\t\ttee=t\n\t\t;;\n\t--write-junit-xml)\n\t\twrite_junit_xml=t\n\t\t;;\n\t--stress)\n\t\tstress=t ;;\n\t--stress=*)\n\t\tstress=${opt#--*=}\n\t\tcase \"$stress\" in\n\t\t*[!0-9]*|0*|\"\")\n\t\t\techo \"error: --stress=<N> requires the number of jobs to run\" >&2\n\t\t\texit 1\n\t\t\t;;\n\t\t*)\t# Good.\n\t\t\t;;\n\t\tesac\n\t\t;;\n\t--stress-limit=*)\n\t\tstress_limit=${opt#--*=}\n\t\tcase \"$stress_limit\" in\n\t\t*[!0-9]*|0*|\"\")\n\t\t\techo \"error: --stress-limit=<N> requires the number of repetitions\" >&2\n\t\t\texit 1\n\t\t\t;;\n\t\t*)\t# Good.\n\t\t\t;;\n\t\tesac\n\t\t;;\n\t*)\n\t\techo \"error: unknown test option '$opt'\" >&2; exit 1 ;;\n\tesac\n\n\tprev_opt=$opt\ndone\nif test -n \"$store_arg_to\"\nthen\n\techo \"error: $prev_opt requires an argument\" >&2\n\texit 1\nfi\n\nif test -n \"$valgrind_only\"\nthen\n\ttest -z \"$valgrind\" && valgrind=memcheck\n\ttest -z \"$verbose\" && verbose_only=\"$valgrind_only\"\nelif test -n \"$valgrind\"\nthen\n\ttest -z \"$verbose_log\" && verbose=t\nfi\n\nif test -n \"$stress\"\nthen\n\tverbose=t\n\ttrace=t\n\timmediate=t\nfi\n\nTEST_STRESS_JOB_SFX=\"${GIT_TEST_STRESS_JOB_NR:+.stress-$GIT_TEST_STRESS_JOB_NR}\"\nTEST_NAME=\"$(basename \"$0\" 
.sh)\"\nTEST_RESULTS_DIR=\"$TEST_OUTPUT_DIRECTORY/test-results\"\nTEST_RESULTS_BASE=\"$TEST_RESULTS_DIR/$TEST_NAME$TEST_STRESS_JOB_SFX\"\nTRASH_DIRECTORY=\"trash directory.$TEST_NAME$TEST_STRESS_JOB_SFX\"\ntest -n \"$root\" && TRASH_DIRECTORY=\"$root/$TRASH_DIRECTORY\"\ncase \"$TRASH_DIRECTORY\" in\n/*) ;; # absolute path is good\n *) TRASH_DIRECTORY=\"$TEST_OUTPUT_DIRECTORY/$TRASH_DIRECTORY\" ;;\nesac\n\n# If --stress was passed, run this test repeatedly in several parallel loops.\nif test \"$GIT_TEST_STRESS_STARTED\" = \"done\"\nthen\n\t: # Don't stress test again.\nelif test -n \"$stress\"\nthen\n\tif test \"$stress\" != t\n\tthen\n\t\tjob_count=$stress\n\telif test -n \"$GIT_TEST_STRESS_LOAD\"\n\tthen\n\t\tjob_count=\"$GIT_TEST_STRESS_LOAD\"\n\telif job_count=$(getconf _NPROCESSORS_ONLN 2>/dev/null) &&\n\t     test -n \"$job_count\"\n\tthen\n\t\tjob_count=$((2 * $job_count))\n\telse\n\t\tjob_count=8\n\tfi\n\n\tmkdir -p \"$TEST_RESULTS_DIR\"\n\tstressfail=\"$TEST_RESULTS_BASE.stress-failed\"\n\trm -f \"$stressfail\"\n\n\tstress_exit=0\n\ttrap '\n\t\tkill $job_pids 2>/dev/null\n\t\twait\n\t\tstress_exit=1\n\t' TERM INT HUP\n\n\tjob_pids=\n\tjob_nr=0\n\twhile test $job_nr -lt \"$job_count\"\n\tdo\n\t\t(\n\t\t\tGIT_TEST_STRESS_STARTED=done\n\t\t\tGIT_TEST_STRESS_JOB_NR=$job_nr\n\t\t\texport GIT_TEST_STRESS_STARTED GIT_TEST_STRESS_JOB_NR\n\n\t\t\ttrap '\n\t\t\t\tkill $test_pid 2>/dev/null\n\t\t\t\twait\n\t\t\t\texit 1\n\t\t\t' TERM INT\n\n\t\t\tcnt=1\n\t\t\twhile ! 
test -e \"$stressfail\" &&\n\t\t\t      { test -z \"$stress_limit\" ||\n\t\t\t\ttest $cnt -le $stress_limit ; }\n\t\t\tdo\n\t\t\t\t$TEST_SHELL_PATH \"$0\" \"$@\" >\"$TEST_RESULTS_BASE.stress-$job_nr.out\" 2>&1 &\n\t\t\t\ttest_pid=$!\n\n\t\t\t\tif wait $test_pid\n\t\t\t\tthen\n\t\t\t\t\tprintf \"OK   %2d.%d\\n\" $GIT_TEST_STRESS_JOB_NR $cnt\n\t\t\t\telse\n\t\t\t\t\techo $GIT_TEST_STRESS_JOB_NR >>\"$stressfail\"\n\t\t\t\t\tprintf \"FAIL %2d.%d\\n\" $GIT_TEST_STRESS_JOB_NR $cnt\n\t\t\t\tfi\n\t\t\t\tcnt=$(($cnt + 1))\n\t\t\tdone\n\t\t) &\n\t\tjob_pids=\"$job_pids $!\"\n\t\tjob_nr=$(($job_nr + 1))\n\tdone\n\n\twait\n\n\tif test -f \"$stressfail\"\n\tthen\n\t\tstress_exit=1\n\t\techo \"Log(s) of failed test run(s):\"\n\t\tfor failed_job_nr in $(sort -n \"$stressfail\")\n\t\tdo\n\t\t\techo \"Contents of '$TEST_RESULTS_BASE.stress-$failed_job_nr.out':\"\n\t\t\tcat \"$TEST_RESULTS_BASE.stress-$failed_job_nr.out\"\n\t\tdone\n\t\trm -rf \"$TRASH_DIRECTORY.stress-failed\"\n\t\t# Move the last one.\n\t\tmv \"$TRASH_DIRECTORY.stress-$failed_job_nr\" \"$TRASH_DIRECTORY.stress-failed\"\n\tfi\n\n\texit $stress_exit\nfi\n\n# if --tee was passed, write the output not only to the terminal, but\n# additionally to the file test-results/$BASENAME.out, too.\nif test \"$GIT_TEST_TEE_STARTED\" = \"done\"\nthen\n\t: # do not redirect again\nelif test -n \"$tee\"\nthen\n\tmkdir -p \"$TEST_RESULTS_DIR\"\n\n\t# Make this filename available to the sub-process in case it is using\n\t# --verbose-log.\n\tGIT_TEST_TEE_OUTPUT_FILE=$TEST_RESULTS_BASE.out\n\texport GIT_TEST_TEE_OUTPUT_FILE\n\n\t# Truncate before calling \"tee -a\" to get rid of the results\n\t# from any previous runs.\n\t>\"$GIT_TEST_TEE_OUTPUT_FILE\"\n\n\t(GIT_TEST_TEE_STARTED=done ${TEST_SHELL_PATH} \"$0\" \"$@\" 2>&1;\n\t echo $? 
>\"$TEST_RESULTS_BASE.exit\") | tee -a \"$GIT_TEST_TEE_OUTPUT_FILE\"\n\ttest \"$(cat \"$TEST_RESULTS_BASE.exit\")\" = 0\n\texit\nfi\n\nif test -n \"$trace\" && test -n \"$test_untraceable\"\nthen\n\t# '-x' tracing requested, but this test script can't be reliably\n\t# traced, unless it is run with a Bash version supporting\n\t# BASH_XTRACEFD (introduced in Bash v4.1).\n\t#\n\t# Perform this version check _after_ the test script was\n\t# potentially re-executed with $TEST_SHELL_PATH for '--tee' or\n\t# '--verbose-log', so the right shell is checked and the\n\t# warning is issued only once.\n\tif test -n \"$BASH_VERSION\" && eval '\n\t     test ${BASH_VERSINFO[0]} -gt 4 || {\n\t       test ${BASH_VERSINFO[0]} -eq 4 &&\n\t       test ${BASH_VERSINFO[1]} -ge 1\n\t     }\n\t   '\n\tthen\n\t\t: Executed by a Bash version supporting BASH_XTRACEFD.  Good.\n\telse\n\t\techo >&2 \"warning: ignoring -x; '$0' is untraceable without BASH_XTRACEFD\"\n\t\ttrace=\n\tfi\nfi\nif test -n \"$trace\" && test -z \"$verbose_log\"\nthen\n\tverbose=t\nfi\n\n# For repeatability, reset the environment to known value.\n# TERM is sanitized below, after saving color control sequences.\nLANG=C\nLC_ALL=C\nPAGER=cat\nTZ=UTC\nexport LANG LC_ALL PAGER TZ\nEDITOR=:\n\n# GIT_TEST_GETTEXT_POISON should not influence git commands executed\n# during initialization of test-lib and the test repo. Back it up,\n# unset and then restore after initialization is finished.\nif test -n \"$GIT_TEST_GETTEXT_POISON\"\nthen\n\tGIT_TEST_GETTEXT_POISON_ORIG=$GIT_TEST_GETTEXT_POISON\n\tunset GIT_TEST_GETTEXT_POISON\nfi\n\n# A call to \"unset\" with no arguments causes at least Solaris 10\n# /usr/xpg4/bin/sh and /bin/ksh to bail out.  
So keep the unsets\n# deriving from the command substitution clustered with the other\n# ones.\nunset VISUAL EMAIL LANGUAGE COLUMNS $(\"$PERL_PATH\" -e '\n\tmy @env = keys %ENV;\n\tmy $ok = join(\"|\", qw(\n\t\tTRACE\n\t\tDEBUG\n\t\tTEST\n\t\t.*_TEST\n\t\tPROVE\n\t\tVALGRIND\n\t\tUNZIP\n\t\tPERF_\n\t\tCURL_VERBOSE\n\t\tTRACE_CURL\n\t));\n\tmy @vars = grep(/^GIT_/ && !/^GIT_($ok)/o, @env);\n\tprint join(\"\\n\", @vars);\n')\nunset XDG_CACHE_HOME\nunset XDG_CONFIG_HOME\nunset GITPERLLIB\nGIT_AUTHOR_EMAIL=author@example.com\nGIT_AUTHOR_NAME='A U Thor'\nGIT_COMMITTER_EMAIL=committer@example.com\nGIT_COMMITTER_NAME='C O Mitter'\nGIT_MERGE_VERBOSITY=5\nGIT_MERGE_AUTOEDIT=no\nexport GIT_MERGE_VERBOSITY GIT_MERGE_AUTOEDIT\nexport GIT_AUTHOR_EMAIL GIT_AUTHOR_NAME\nexport GIT_COMMITTER_EMAIL GIT_COMMITTER_NAME\nexport EDITOR\n\n# Tests using GIT_TRACE typically don't want <timestamp> <file>:<line> output\nGIT_TRACE_BARE=1\nexport GIT_TRACE_BARE\n\ncheck_var_migration () {\n\t# the warnings and hints given from this helper depends\n\t# on end-user settings, which will disrupt the self-test\n\t# done on the test framework itself.\n\tcase \"$GIT_TEST_FRAMEWORK_SELFTEST\" in\n\tt)\treturn ;;\n\tesac\n\n\told_name=$1 new_name=$2\n\teval \"old_isset=\\${${old_name}:+isset}\"\n\teval \"new_isset=\\${${new_name}:+isset}\"\n\n\tcase \"$old_isset,$new_isset\" in\n\tisset,)\n\t\techo >&2 \"warning: $old_name is now $new_name\"\n\t\techo >&2 \"hint: set $new_name too during the transition period\"\n\t\teval \"$new_name=\\$$old_name\"\n\t\t;;\n\tisset,isset)\n\t\t# do this later\n\t\t# echo >&2 \"warning: $old_name is now $new_name\"\n\t\t# echo >&2 \"hint: remove $old_name\"\n\t\t;;\n\tesac\n}\n\ncheck_var_migration GIT_FSMONITOR_TEST GIT_TEST_FSMONITOR\ncheck_var_migration TEST_GIT_INDEX_VERSION GIT_TEST_INDEX_VERSION\ncheck_var_migration GIT_FORCE_PRELOAD_TEST GIT_TEST_PRELOAD_INDEX\n\n# Use specific version of the index file format\nif test -n 
\"${GIT_TEST_INDEX_VERSION:+isset}\"\nthen\n\tGIT_INDEX_VERSION=\"$GIT_TEST_INDEX_VERSION\"\n\texport GIT_INDEX_VERSION\nfi\n\n# Add libc MALLOC and MALLOC_PERTURB test\n# only if we are not executing the test with valgrind\nif test -n \"$valgrind\" ||\n   test -n \"$TEST_NO_MALLOC_CHECK\"\nthen\n\tsetup_malloc_check () {\n\t\t: nothing\n\t}\n\tteardown_malloc_check () {\n\t\t: nothing\n\t}\nelse\n\tsetup_malloc_check () {\n\t\tMALLOC_CHECK_=3\tMALLOC_PERTURB_=165\n\t\texport MALLOC_CHECK_ MALLOC_PERTURB_\n\t}\n\tteardown_malloc_check () {\n\t\tunset MALLOC_CHECK_ MALLOC_PERTURB_\n\t}\nfi\n\n# Protect ourselves from common misconfiguration to export\n# CDPATH into the environment\nunset CDPATH\n\nunset GREP_OPTIONS\nunset UNZIP\n\ncase $(echo $GIT_TRACE |tr \"[A-Z]\" \"[a-z]\") in\n1|2|true)\n\tGIT_TRACE=4\n\t;;\nesac\n\n# Convenience\n#\n# A regexp to match 5, 35 and 40 hexdigits\n_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'\n_x35=\"$_x05$_x05$_x05$_x05$_x05$_x05$_x05\"\n_x40=\"$_x35$_x05\"\n\n# Zero SHA-1\n_z40=0000000000000000000000000000000000000000\n\nOID_REGEX=\"$_x40\"\nZERO_OID=$_z40\nEMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904\nEMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\n\n# Line feed\nLF='\n'\n\n# UTF-8 ZERO WIDTH NON-JOINER, which HFS+ ignores\n# when case-folding filenames\nu200c=$(printf '\\342\\200\\214')\n\nexport _x05 _x35 _x40 _z40 LF u200c EMPTY_TREE EMPTY_BLOB ZERO_OID OID_REGEX\n\n# Each test should start with something like this, after copyright notices:\n#\n# test_description='Description of this test...\n# This test checks if command xyzzy does the right thing...\n# '\n# . ./test-lib.sh\ntest \"x$TERM\" != \"xdumb\" && (\n\t\ttest -t 1 &&\n\t\ttput bold >/dev/null 2>&1 &&\n\t\ttput setaf 1 >/dev/null 2>&1 &&\n\t\ttput sgr0 >/dev/null 2>&1\n\t) &&\n\tcolor=t\n\nif test -n \"$color\"\nthen\n\t# Save the color control sequences now rather than run tput\n\t# each time say_color() is called.  
This is done for two\n\t# reasons:\n\t#   * TERM will be changed to dumb\n\t#   * HOME will be changed to a temporary directory and tput\n\t#     might need to read ~/.terminfo from the original HOME\n\t#     directory to get the control sequences\n\t# Note:  This approach assumes the control sequences don't end\n\t# in a newline for any terminal of interest (command\n\t# substitutions strip trailing newlines).  Given that most\n\t# (all?) terminals in common use are related to ECMA-48, this\n\t# shouldn't be a problem.\n\tsay_color_error=$(tput bold; tput setaf 1) # bold red\n\tsay_color_skip=$(tput setaf 4) # blue\n\tsay_color_warn=$(tput setaf 3) # brown/yellow\n\tsay_color_pass=$(tput setaf 2) # green\n\tsay_color_info=$(tput setaf 6) # cyan\n\tsay_color_reset=$(tput sgr0)\n\tsay_color_=\"\" # no formatting for normal text\n\tsay_color () {\n\t\ttest -z \"$1\" && test -n \"$quiet\" && return\n\t\teval \"say_color_color=\\$say_color_$1\"\n\t\tshift\n\t\tprintf \"%s\\\\n\" \"$say_color_color$*$say_color_reset\"\n\t}\nelse\n\tsay_color() {\n\t\ttest -z \"$1\" && test -n \"$quiet\" && return\n\t\tshift\n\t\tprintf \"%s\\n\" \"$*\"\n\t}\nfi\n\nTERM=dumb\nexport TERM\n\nerror () {\n\tsay_color error \"error: $*\"\n\tGIT_EXIT_OK=t\n\texit 1\n}\n\nBUG () {\n\terror >&7 \"bug in the test script: $*\"\n}\n\nsay () {\n\tsay_color info \"$*\"\n}\n\nif test -n \"$HARNESS_ACTIVE\"\nthen\n\tif test \"$verbose\" = t || test -n \"$verbose_only\"\n\tthen\n\t\tprintf 'Bail out! 
%s\\n' \\\n\t\t 'verbose mode forbidden under TAP harness; try --verbose-log'\n\t\texit 1\n\tfi\nfi\n\ntest \"${test_description}\" != \"\" ||\nerror \"Test script did not set test_description.\"\n\nif test \"$help\" = \"t\"\nthen\n\tprintf '%s\\n' \"$test_description\"\n\texit 0\nfi\n\nexec 5>&1\nexec 6<&0\nexec 7>&2\nif test \"$verbose_log\" = \"t\"\nthen\n\texec 3>>\"$GIT_TEST_TEE_OUTPUT_FILE\" 4>&3\nelif test \"$verbose\" = \"t\"\nthen\n\texec 4>&2 3>&1\nelse\n\texec 4>/dev/null 3>/dev/null\nfi\n\n# Send any \"-x\" output directly to stderr to avoid polluting tests\n# which capture stderr. We can do this unconditionally since it\n# has no effect if tracing isn't turned on.\n#\n# Note that this sets up the trace fd as soon as we assign the variable, so it\n# must come after the creation of descriptor 4 above. Likewise, we must never\n# unset this, as it has the side effect of closing descriptor 4, which we\n# use to show verbose tests to the user.\n#\n# Note also that we don't need or want to export it. The tracing is local to\n# this shell, and we would not want to influence any shells we exec.\nBASH_XTRACEFD=4\n\ntest_failure=0\ntest_count=0\ntest_fixed=0\ntest_broken=0\ntest_success=0\n\ntest_external_has_tap=0\n\ndie () {\n\tcode=$?\n\tif test -n \"$GIT_EXIT_OK\"\n\tthen\n\t\texit $code\n\telse\n\t\techo >&5 \"FATAL: Unexpected exit with code $code\"\n\t\texit 1\n\tfi\n}\n\nGIT_EXIT_OK=\ntrap 'die' EXIT\ntrap 'exit $?' INT TERM HUP\n\n# The user-facing functions are loaded from a separate file so that\n# test_perf subshells can have them too\n. 
\"$TEST_DIRECTORY/test-lib-functions.sh\"\n\n# You are not expected to call test_ok_ and test_failure_ directly, use\n# the test_expect_* functions instead.\n\ntest_ok_ () {\n\tif test -n \"$write_junit_xml\"\n\tthen\n\t\twrite_junit_xml_testcase \"$*\"\n\tfi\n\ttest_success=$(($test_success + 1))\n\tsay_color \"\" \"ok $test_count - $@\"\n}\n\ntest_failure_ () {\n\tif test -n \"$write_junit_xml\"\n\tthen\n\t\tjunit_insert=\"<failure message=\\\"not ok $test_count -\"\n\t\tjunit_insert=\"$junit_insert $(xml_attr_encode \"$1\")\\\">\"\n\t\tjunit_insert=\"$junit_insert $(xml_attr_encode \\\n\t\t\t\"$(if test -n \"$GIT_TEST_TEE_OUTPUT_FILE\"\n\t\t\t   then\n\t\t\t\ttest-tool path-utils skip-n-bytes \\\n\t\t\t\t\t\"$GIT_TEST_TEE_OUTPUT_FILE\" $GIT_TEST_TEE_OFFSET\n\t\t\t   else\n\t\t\t\tprintf '%s\\n' \"$@\" | sed 1d\n\t\t\t   fi)\")\"\n\t\tjunit_insert=\"$junit_insert</failure>\"\n\t\tif test -n \"$GIT_TEST_TEE_OUTPUT_FILE\"\n\t\tthen\n\t\t\tjunit_insert=\"$junit_insert<system-err>$(xml_attr_encode \\\n\t\t\t\t\"$(cat \"$GIT_TEST_TEE_OUTPUT_FILE\")\")</system-err>\"\n\t\tfi\n\t\twrite_junit_xml_testcase \"$1\" \"      $junit_insert\"\n\tfi\n\ttest_failure=$(($test_failure + 1))\n\tsay_color error \"not ok $test_count - $1\"\n\tshift\n\tprintf '%s\\n' \"$*\" | sed -e 's/^/#\t/'\n\ttest \"$immediate\" = \"\" || { GIT_EXIT_OK=t; exit 1; }\n}\n\ntest_known_broken_ok_ () {\n\tif test -n \"$write_junit_xml\"\n\tthen\n\t\twrite_junit_xml_testcase \"$* (breakage fixed)\"\n\tfi\n\ttest_fixed=$(($test_fixed+1))\n\tsay_color error \"ok $test_count - $@ # TODO known breakage vanished\"\n}\n\ntest_known_broken_failure_ () {\n\tif test -n \"$write_junit_xml\"\n\tthen\n\t\twrite_junit_xml_testcase \"$* (known breakage)\"\n\tfi\n\ttest_broken=$(($test_broken+1))\n\tsay_color warn \"not ok $test_count - $@ # TODO known breakage\"\n}\n\ntest_debug () {\n\ttest \"$debug\" = \"\" || eval \"$1\"\n}\n\nmatch_pattern_list () {\n\targ=\"$1\"\n\tshift\n\ttest -z \"$*\" && return 1\n\tfor 
pattern_\n\tdo\n\t\tcase \"$arg\" in\n\t\t$pattern_)\n\t\t\treturn 0\n\t\tesac\n\tdone\n\treturn 1\n}\n\nmatch_test_selector_list () {\n\ttitle=\"$1\"\n\tshift\n\targ=\"$1\"\n\tshift\n\ttest -z \"$1\" && return 0\n\n\t# Both commas and whitespace are accepted as separators.\n\tOLDIFS=$IFS\n\tIFS=' \t,'\n\tset -- $1\n\tIFS=$OLDIFS\n\n\t# If the first selector is negative we include by default.\n\tinclude=\n\tcase \"$1\" in\n\t\t!*) include=t ;;\n\tesac\n\n\tfor selector\n\tdo\n\t\torig_selector=$selector\n\n\t\tpositive=t\n\t\tcase \"$selector\" in\n\t\t\t!*)\n\t\t\t\tpositive=\n\t\t\t\tselector=${selector##?}\n\t\t\t\t;;\n\t\tesac\n\n\t\ttest -z \"$selector\" && continue\n\n\t\tcase \"$selector\" in\n\t\t\t*-*)\n\t\t\t\tif expr \"z${selector%%-*}\" : \"z[0-9]*[^0-9]\" >/dev/null\n\t\t\t\tthen\n\t\t\t\t\techo \"error: $title: invalid non-numeric in range\" \\\n\t\t\t\t\t\t\"start: '$orig_selector'\" >&2\n\t\t\t\t\texit 1\n\t\t\t\tfi\n\t\t\t\tif expr \"z${selector#*-}\" : \"z[0-9]*[^0-9]\" >/dev/null\n\t\t\t\tthen\n\t\t\t\t\techo \"error: $title: invalid non-numeric in range\" \\\n\t\t\t\t\t\t\"end: '$orig_selector'\" >&2\n\t\t\t\t\texit 1\n\t\t\t\tfi\n\t\t\t\t;;\n\t\t\t*)\n\t\t\t\tif expr \"z$selector\" : \"z[0-9]*[^0-9]\" >/dev/null\n\t\t\t\tthen\n\t\t\t\t\techo \"error: $title: invalid non-numeric in test\" \\\n\t\t\t\t\t\t\"selector: '$orig_selector'\" >&2\n\t\t\t\t\texit 1\n\t\t\t\tfi\n\t\tesac\n\n\t\t# Short cut for \"obvious\" cases\n\t\ttest -z \"$include\" && test -z \"$positive\" && continue\n\t\ttest -n \"$include\" && test -n \"$positive\" && continue\n\n\t\tcase \"$selector\" in\n\t\t\t-*)\n\t\t\t\tif test $arg -le ${selector#-}\n\t\t\t\tthen\n\t\t\t\t\tinclude=$positive\n\t\t\t\tfi\n\t\t\t\t;;\n\t\t\t*-)\n\t\t\t\tif test $arg -ge ${selector%-}\n\t\t\t\tthen\n\t\t\t\t\tinclude=$positive\n\t\t\t\tfi\n\t\t\t\t;;\n\t\t\t*-*)\n\t\t\t\tif test ${selector%%-*} -le $arg \\\n\t\t\t\t\t&& test $arg -le 
${selector#*-}\n\t\t\t\tthen\n\t\t\t\t\tinclude=$positive\n\t\t\t\tfi\n\t\t\t\t;;\n\t\t\t*)\n\t\t\t\tif test $arg -eq $selector\n\t\t\t\tthen\n\t\t\t\t\tinclude=$positive\n\t\t\t\tfi\n\t\t\t\t;;\n\t\tesac\n\tdone\n\n\ttest -n \"$include\"\n}\n\nmaybe_teardown_verbose () {\n\ttest -z \"$verbose_only\" && return\n\texec 4>/dev/null 3>/dev/null\n\tverbose=\n}\n\nlast_verbose=t\nmaybe_setup_verbose () {\n\ttest -z \"$verbose_only\" && return\n\tif match_pattern_list $test_count $verbose_only\n\tthen\n\t\texec 4>&2 3>&1\n\t\t# Emit a delimiting blank line when going from\n\t\t# non-verbose to verbose.  Within verbose mode the\n\t\t# delimiter is printed by test_expect_*.  The choice\n\t\t# of the initial $last_verbose is such that before\n\t\t# test 1, we do not print it.\n\t\ttest -z \"$last_verbose\" && echo >&3 \"\"\n\t\tverbose=t\n\telse\n\t\texec 4>/dev/null 3>/dev/null\n\t\tverbose=\n\tfi\n\tlast_verbose=$verbose\n}\n\nmaybe_teardown_valgrind () {\n\ttest -z \"$GIT_VALGRIND\" && return\n\tGIT_VALGRIND_ENABLED=\n}\n\nmaybe_setup_valgrind () {\n\ttest -z \"$GIT_VALGRIND\" && return\n\tif test -z \"$valgrind_only\"\n\tthen\n\t\tGIT_VALGRIND_ENABLED=t\n\t\treturn\n\tfi\n\tGIT_VALGRIND_ENABLED=\n\tif match_pattern_list $test_count $valgrind_only\n\tthen\n\t\tGIT_VALGRIND_ENABLED=t\n\tfi\n}\n\nwant_trace () {\n\ttest \"$trace\" = t && {\n\t\ttest \"$verbose\" = t || test \"$verbose_log\" = t\n\t}\n}\n\n# This is a separate function because some tests use\n# \"return\" to end a test_expect_success block early\n# (and we want to make sure we run any cleanup like\n# \"set +x\").\ntest_eval_inner_ () {\n\t# Do not add anything extra (including LF) after '$*'\n\teval \"\n\t\twant_trace && set -x\n\t\t$*\"\n}\n\ntest_eval_ () {\n\t# If \"-x\" tracing is in effect, then we want to avoid polluting stderr\n\t# with non-test commands. But once in \"set -x\" mode, we cannot prevent\n\t# the shell from printing the \"set +x\" to turn it off (nor the saving\n\t# of $? before that). 
But we can make sure that the output goes to\n\t# /dev/null.\n\t#\n\t# There are a few subtleties here:\n\t#\n\t#   - we have to redirect descriptor 4 in addition to 2, to cover\n\t#     BASH_XTRACEFD\n\t#\n\t#   - the actual eval has to come before the redirection block (since\n\t#     it needs to see descriptor 4 to set up its stderr)\n\t#\n\t#   - likewise, any error message we print must be outside the block to\n\t#     access descriptor 4\n\t#\n\t#   - checking $? has to come immediately after the eval, but it must\n\t#     be _inside_ the block to avoid polluting the \"set -x\" output\n\t#\n\n\ttest_eval_inner_ \"$@\" </dev/null >&3 2>&4\n\t{\n\t\ttest_eval_ret_=$?\n\t\tif want_trace\n\t\tthen\n\t\t\tset +x\n\t\tfi\n\t} 2>/dev/null 4>&2\n\n\tif test \"$test_eval_ret_\" != 0 && want_trace\n\tthen\n\t\tsay_color error >&4 \"error: last command exited with \\$?=$test_eval_ret_\"\n\tfi\n\treturn $test_eval_ret_\n}\n\ntest_run_ () {\n\ttest_cleanup=:\n\texpecting_failure=$2\n\n\tif test \"${GIT_TEST_CHAIN_LINT:-1}\" != 0; then\n\t\t# turn off tracing for this test-eval, as it simply creates\n\t\t# confusing noise in the \"-x\" output\n\t\ttrace_tmp=$trace\n\t\ttrace=\n\t\t# 117 is magic because it is unlikely to match the exit\n\t\t# code of other programs\n\t\tif $(printf '%s\\n' \"$1\" | sed -f \"$GIT_BUILD_DIR/t/chainlint.sed\" | grep -q '?![A-Z][A-Z]*?!') ||\n\t\t\ttest \"OK-117\" != \"$(test_eval_ \"(exit 117) && $1${LF}${LF}echo OK-\\$?\" 3>&1)\"\n\t\tthen\n\t\t\tBUG \"broken &&-chain or run-away HERE-DOC: $1\"\n\t\tfi\n\t\ttrace=$trace_tmp\n\tfi\n\n\tsetup_malloc_check\n\ttest_eval_ \"$1\"\n\teval_ret=$?\n\tteardown_malloc_check\n\n\tif test -z \"$immediate\" || test $eval_ret = 0 ||\n\t   test -n \"$expecting_failure\" && test \"$test_cleanup\" != \":\"\n\tthen\n\t\tsetup_malloc_check\n\t\ttest_eval_ \"$test_cleanup\"\n\t\tteardown_malloc_check\n\tfi\n\tif test \"$verbose\" = \"t\" && test -n \"$HARNESS_ACTIVE\"\n\tthen\n\t\techo \"\"\n\tfi\n\treturn 
\"$eval_ret\"\n}\n\ntest_start_ () {\n\ttest_count=$(($test_count+1))\n\tmaybe_setup_verbose\n\tmaybe_setup_valgrind\n\tif test -n \"$write_junit_xml\"\n\tthen\n\t\tjunit_start=$(test-tool date getnanos)\n\tfi\n}\n\ntest_finish_ () {\n\techo >&3 \"\"\n\tmaybe_teardown_valgrind\n\tmaybe_teardown_verbose\n\tif test -n \"$GIT_TEST_TEE_OFFSET\"\n\tthen\n\t\tGIT_TEST_TEE_OFFSET=$(test-tool path-utils file-size \\\n\t\t\t\"$GIT_TEST_TEE_OUTPUT_FILE\")\n\tfi\n}\n\ntest_skip () {\n\tto_skip=\n\tskipped_reason=\n\tif match_pattern_list $this_test.$test_count $GIT_SKIP_TESTS\n\tthen\n\t\tto_skip=t\n\t\tskipped_reason=\"GIT_SKIP_TESTS\"\n\tfi\n\tif test -z \"$to_skip\" && test -n \"$test_prereq\" &&\n\t   ! test_have_prereq \"$test_prereq\"\n\tthen\n\t\tto_skip=t\n\n\t\tof_prereq=\n\t\tif test \"$missing_prereq\" != \"$test_prereq\"\n\t\tthen\n\t\t\tof_prereq=\" of $test_prereq\"\n\t\tfi\n\t\tskipped_reason=\"missing $missing_prereq${of_prereq}\"\n\tfi\n\tif test -z \"$to_skip\" && test -n \"$run_list\" &&\n\t\t! 
match_test_selector_list '--run' $test_count \"$run_list\"\n\tthen\n\t\tto_skip=t\n\t\tskipped_reason=\"--run\"\n\tfi\n\n\tcase \"$to_skip\" in\n\tt)\n\t\tif test -n \"$write_junit_xml\"\n\t\tthen\n\t\t\tmessage=\"$(xml_attr_encode \"$skipped_reason\")\"\n\t\t\twrite_junit_xml_testcase \"$1\" \\\n\t\t\t\t\"      <skipped message=\\\"$message\\\" />\"\n\t\tfi\n\n\t\tsay_color skip >&3 \"skipping test: $@\"\n\t\tsay_color skip \"ok $test_count # skip $1 ($skipped_reason)\"\n\t\t: true\n\t\t;;\n\t*)\n\t\tfalse\n\t\t;;\n\tesac\n}\n\n# stub; perf-lib overrides it\ntest_at_end_hook_ () {\n\t:\n}\n\nwrite_junit_xml () {\n\tcase \"$1\" in\n\t--truncate)\n\t\t>\"$junit_xml_path\"\n\t\tjunit_have_testcase=\n\t\tshift\n\t\t;;\n\tesac\n\tprintf '%s\\n' \"$@\" >>\"$junit_xml_path\"\n}\n\nxml_attr_encode () {\n\tprintf '%s\\n' \"$@\" | test-tool xml-encode\n}\n\nwrite_junit_xml_testcase () {\n\tjunit_attrs=\"name=\\\"$(xml_attr_encode \"$this_test.$test_count $1\")\\\"\"\n\tshift\n\tjunit_attrs=\"$junit_attrs classname=\\\"$this_test\\\"\"\n\tjunit_attrs=\"$junit_attrs time=\\\"$(test-tool \\\n\t\tdate getnanos $junit_start)\\\"\"\n\twrite_junit_xml \"$(printf '%s\\n' \\\n\t\t\"    <testcase $junit_attrs>\" \"$@\" \"    </testcase>\")\"\n\tjunit_have_testcase=t\n}\n\ntest_done () {\n\tGIT_EXIT_OK=t\n\n\tif test -n \"$write_junit_xml\" && test -n \"$junit_xml_path\"\n\tthen\n\t\ttest -n \"$junit_have_testcase\" || {\n\t\t\tjunit_start=$(test-tool date getnanos)\n\t\t\twrite_junit_xml_testcase \"all tests skipped\"\n\t\t}\n\n\t\t# adjust the overall time\n\t\tjunit_time=$(test-tool date getnanos $junit_suite_start)\n\t\tsed \"s/<testsuite [^>]*/& time=\\\"$junit_time\\\"/\" \\\n\t\t\t<\"$junit_xml_path\" >\"$junit_xml_path.new\"\n\t\tmv \"$junit_xml_path.new\" \"$junit_xml_path\"\n\n\t\twrite_junit_xml \"  </testsuite>\" \"</testsuites>\"\n\tfi\n\n\tif test -z \"$HARNESS_ACTIVE\"\n\tthen\n\t\tmkdir -p \"$TEST_RESULTS_DIR\"\n\n\t\tcat >\"$TEST_RESULTS_BASE.counts\" 
<<-EOF\n\t\ttotal $test_count\n\t\tsuccess $test_success\n\t\tfixed $test_fixed\n\t\tbroken $test_broken\n\t\tfailed $test_failure\n\n\t\tEOF\n\tfi\n\n\tif test \"$test_fixed\" != 0\n\tthen\n\t\tsay_color error \"# $test_fixed known breakage(s) vanished; please update test(s)\"\n\tfi\n\tif test \"$test_broken\" != 0\n\tthen\n\t\tsay_color warn \"# still have $test_broken known breakage(s)\"\n\tfi\n\tif test \"$test_broken\" != 0 || test \"$test_fixed\" != 0\n\tthen\n\t\ttest_remaining=$(( $test_count - $test_broken - $test_fixed ))\n\t\tmsg=\"remaining $test_remaining test(s)\"\n\telse\n\t\ttest_remaining=$test_count\n\t\tmsg=\"$test_count test(s)\"\n\tfi\n\tcase \"$test_failure\" in\n\t0)\n\t\tif test $test_external_has_tap -eq 0\n\t\tthen\n\t\t\tif test $test_remaining -gt 0\n\t\t\tthen\n\t\t\t\tsay_color pass \"# passed all $msg\"\n\t\t\tfi\n\n\t\t\t# Maybe print SKIP message\n\t\t\ttest -z \"$skip_all\" || skip_all=\"# SKIP $skip_all\"\n\t\t\tcase \"$test_count\" in\n\t\t\t0)\n\t\t\t\tsay \"1..$test_count${skip_all:+ $skip_all}\"\n\t\t\t\t;;\n\t\t\t*)\n\t\t\t\ttest -z \"$skip_all\" ||\n\t\t\t\tsay_color warn \"$skip_all\"\n\t\t\t\tsay \"1..$test_count\"\n\t\t\t\t;;\n\t\t\tesac\n\t\tfi\n\n\t\tif test -z \"$debug\"\n\t\tthen\n\t\t\ttest -d \"$TRASH_DIRECTORY\" ||\n\t\t\terror \"Tests passed but trash directory already removed before test cleanup; aborting\"\n\n\t\t\tcd \"$TRASH_DIRECTORY/..\" &&\n\t\t\trm -fr \"$TRASH_DIRECTORY\" || {\n\t\t\t\t# try again in a bit\n\t\t\t\tsleep 5;\n\t\t\t\trm -fr \"$TRASH_DIRECTORY\"\n\t\t\t} ||\n\t\t\terror \"Tests passed but test cleanup failed; aborting\"\n\t\tfi\n\t\ttest_at_end_hook_\n\n\t\texit 0 ;;\n\n\t*)\n\t\tif test $test_external_has_tap -eq 0\n\t\tthen\n\t\t\tsay_color error \"# failed $test_failure among $msg\"\n\t\t\tsay \"1..$test_count\"\n\t\tfi\n\n\t\texit 1 ;;\n\n\tesac\n}\n\nif test -z \"$GIT_TEST_CMP\"\nthen\n\tif test -n \"$GIT_TEST_CMP_USE_COPIED_CONTEXT\"\n\tthen\n\t\tGIT_TEST_CMP=\"$DIFF 
-c\"\n\telse\n\t\tGIT_TEST_CMP=\"$DIFF -u\"\n\tfi\nfi\n\n# Test repository\nrm -fr \"$TRASH_DIRECTORY\" || {\n\tGIT_EXIT_OK=t\n\techo >&5 \"FATAL: Cannot prepare test area\"\n\texit 1\n}\n\nHOME=\"$TRASH_DIRECTORY\"\nGNUPGHOME=\"$HOME/gnupg-home-not-used\"\nexport HOME GNUPGHOME\n\nif test -z \"$TEST_NO_CREATE_REPO\"\nthen\n\ttest_create_repo \"$TRASH_DIRECTORY\"\nelse\n\tmkdir -p \"$TRASH_DIRECTORY\"\nfi\n\n# Use -P to resolve symlinks in our working directory so that the cwd\n# in subprocesses like git equals our $PWD (for pathname comparisons).\ncd -P \"$TRASH_DIRECTORY\" || exit 1\n\nthis_test=${0##*/}\nthis_test=${this_test%%-*}\nif match_pattern_list \"$this_test\" $GIT_SKIP_TESTS\nthen\n\tsay_color info >&3 \"skipping test $this_test altogether\"\n\tskip_all=\"skip all tests in $this_test\"\n\ttest_done\nfi\n\nif test -n \"$write_junit_xml\"\nthen\n\tjunit_xml_dir=\"$TEST_OUTPUT_DIRECTORY/out\"\n\tmkdir -p \"$junit_xml_dir\"\n\tjunit_xml_base=${0##*/}\n\tjunit_xml_path=\"$junit_xml_dir/TEST-${junit_xml_base%.sh}.xml\"\n\tjunit_attrs=\"name=\\\"${junit_xml_base%.sh}\\\"\"\n\tjunit_attrs=\"$junit_attrs timestamp=\\\"$(TZ=UTC \\\n\t\tdate +%Y-%m-%dT%H:%M:%S)\\\"\"\n\twrite_junit_xml --truncate \"<testsuites>\" \"  <testsuite $junit_attrs>\"\n\tjunit_suite_start=$(test-tool date getnanos)\n\tif test -n \"$GIT_TEST_TEE_OUTPUT_FILE\"\n\tthen\n\t\tGIT_TEST_TEE_OFFSET=0\n\tfi\nfi\n\n# Provide an implementation of the 'yes' utility; the upper bound\n# limit is there to help Windows that cannot stop this loop from\n# wasting cycles when the downstream stops reading, so do not be\n# tempted to turn it into an infinite loop. cf. 
6129c930 (\"test-lib:\n# limit the output of the yes utility\", 2016-02-02)\nyes () {\n\tif test $# = 0\n\tthen\n\t\ty=y\n\telse\n\t\ty=\"$*\"\n\tfi\n\n\ti=0\n\twhile test $i -lt 99\n\tdo\n\t\techo \"$y\"\n\t\ti=$(($i+1))\n\tdone\n}\n\n# Fix some commands on Windows, and other OS-specific things\nuname_s=$(uname -s)\ncase $uname_s in\n*MINGW*)\n\t# Windows has its own (incompatible) sort and find\n\tsort () {\n\t\t/usr/bin/sort \"$@\"\n\t}\n\tfind () {\n\t\t/usr/bin/find \"$@\"\n\t}\n\t# git sees Windows-style pwd\n\tpwd () {\n\t\tbuiltin pwd -W\n\t}\n\t# no POSIX permissions\n\t# backslashes in pathspec are converted to '/'\n\t# exec does not inherit the PID\n\ttest_set_prereq MINGW\n\ttest_set_prereq NATIVE_CRLF\n\ttest_set_prereq SED_STRIPS_CR\n\ttest_set_prereq GREP_STRIPS_CR\n\ttest_set_prereq WINDOWS\n\tGIT_TEST_CMP=mingw_test_cmp\n\t;;\n*CYGWIN*)\n\ttest_set_prereq POSIXPERM\n\ttest_set_prereq EXECKEEPSPID\n\ttest_set_prereq CYGWIN\n\ttest_set_prereq SED_STRIPS_CR\n\ttest_set_prereq GREP_STRIPS_CR\n\ttest_set_prereq WINDOWS\n\t;;\n*)\n\ttest_set_prereq POSIXPERM\n\ttest_set_prereq BSLASHPSPEC\n\ttest_set_prereq EXECKEEPSPID\n\t;;\nesac\n\n( COLUMNS=1 && test $COLUMNS = 1 ) && test_set_prereq COLUMNS_CAN_BE_1\ntest -z \"$NO_PERL\" && test_set_prereq PERL\ntest -z \"$NO_PTHREADS\" && test_set_prereq PTHREADS\ntest -z \"$NO_PYTHON\" && test_set_prereq PYTHON\ntest -n \"$USE_LIBPCRE1$USE_LIBPCRE2\" && test_set_prereq PCRE\ntest -n \"$USE_LIBPCRE1\" && test_set_prereq LIBPCRE1\ntest -n \"$USE_LIBPCRE2\" && test_set_prereq LIBPCRE2\ntest -z \"$NO_GETTEXT\" && test_set_prereq GETTEXT\n\nif test -n \"$GIT_TEST_GETTEXT_POISON_ORIG\"\nthen\n\tGIT_TEST_GETTEXT_POISON=$GIT_TEST_GETTEXT_POISON_ORIG\n\tunset GIT_TEST_GETTEXT_POISON_ORIG\nfi\n\n# Can we rely on git's output in the C locale?\nif test -z \"$GIT_TEST_GETTEXT_POISON\"\nthen\n\ttest_set_prereq C_LOCALE_OUTPUT\nfi\n\nif test -z \"$GIT_TEST_CHECK_CACHE_TREE\"\nthen\n\tGIT_TEST_CHECK_CACHE_TREE=true\n\texport 
GIT_TEST_CHECK_CACHE_TREE\nfi\n\ntest_lazy_prereq PIPE '\n\t# test whether the filesystem supports FIFOs\n\ttest_have_prereq !MINGW,!CYGWIN &&\n\trm -f testfifo && mkfifo testfifo\n'\n\ntest_lazy_prereq SYMLINKS '\n\t# test whether the filesystem supports symbolic links\n\tln -s x y && test -h y\n'\n\ntest_lazy_prereq FILEMODE '\n\ttest \"$(git config --bool core.filemode)\" = true\n'\n\ntest_lazy_prereq CASE_INSENSITIVE_FS '\n\techo good >CamelCase &&\n\techo bad >camelcase &&\n\ttest \"$(cat CamelCase)\" != good\n'\n\ntest_lazy_prereq FUNNYNAMES '\n\ttest_have_prereq !MINGW &&\n\ttouch -- \\\n\t\t\"FUNNYNAMES tab\tembedded\" \\\n\t\t\"FUNNYNAMES \\\"quote embedded\\\"\" \\\n\t\t\"FUNNYNAMES newline\nembedded\" 2>/dev/null &&\n\trm -- \\\n\t\t\"FUNNYNAMES tab\tembedded\" \\\n\t\t\"FUNNYNAMES \\\"quote embedded\\\"\" \\\n\t\t\"FUNNYNAMES newline\nembedded\" 2>/dev/null\n'\n\ntest_lazy_prereq UTF8_NFD_TO_NFC '\n\t# check whether FS converts nfd unicode to nfc\n\tauml=$(printf \"\\303\\244\")\n\taumlcdiar=$(printf \"\\141\\314\\210\")\n\t>\"$auml\" &&\n\ttest -f \"$aumlcdiar\"\n'\n\ntest_lazy_prereq AUTOIDENT '\n\tsane_unset GIT_AUTHOR_NAME &&\n\tsane_unset GIT_AUTHOR_EMAIL &&\n\tgit var GIT_AUTHOR_IDENT\n'\n\ntest_lazy_prereq EXPENSIVE '\n\ttest -n \"$GIT_TEST_LONG\"\n'\n\ntest_lazy_prereq EXPENSIVE_ON_WINDOWS '\n\ttest_have_prereq EXPENSIVE || test_have_prereq !MINGW,!CYGWIN\n'\n\ntest_lazy_prereq USR_BIN_TIME '\n\ttest -x /usr/bin/time\n'\n\ntest_lazy_prereq NOT_ROOT '\n\tuid=$(id -u) &&\n\ttest \"$uid\" != 0\n'\n\ntest_lazy_prereq JGIT '\n\ttype jgit\n'\n\n# SANITY is about \"can you correctly predict what the filesystem would\n# do by only looking at the permission bits of the files and\n# directories?\"  A typical example of !SANITY is running the test\n# suite as root, where a test may expect \"chmod -r file && cat file\"\n# to fail because file is supposed to be unreadable after a successful\n# chmod.  In an environment (i.e. 
combination of what filesystem is\n# being used and who is running the tests) that lacks SANITY, you may\n# be able to delete or create a file when the containing directory\n# doesn't have write permissions, or access a file even if the\n# containing directory doesn't have read or execute permissions.\n\ntest_lazy_prereq SANITY '\n\tmkdir SANETESTD.1 SANETESTD.2 &&\n\n\tchmod +w SANETESTD.1 SANETESTD.2 &&\n\t>SANETESTD.1/x 2>SANETESTD.2/x &&\n\tchmod -w SANETESTD.1 &&\n\tchmod -r SANETESTD.1/x &&\n\tchmod -rx SANETESTD.2 ||\n\tBUG \"cannot prepare SANETESTD\"\n\n\t! test -r SANETESTD.1/x &&\n\t! rm SANETESTD.1/x && ! test -f SANETESTD.2/x\n\tstatus=$?\n\n\tchmod +rwx SANETESTD.1 SANETESTD.2 &&\n\trm -rf SANETESTD.1 SANETESTD.2 ||\n\tBUG \"cannot clean SANETESTD\"\n\treturn $status\n'\n\ntest FreeBSD != $uname_s || GIT_UNZIP=${GIT_UNZIP:-/usr/local/bin/unzip}\nGIT_UNZIP=${GIT_UNZIP:-unzip}\ntest_lazy_prereq UNZIP '\n\t\"$GIT_UNZIP\" -v\n\ttest $? -ne 127\n'\n\nrun_with_limited_cmdline () {\n\t(ulimit -s 128 && \"$@\")\n}\n\ntest_lazy_prereq CMDLINE_LIMIT '\n\ttest_have_prereq !MINGW,!CYGWIN &&\n\trun_with_limited_cmdline true\n'\n\nrun_with_limited_stack () {\n\t(ulimit -s 128 && \"$@\")\n}\n\ntest_lazy_prereq ULIMIT_STACK_SIZE '\n\ttest_have_prereq !MINGW,!CYGWIN &&\n\trun_with_limited_stack true\n'\n\nbuild_option () {\n\tgit version --build-options |\n\tsed -ne \"s/^$1: //p\"\n}\n\ntest_lazy_prereq LONG_IS_64BIT '\n\ttest 8 -le \"$(build_option sizeof-long)\"\n'\n\ntest_lazy_prereq TIME_IS_64BIT 'test-tool date is64bit'\ntest_lazy_prereq TIME_T_IS_64BIT 'test-tool date time_t-is64bit'\n\ntest_lazy_prereq CURL '\n\tcurl --version\n'\n\n# SHA1 is a test if the hash algorithm in use is SHA-1.  This is both for tests\n# which will not work with other hash algorithms and tests that work but don't\n# test anything meaningful (e.g. 
special values which cause short collisions).\ntest_lazy_prereq SHA1 '\n\ttest $(git hash-object /dev/null) = e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\n'\n\ntest_lazy_prereq REBASE_P '\n\ttest -z \"$GIT_TEST_SKIP_REBASE_P\"\n'\n"
  }
]