[
  {
    "path": ".coveragerc",
    "content": "[run]\nomit = ffsubsync/ffsubsync_gui.py, ffsubsync/_version.py, ffsubsync/version.py\n"
  },
  {
    "path": ".gitattributes",
    "content": "ffsubsync/_version.py export-subst\n"
  },
  {
    "path": ".github/FUNDING.yml",
    "content": "github: smacke\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "content": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: bug\nassignees: ''\n\n---\n\n**Environment (please complete the following information):**\n - OS: [e.g. Windows 10, MacOS Mojave, etc.]\n - python version (`python --version`)\n - subsync version (`subsync --version`)\n\n**Describe the bug**\nA clear and concise description of what the bug is.\n\n**To Reproduce**\nHow to reproduce the behavior.\n\n**Expected behavior**\nA clear and concise description of what you expected to happen.\n\n**Output**\nCopy+paste stdout from running the command here.\n\n**Test case**\n[Optional] You can bundle additional debugging information into a tar archive as follows:\n```\nsubsync vid.mkv -i in.srt -o out.srt --make-test-case\n```\nThis will create a file `vid.mkv.$timestamp.tar.gz` or similar a few KiB in size; you can attach it by clicking the \"attach files\" button below.\n\n**Additional context**\nAdd any other context about the problem here.\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/synchronization-problem.md",
    "content": "---\nname: Synchronization problem\nabout: Help us to improve syncing by reporting failed syncs\ntitle: output subtitles still out of sync\nlabels: out-of-sync\nassignees: ''\n\n---\n\n**Upload a tarball with debugging information**\n1. Run the command that produces the out-of-sync subtitle output, but with the additional `--make-test-case` flag, i.e.: `subsync ref.mkv -i in.srt -o failed.srt --make-test-case`\n2. This results in a file of the form `ref.mkv.$timestamp.tar.gz` or similar.\n3. Please upload this file using the \"attach files\" button at the bottom of the text prompt.\n\nThat's all! Thank you for contributing a test case; this helps me to continue improving the sync and to add additional integration tests once improvements have been made.\n\n**Additional context**\nAdd any other context about the problem here that might be helpful.\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: ffsubsync\n\non: [push, pull_request]\n\njobs:\n  build:\n\n    runs-on: ${{ matrix.os }}\n\n    strategy:\n      matrix:\n        os: [ 'ubuntu-22.04', 'windows-latest' ]\n        python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14' ]\n        include:\n          - python-version: '3.7'\n            os: 'macos-15-intel'\n          - python-version: '3.8'\n            os: 'macos-15-intel'\n          - python-version: '3.9'\n            os: 'macos-15-intel'\n          - python-version: '3.10'\n            os: 'macos-latest'\n          - python-version: '3.11'\n            os: 'macos-latest'\n          - python-version: '3.12'\n            os: 'macos-latest'\n          - python-version: '3.13'\n            os: 'macos-latest'\n          - python-version: '3.14'\n            os: 'macos-latest'\n    steps:\n    - uses: actions/checkout@v4\n      with:\n        fetch-depth: 1\n    - uses: smacke/submodule-checkout@v3\n      if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version != '3.10'}}\n      with:\n        ssh-key: '${{ secrets.TEST_DATA_SECRET }}'\n    - name: Set up Python\n      uses: actions/setup-python@v5\n      with:\n        python-version: ${{ matrix.python-version }}\n    - name: Install dependencies\n      run: |\n        python -m pip install --upgrade pip\n        pip install -r requirements.txt\n        pip install -r requirements-dev.txt\n        pip install -e .\n    - name: Lint with flake8\n      run: |\n        pip install flake8\n        # stop the build if there are Python syntax errors or undefined names\n        #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics\n        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide\n        #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics\n        flake8 . 
--exit-zero  \n    - name: Run unit tests with pytest (no coverage)\n      if: matrix.os != 'ubuntu-latest'\n      run: |\n        pytest --cov-config=.coveragerc --cov-report= --cov=ffsubsync -v -m 'not integration' tests/\n    - name: Run unit tests with pytest (with coverage)\n      if: matrix.os == 'ubuntu-latest'\n      run: |\n        pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'not integration' tests/\n    - name: Run integration tests with pytest\n      if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version != '3.10'}}\n      run: |\n        INTEGRATION=1 pytest --cov-config=.coveragerc --cov-report=xml:cov.xml --cov=ffsubsync -v -m 'integration' tests/\n    - name: Upload coverage report\n      if: matrix.os == 'ubuntu-latest'\n      uses: codecov/codecov-action@v1\n      with:\n        token: '${{ secrets.CODECOV_TOKEN }}'\n        files: ./cov.xml\n        env_vars: PYTHON\n        name: codecov-umbrella\n        fail_ci_if_error: true\n        verbose: true\n"
  },
  {
    "path": ".gitignore",
    "content": "scratch-notebooks/\n**/__pycache__\nbuild\ndist\n*.egg-info\n.vim\n__version__\n.venv/\n.coverage\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"test-data\"]\n\tpath = test-data\n\turl = git@github.com:smacke/subsync-data\n"
  },
  {
    "path": ".readthedocs.yml",
    "content": "# .readthedocs.yml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\n\n# Required\nversion: 2\n\n# Build documentation in the docs/ directory with Sphinx\nsphinx:\n  configuration: docs/conf.py\n\n# Optionally build your docs in additional formats such as PDF and ePub\nformats: [pdf]\n\n# Optionally set the version of Python and requirements required to build your docs\npython:\n  version: 3.8\n  install:\n    - method: setuptools\n      path: .\n    - requirements: docs/requirements-docs.txt\n\nsubmodules:\n  exclude: all\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: python\n\npython:\n  - \"3.6\"\n  - \"3.7\"\n  - \"3.8\"\n  - \"3.9\"\n\nos:\n  - linux\n#  - osx\n\ndist: xenial\n\ngit:\n  submodules: false\n  lfs_skip_smudge: true\n\ninstall:\n  - pip install -r requirements.txt\n  - pip install -r requirements-dev.txt\n  - pip install -e .\n\n#addons:\n#  apt:\n#    update: true\n#    packages: ffmpeg\n#  homebrew:\n#    packages: ffmpeg\n\nscript:\n  - pytest -v -m 'not integration' tests/\n  - flake8 . --exit-zero\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and maintainers pledge to making participation in our project and\nour community a harassment-free experience for everyone, regardless of age, body\nsize, disability, ethnicity, sex characteristics, gender identity and expression,\nlevel of experience, education, socio-economic status, nationality, personal\nappearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment\ninclude:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or\n advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic\n address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable\nbehavior and are expected to take appropriate and fair corrective action in\nresponse to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviors that they deem inappropriate,\nthreatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies both within 
project spaces and in public spaces\nwhen an individual is representing the project or its community. Examples of\nrepresenting a project or community include using an official project e-mail\naddress, posting via an official social media account, or acting as an appointed\nrepresentative at an online or offline event. Representation of a project may be\nfurther defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported by contacting the project team at stephen.macke@gmail.com. All\ncomplaints will be reviewed and investigated and will result in a response that\nis deemed necessary and appropriate to the circumstances. The project team is\nobligated to maintain confidentiality with regard to the reporter of an incident.\nFurther details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good\nfaith may face temporary or permanent repercussions as determined by other\nmembers of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,\navailable at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see\nhttps://www.contributor-covenant.org/faq\n"
  },
  {
    "path": "HISTORY.rst",
    "content": "History\n=======\n\n0.4.31 (2025-11-23)\n-------------------\n* Add support for Python 3.14;\n\n0.4.30 (2025-09-01)\n-------------------\n* Remove faust-cchardet dependency on Python 3.13;\n\n0.4.29 (2025-02-18)\n-------------------\n* Remove six dependency;\n\n0.4.28 (2025-02-16)\n-------------------\n* Add support for Python 3.13;\n\n0.4.27 (2024-12-23)\n-------------------\n* Add support for WebVTT by @GrahamDigital;\n* Make setuptools an explicit requirement to improve support for Python 3.12+;\n\n0.4.26 (2024-10-15)\n-------------------\n* Allow progress to work for multiple syncs even if alignment fails for a particular input;\n* Allow specifying ffmpeg exe path using --ffmpeg-path;\n* Updates for Python 3.12;\n* Don't report sync as successful if best score is in negatives (from @ajitid);\n* Turn on Audio Sync for audio extraction process (from @dvh312);\n\n0.4.25 (2023-03-26)\n-------------------\n* Replace unmaintained cchardet with faust-cchardet;\n\n0.4.23 (2023-01-17)\n-------------------\n* Bugfix for waitpid on Windows;\n\n0.4.22 (2022-12-31)\n-------------------\n* Misc maintenance / compatibility fixes;\n\n0.4.19 (2022-01-07)\n-------------------\n* Blacken code and get rid of future_annotations dependency;\n\n0.4.18 (2021-11-07)\n-------------------\n* Allow `--apply-offset-seconds` when only subtitles specified;\n* Make golden section search over scale factors option (`--gss`) available from help;\n* Use -inf as objective for invalid offsets;\n\n0.4.17 (2021-10-03)\n-------------------\n* Don't remove log file if --log-dir-path explicitly requested;\n* Add --suppress-output-if-offset-less-than arg to suppress output for small syncs;\n\n0.4.16 (2021-07-22)\n-------------------\n* Fix a couple of validation bugs that prevented certain uncommon command line options from use;\n\n0.4.15 (2021-05-25)\n-------------------\n* Make typing_extensions a requirement\n\n0.4.14 (2021-05-10)\n-------------------\n* Hotfix for pysubs2 on Python 
3.6;\n\n0.4.13 (2021-05-10)\n-------------------\n* Support SSA embedded fonts using new pysubs2 'opaque_fonts' metadata;\n* Set min required pysubs2 version to 1.2.0 to ensure the aforementioned functionality is available;\n\n0.4.12 (2021-04-13)\n-------------------\n* Pin auditok to 0.1.5 to avoid API-breaking change\n\n0.4.11 (2021-01-29)\n-------------------\n* Misc sync improvements:\n    * Have webrtcvad use '0' as the non speech label instead of 0.5;\n    * Allow the vad non speech label to be specified via the --non-speech-label command line parameter;\n    * Don't try to infer framerate ratio based on length between first and last speech frames for non-subtitle speech detection;\n\n0.4.10 (2021-01-18)\n-------------------\n* Lots of improvements from PRs submitted by @alucryd (big thanks!):\n    * Retain ASS styles;\n    * Support syncing several subs against the same ref via --overwrite-input flag;\n    * Add --apply-offset-seconds postprocess option to shift alignment by prespecified amount;\n* Filter out metadata in subtitles when extracting speech;\n* Add experimental --golden-section-search over framerate ratio (off by default);\n* Try to improve sync by inferring framerate ratio based on relative duration of synced vs unsynced;\n\n0.4.9 (2020-10-11)\n------------------\n* Make default max offset seconds 60 and enforce during alignment as opposed to throwing away alignments with > max_offset_seconds;\n* Add experimental section for using golden section search to find framerate ratio;\n* Restore ability to read stdin and write stdout after buggy permissions check;\n* Exceptions that occur during syncing were mistakenly suppressed; this is now fixed;\n\n0.4.8 (2020-09-22)\n------------------\n* Use webrtcvad-wheels on Windows to eliminate dependency on compiler;\n\n0.4.7 (2020-09-05)\n------------------\n* Misc bugfixes and stability improvements;\n\n0.4.6 (2020-06-10)\n------------------\n* Bugfix for writing subs to stdout;\n\n0.4.5 
(2020-06-09)\n------------------\n* Allow MicroDVD input format;\n* Use output extension to determine output format;\n\n0.4.4 (2020-06-08)\n------------------\n* Use rich formatting for Python >= 3.6;\n* Use versioneer to manage versions;\n\n0.4.3 (2020-06-07)\n------------------\n* Fix regression where stdout not used for default output;\n* Add ability to specify path to ffmpeg / ffprobe binaries;\n* Add ability to overwrite the input / unsynced srt with the --overwrite-input flag;\n\n0.4.2 (2020-06-06)\n------------------\n* Fix Python 2 compatibility bug;\n\n0.4.1 (2020-06-06)\n------------------\n* Add --reference-stream option for selecting the stream / track from the video reference to use for speech detection;\n\n0.4.0 (2020-06-02)\n------------------\n* Remove dependency on scikit-learn;\n* Implement PyInstaller / Gooey build process for graphical application on MacOS and Windows;\n\n0.3.7 (2020-05-11)\n------------------\n* Fix PyPI issues;\n\n0.3.5 (2020-05-08)\n------------------\n* Fix corner case bug that occurred when multiple sync attempts were scored the same;\n\n0.3.4 (2020-03-20)\n------------------\n* Attempt speech extraction from subtitle tracks embedded in video first before using VAD;\n\n0.3.3 (2020-03-15)\n------------------\n* Hotfix for test archive creation bug;\n\n0.3.2 (2020-03-13)\n------------------\n* Add ability to merge synced and reference subs into bilingual subs when reference is srt;\n\n0.3.1 (2020-03-12)\n------------------\n* Fix bug when handling ass/ssa input, this format should work now;\n\n0.3.0 (2020-03-11)\n------------------\n* Better detection of text file encodings;\n* ASS / SSA functionality (but currently untested);\n* Allow serialize speech with --serialize-speech flag;\n* Convenient --make-test-case flag to create test cases when filing sync-related bugs;\n* Use utf-8 as default output encoding (instead of using same encoding as input);\n* More robust test framework (integration tests!);\n\n0.2.17 
(2019-12-21)\n------------------\n* Try to correct for framerate differences by picking best framerate ratio;\n\n0.2.16 (2019-12-04)\n------------------\n* Revert changes from 0.2.9 now that srt parses weird timestamps robustly;\n\n0.2.15 (2019-10-11)\n------------------\n* Revert changes from 0.2.12 (caused regression on Windows);\n\n0.2.14 (2019-10-07)\n------------------\n* Bump min required scikit-learn to 0.20.4;\n\n0.2.12 (2019-10-06)\n------------------\n* Clear O_NONBLOCK flag on stdout stream in case it is set;\n\n0.2.11 (2019-10-06)\n------------------\n* Quick and dirty fix to recover without progress info if `ffmpeg.probe` raises;\n\n0.2.10 (2019-09-22)\n------------------\n* Specify utf-8 encoding at top of file for backcompat with Python2;\n\n0.2.9 (2019-09-22)\n------------------\n* Quick and dirty fix to properly handle timestamp ms fields with >3 digits;\n\n0.2.8 (2019-06-15)\n------------------\n* Allow user to specify start time (in seconds) for processing;\n\n0.2.7 (2019-05-28)\n------------------\n* Add utf-16 to list of encodings to try for inference purposes;\n\n0.2.6 (2019-05-15)\n------------------\n* Fix argument parsing regression;\n\n0.2.5 (2019-05-14)\n------------------\n* Clamp subtitles to maximum duration (default 10);\n\n0.2.4 (2019-03-19)\n------------------\n* Add six to requirements.txt;\n* Set default encoding to utf8 to ensure non ascii filenames handled properly;\n\n0.2.3 (2019-03-08)\n------------------\n* Minor change to subtitle speech extraction;\n\n0.2.2 (2019-03-08)\n------------------\n* Allow reading input srt from stdin;\n* Allow specifying encodings for reference, input, and output srt;\n* Use the same encoding for both input srt and output srt by default;\n* Developer note: using sklearn-style data pipelines now;\n\n0.2.1 (2019-03-07)\n------------------\n* Developer note: change progress-only to vlc-mode and remove from help docs;\n\n0.2.0 (2019-03-06)\n------------------\n* Get rid of auditok (GPLv3, was hurting 
alignment algorithm);\n* Change to alignment algo: don't penalize matching video non-speech with subtitle speech;\n\n0.1.7 (2019-03-05)\n------------------\n* Add Chinese to the list of encodings that can be inferred;\n* Make srt parsing more robust;\n\n0.1.6 (2019-03-04)\n------------------\n* Misc bugfixes;\n* Proper logging;\n* Proper version handling;\n\n0.1.0 (2019-02-24)\n------------------\n* Support srt format;\n* Support using srt as reference;\n* Support using video as reference (via ffmpeg);\n* Support writing to stdout or file (read from stdin not yet supported; can only read from file);\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright 2019 Stephen Macke\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
  },
  {
    "path": "MANIFEST.in",
    "content": "include *.rst\ninclude versioneer.py\ninclude ffsubsync/_version.py\n"
  },
  {
    "path": "Makefile",
    "content": "# -*- coding: utf-8 -*-\n.PHONY: clean build bump deploy black blackcheck check test tests deps devdeps\n\nclean:\n\trm -rf dist/ build/ *.egg-info/\n\nbuild: clean\n\tpython setup.py sdist bdist_wheel --universal\n\nbump:\n\t./scripts/bump-version.py\n\ndeploy: build\n\t./scripts/deploy.sh\n\nblack:\n\t./scripts/blacken.sh\n\nblackcheck:\n\t./scripts/blacken.sh --check\n\nlint:\n\tflake8\n\ntypecheck:\n\tmypy ffsubsync\n\ncheck_no_typing:\n\tINTEGRATION=1 pytest --cov-config=.coveragerc --cov=ffsubsync\n\ncheck: blackcheck typecheck check_no_typing\n\ntest: check\ntests: check\n\ndeps:\n\tpip install -r requirements.txt\n\ndevdeps:\n\tpip install -e .\n\tpip install -r requirements-dev.txt\n\n"
  },
  {
    "path": "README.md",
    "content": "FFsubsync\n=======\n\n[![CI Status](https://github.com/smacke/ffsubsync/workflows/ffsubsync/badge.svg)](https://github.com/smacke/ffsubsync/actions)\n[![Support Ukraine](https://badgen.net/badge/support/UKRAINE/?color=0057B8&labelColor=FFD700)](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md)\n[![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)\n[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)\n[![License: MIT](https://img.shields.io/badge/License-MIT-maroon.svg)](https://opensource.org/licenses/MIT)\n[![Python Versions](https://img.shields.io/pypi/pyversions/ffsubsync.svg)](https://pypi.org/project/ffsubsync)\n[![Documentation Status](https://readthedocs.org/projects/ffsubsync/badge/?version=latest)](https://ffsubsync.readthedocs.io/en/latest/?badge=latest)\n[![PyPI Version](https://img.shields.io/pypi/v/ffsubsync.svg)](https://pypi.org/project/ffsubsync)\n\n\nLanguage-agnostic automatic synchronization of subtitles with video, so that\nsubtitles are aligned to the correct starting point within the video.\n\nTurn this:                       |  Into this:\n:-------------------------------:|:-------------------------:\n![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-wrong.gif)  |  ![](https://raw.githubusercontent.com/smacke/ffsubsync/master/resources/img/tearing-me-apart-correct.gif)\n\nHelping Development\n-------------------\nPlease consider [supporting Ukraine](https://github.com/vshymanskyy/StandWithUkraine/blob/main/docs/README.md)\nrather than donating directly to this project. 
That said, at the request of\nsome, you can now help cover my coffee expenses using the GitHub Sponsors\nbutton at the top, or using the PayPal Donate button below:\n\n[![Donate](https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=XJC5ANLMYECJE)\n\nInstall\n-------\nFirst, make sure ffmpeg is installed. On MacOS, this looks like:\n~~~\nbrew install ffmpeg\n~~~\n(Windows users: make sure `ffmpeg` is on your path and can be referenced\nfrom the command line!)\n\nNext, grab the package (compatible with Python >= 3.6):\n~~~\npip install ffsubsync\n~~~\nIf you want to live dangerously, you can grab the latest version as follows:\n~~~\npip install git+https://github.com/smacke/ffsubsync@latest\n~~~\n\nUsage\n-----\n`ffs`, `subsync` and `ffsubsync` all work as entrypoints:\n~~~\nffs video.mp4 -i unsynchronized.srt -o synchronized.srt\n~~~\n\nThere may be occasions where you have a correctly synchronized srt file in a\nlanguage you are unfamiliar with, as well as an unsynchronized srt file in your\nnative language. 
In this case, you can use the correctly synchronized srt file\ndirectly as a reference for synchronization, instead of using the video as the\nreference:\n\n~~~\nffsubsync reference.srt -i unsynchronized.srt -o synchronized.srt\n~~~\n\n`ffsubsync` uses the file extension to decide whether to perform voice activity\ndetection on the audio or to directly extract speech from an srt file.\n\nSync Issues\n-----------\nIf the sync fails, the following recourses are available:\n- Try to sync assuming identical video / subtitle framerates by passing\n  `--no-fix-framerate`;\n- Try passing `--gss` to use [golden-section search](https://en.wikipedia.org/wiki/Golden-section_search)\n  to find the optimal ratio between video and subtitle framerates (by default,\n  only a few common ratios are evaluated);\n- Try a value of `--max-offset-seconds` greater than the default of 60, in the\n  event that the subtitles are out of sync by more than 60 seconds (empirically\n  unlikely in practice, but possible).\n- Try `--vad=auditok` since [auditok](https://github.com/amsehili/auditok) can\n  sometimes work better in the case of low-quality audio than WebRTC's VAD.\n  Auditok does not specifically detect voice, but instead detects all audio;\n  this property can yield suboptimal syncing behavior when a proper VAD can\n  work well, but can be effective in some cases.\n\nIf the sync still fails, consider trying one of the following similar tools:\n- [sc0ty/subsync](https://github.com/sc0ty/subsync): does speech-to-text and looks for matching word morphemes\n- [kaegi/alass](https://github.com/kaegi/alass): rust-based subtitle synchronizer with a fancy dynamic programming algorithm\n- [tympanix/subsync](https://github.com/tympanix/subsync): neural net based approach that optimizes directly for alignment when performing speech detection\n- [oseiskar/autosubsync](https://github.com/oseiskar/autosubsync): performs speech detection with bespoke spectrogram + logistic regression\n- 
[pums974/srtsync](https://github.com/pums974/srtsync): similar approach to ffsubsync (WebRTC's VAD + FFT to maximize signal cross correlation)\n\nSpeed\n-----\n`ffsubsync` usually finishes in 20 to 30 seconds, depending on the length of\nthe video. The most expensive step is actually extraction of raw audio. If you\nalready have a correctly synchronized \"reference\" srt file (in which case audio\nextraction can be skipped), `ffsubsync` typically runs in less than a second.\n\nHow It Works\n------------\nThe synchronization algorithm operates in 3 steps:\n1. Discretize both the video file's audio stream and the subtitles into 10ms\n   windows.\n2. For each 10ms window, determine whether that window contains speech.  This\n   is trivial to do for subtitles (we just determine whether any subtitle is\n   \"on\" during each time window); for the audio stream, use an off-the-shelf\n   voice activity detector (VAD) like\n   the one built into [webrtc](https://webrtc.org/).\n3. Now we have two binary strings: one for the subtitles, and one for the\n   video.  Try to align these strings by matching 0's with 0's and 1's with\n   1's. We score these alignments as (# video 1's matched w/ subtitle 1's) - (#\n   video 1's matched with subtitle 0's).\n\nThe best-scoring alignment from step 3 determines how to offset the subtitles\nin time so that they are properly synced with the video. Because the binary\nstrings are fairly long (millions of digits for video longer than an hour), the\nnaive O(n^2) strategy for scoring all alignments is unacceptable. 
Instead, we\nuse the fact that \"scoring all alignments\" is a convolution operation and can\nbe implemented with the Fast Fourier Transform (FFT), bringing the complexity\ndown to O(n log n).\n\nLimitations\n-----------\nIn most cases, inconsistencies between video and subtitles occur when starting\nor ending segments present in video are not present in subtitles, or vice versa.\nThis can occur, for example, when a TV episode recap in the subtitles was pruned\nfrom video. FFsubsync typically works well in these cases, and in my experience\nthis covers >95% of use cases. Handling breaks and splits outside of the beginning\nand ending segments is left to future work (see below).\n\nFuture Work\n-----------\nBesides general stability and usability improvements, one line\nof work aims to extend the synchronization algorithm to handle splits\n/ breaks in the middle of video not present in subtitles (or vice versa).\nDeveloping a robust solution will take some time (assuming one is possible).\nSee [#10](https://github.com/smacke/ffsubsync/issues/10) for more details.\n\nHistory\n-------\nThe implementation for this project was started during HackIllinois 2019, for\nwhich it received an **_Honorable Mention_** (ranked in the top 5 projects,\nexcluding projects that won company-specific prizes).\n\nCredits\n-------\nThis project would not be possible without the following libraries:\n- [ffmpeg](https://www.ffmpeg.org/) and the [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) wrapper, for extracting raw audio from video\n- VAD from [webrtc](https://webrtc.org/) and the [py-webrtcvad](https://github.com/wiseman/py-webrtcvad) wrapper, for speech detection\n- [srt](https://pypi.org/project/srt/) for operating on [SRT files](https://en.wikipedia.org/wiki/SubRip#SubRip_text_file_format)\n- [numpy](http://www.numpy.org/) and, indirectly, [FFTPACK](https://www.netlib.org/fftpack/), which powers the FFT-based algorithm for fast scoring of alignments between subtitles 
(or subtitles and video)\n- Other excellent Python libraries like [argparse](https://docs.python.org/3/library/argparse.html), [rich](https://github.com/willmcgugan/rich), and [tqdm](https://tqdm.github.io/), not related to the core functionality, but which enable much better experiences for developers and users.\n\n# License\nCode in this project is [MIT licensed](https://opensource.org/licenses/MIT).\n"
  },
  {
    "path": "docs/.gitignore",
    "content": "_build/\n"
  },
  {
    "path": "docs/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the environment for the first two.\nSPHINXOPTS    ?=\nSPHINXBUILD   ?= sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = _build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "docs/_static/.keep",
    "content": ""
  },
  {
    "path": "docs/_templates/.keep",
    "content": ""
  },
  {
    "path": "docs/conf.py",
    "content": "# Configuration file for the Sphinx documentation builder.\n#\n# This file only contains a selection of the most common options. For a full\n# list see the documentation:\n# https://www.sphinx-doc.org/en/master/usage/configuration.html\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\n# import os\n# import sys\n# sys.path.insert(0, os.path.abspath('.'))\n\n\n# -- Project information -----------------------------------------------------\n\nproject = 'ffsubsync'\ncopyright = '2020, Stephen Macke'\nauthor = 'Stephen Macke'\n\n\n# -- General configuration ---------------------------------------------------\n\n# ref: https://stackoverflow.com/questions/56336234/build-fail-sphinx-error-contents-rst-not-found\nmaster_doc = 'index'\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'sphinx.ext.autodoc',\n    'sphinxarg.ext',\n    'sphinx_rtd_theme',\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['_templates']\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path.\nexclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']\n\n\n# -- Options for HTML output -------------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\nhtml_theme = 'sphinx_rtd_theme'\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. 
They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['_static']\n"
  },
  {
    "path": "docs/index.rst",
    "content": ".. ffsubsync documentation master file, created by\n   sphinx-quickstart on Mon Dec  2 17:06:18 2019.\n   You can adapt this file completely to your liking, but it should at least\n   contain the root `toctree` directive.\n\nWelcome to FFsubsync's documentation!\n=====================================\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Contents:\n\n\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`modindex`\n* :ref:`search`\n\n.. argparse::\n   :module: ffsubsync.ffsubsync\n   :func: make_parser\n   :prog: ffsubsync\n"
  },
  {
    "path": "docs/make.bat",
    "content": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sphinx-build\r\n)\r\nset SOURCEDIR=.\r\nset BUILDDIR=_build\r\n\r\nif \"%1\" == \"\" goto help\r\n\r\n%SPHINXBUILD% >NUL 2>NUL\r\nif errorlevel 9009 (\r\n\techo.\r\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\r\n\techo.installed, then set the SPHINXBUILD environment variable to point\r\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\r\n\techo.may add the Sphinx directory to PATH.\r\n\techo.\r\n\techo.If you don't have Sphinx installed, grab it from\r\n\techo.http://sphinx-doc.org/\r\n\texit /b 1\r\n)\r\n\r\n%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\r\ngoto end\r\n\r\n:help\r\n%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\r\n\r\n:end\r\npopd\r\n"
  },
  {
    "path": "docs/requirements-docs.txt",
    "content": "# docs\nautodoc\ndocutils<0.18  # ref: https://github.com/sphinx-doc/sphinx/issues/9788\nsphinx-argparse\nsphinx-rtd-theme\n"
  },
  {
    "path": "ffsubsync/__init__.py",
    "content": "# -*- coding: utf-8 -*-\nimport logging\nimport sys\n\ntry:\n    from rich.console import Console\n    from rich.logging import RichHandler\n\n    # configure logging here because some other later imported library does it first otherwise\n    # TODO: use a fileconfig\n    logging.basicConfig(\n        level=logging.INFO,\n        format=\"%(message)s\",\n        datefmt=\"[%X]\",\n        handlers=[RichHandler(console=Console(file=sys.stderr))],\n    )\nexcept:  # noqa: E722\n    logging.basicConfig(stream=sys.stderr, level=logging.INFO)\n\nfrom .version import __version__  # noqa\nfrom .ffsubsync import main  # noqa\n"
  },
  {
    "path": "ffsubsync/_version.py",
    "content": "\n# This file helps to compute a version number in source trees obtained from\n# git-archive tarball (such as those provided by githubs download-from-tag\n# feature). Distribution tarballs (built by setup.py sdist) and build\n# directories (produced by setup.py build) will contain a much shorter file\n# that just contains the computed version number.\n\n# This file is released into the public domain. Generated by\n# versioneer-0.18 (https://github.com/warner/python-versioneer)\n\n\"\"\"Git implementation of _version.py.\"\"\"\n\nimport errno\nimport os\nimport re\nimport subprocess\nimport sys\n\n\ndef get_keywords():\n    \"\"\"Get the keywords needed to look up the version information.\"\"\"\n    # these strings will be replaced by git during git-archive.\n    # setup.py/versioneer.py will grep for the variable names, so they must\n    # each be defined on a line of their own. _version.py will just call\n    # get_keywords().\n    git_refnames = \"$Format:%d$\"\n    git_full = \"$Format:%H$\"\n    git_date = \"$Format:%ci$\"\n    keywords = {\"refnames\": git_refnames, \"full\": git_full, \"date\": git_date}\n    return keywords\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n\ndef get_config():\n    \"\"\"Create, populate and return the VersioneerConfig() object.\"\"\"\n    # these strings are filled in when 'setup.py versioneer' creates\n    # _version.py\n    cfg = VersioneerConfig()\n    cfg.VCS = \"git\"\n    cfg.style = \"pep440-pre\"\n    cfg.tag_prefix = \"\"\n    cfg.parentdir_prefix = \"ffsubsync-\"\n    cfg.versionfile_source = \"ffsubsync/_version.py\"\n    cfg.verbose = False\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current scenario.\"\"\"\n\n\nLONG_VERSION_PY = {}\nHANDLERS = {}\n\n\ndef register_vcs_handler(vcs, method):  # decorator\n    \"\"\"Decorator to mark a method as the handler for a particular 
VCS.\"\"\"\n    def decorate(f):\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        if vcs not in HANDLERS:\n            HANDLERS[vcs] = {}\n        HANDLERS[vcs][method] = f\n        return f\n    return decorate\n\n\ndef run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,\n                env=None):\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    p = None\n    for c in commands:\n        try:\n            dispcmd = str([c] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            p = subprocess.Popen([c] + args, cwd=cwd, env=env,\n                                 stdout=subprocess.PIPE,\n                                 stderr=(subprocess.PIPE if hide_stderr\n                                         else None))\n            break\n        except EnvironmentError:\n            e = sys.exc_info()[1]\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %s\" % dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %s\" % (commands,))\n        return None, None\n    stdout = p.communicate()[0].strip()\n    if sys.version_info[0] >= 3:\n        stdout = stdout.decode()\n    if p.returncode != 0:\n        if verbose:\n            print(\"unable to run %s (error)\" % dispcmd)\n            print(\"stdout was %s\" % stdout)\n        return None, p.returncode\n    return stdout, p.returncode\n\n\ndef versions_from_parentdir(parentdir_prefix, root, verbose):\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. 
We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for i in range(3):\n        dirname = os.path.basename(root)\n        if dirname.startswith(parentdir_prefix):\n            return {\"version\": dirname[len(parentdir_prefix):],\n                    \"full-revisionid\": None,\n                    \"dirty\": False, \"error\": None, \"date\": None}\n        else:\n            rootdirs.append(root)\n            root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\"Tried directories %s but none started with prefix %s\" %\n              (str(rootdirs), parentdir_prefix))\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs):\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords = {}\n    try:\n        f = open(versionfile_abs, \"r\")\n        for line in f.readlines():\n            if line.strip().startswith(\"git_refnames =\"):\n                mo = re.search(r'=\\s*\"(.*)\"', line)\n                if mo:\n                    keywords[\"refnames\"] = mo.group(1)\n            if line.strip().startswith(\"git_full =\"):\n                mo = re.search(r'=\\s*\"(.*)\"', line)\n                if mo:\n                    keywords[\"full\"] = mo.group(1)\n            if line.strip().startswith(\"git_date =\"):\n                mo = re.search(r'=\\s*\"(.*)\"', line)\n                if mo:\n                    keywords[\"date\"] = mo.group(1)\n        f.close()\n    except EnvironmentError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(keywords, tag_prefix, verbose):\n    \"\"\"Get version information from git keywords.\"\"\"\n    if not keywords:\n        raise NotThisMethod(\"no keywords at all, weird\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # git-2.2.0 added \"%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = set([r.strip() for r in refnames.strip(\"()\").split(\",\")])\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = set([r for r in refs if re.search(r'\\d', r)])\n        if verbose:\n            print(\"discarding '%s', no digits\" % \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %s\" % \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix):]\n            if verbose:\n                print(\"picking %s\" % r)\n            return {\"version\": r,\n                    \"full-revisionid\": keywords[\"full\"].strip(),\n                    \"dirty\": False, \"error\": None,\n                    \"date\": date}\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\"version\": \"0+unknown\",\n            \"full-revisionid\": keywords[\"full\"].strip(),\n            \"dirty\": False, \"error\": \"no suitable tags\", \"date\": None}\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    out, rc = run_command(GITS, [\"rev-parse\", \"--git-dir\"], cwd=root,\n                          hide_stderr=True)\n    if rc != 0:\n        if verbose:\n            print(\"Directory %s not under git control\" % root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = run_command(GITS, [\"describe\", \"--tags\", \"--dirty\",\n                                          \"--always\", \"--long\",\n                                          \"--match\", \"%s*\" % tag_prefix],\n                                   cwd=root)\n    # --long 
was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = run_command(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[:git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r'^(.+)-(\\d+)-g([0-9a-f]+)$', git_describe)\n        if not mo:\n            # unparseable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = (\"unable to parse git-describe output: '%s'\"\n                               % describe_out)\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%s' doesn't start with prefix '%s'\"\n                print(fmt % (full_tag, tag_prefix))\n            pieces[\"error\"] = (\"tag '%s' doesn't start with prefix '%s'\"\n                               % (full_tag, tag_prefix))\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix):]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        count_out, rc = run_command(GITS, [\"rev-list\", \"HEAD\", \"--count\"],\n                                    cwd=root)\n        pieces[\"distance\"] = int(count_out)  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = run_command(GITS, [\"show\", \"-s\", \"--format=%ci\", \"HEAD\"],\n                       cwd=root)[0].strip()\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef plus_or_dot(pieces):\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces):\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%d.g%s\" % (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_pre(pieces):\n    \"\"\"TAG[.post.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 0.post.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \".post.dev%d\" % pieces[\"distance\"]\n    else:\n        # exception #1\n        rendered = \"0.post.dev%d\" % pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_old(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces):\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces):\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always --long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces, style):\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\"version\": \"unknown\",\n                \"full-revisionid\": pieces.get(\"long\"),\n                \"dirty\": None,\n                \"error\": pieces[\"error\"],\n                \"date\": None}\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == \"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%s'\" % style)\n\n    return {\"version\": 
rendered, \"full-revisionid\": pieces[\"long\"],\n            \"dirty\": pieces[\"dirty\"], \"error\": None,\n            \"date\": pieces.get(\"date\")}\n\n\ndef get_versions():\n    \"\"\"Get version information or return default if unable to do so.\"\"\"\n    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have\n    # __file__, we can work backwards from there to the root. Some\n    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which\n    # case we can only use expanded keywords.\n\n    cfg = get_config()\n    verbose = cfg.verbose\n\n    try:\n        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,\n                                          verbose)\n    except NotThisMethod:\n        pass\n\n    try:\n        root = os.path.realpath(__file__)\n        # versionfile_source is the relative path from the top of the source\n        # tree (where the .git directory might live) to this file. Invert\n        # this to find the root from __file__.\n        for i in cfg.versionfile_source.split('/'):\n            root = os.path.dirname(root)\n    except NameError:\n        return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n                \"dirty\": None,\n                \"error\": \"unable to find root of source tree\",\n                \"date\": None}\n\n    try:\n        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)\n        return render(pieces, cfg.style)\n    except NotThisMethod:\n        pass\n\n    try:\n        if cfg.parentdir_prefix:\n            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n    except NotThisMethod:\n        pass\n\n    return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n            \"dirty\": None,\n            \"error\": \"unable to compute version\", \"date\": None}\n"
  },
  {
    "path": "ffsubsync/aligners.py",
    "content": "# -*- coding: utf-8 -*-\nimport logging\nimport math\nfrom typing import List, Optional, Tuple, Type, Union\n\nimport numpy as np\n\nfrom ffsubsync.golden_section_search import gss\nfrom ffsubsync.sklearn_shim import Pipeline, TransformerMixin\n\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\nMIN_FRAMERATE_RATIO = 0.9\nMAX_FRAMERATE_RATIO = 1.1\n\n\nclass FailedToFindAlignmentException(Exception):\n    pass\n\n\nclass FFTAligner(TransformerMixin):\n    def __init__(self, max_offset_samples: Optional[int] = None) -> None:\n        self.max_offset_samples: Optional[int] = max_offset_samples\n        self.best_offset_: Optional[int] = None\n        self.best_score_: Optional[float] = None\n        self.get_score_: bool = False\n\n    def _eliminate_extreme_offsets_from_solutions(\n        self, convolve: np.ndarray, substring: np.ndarray\n    ) -> np.ndarray:\n        convolve = np.copy(convolve)\n        if self.max_offset_samples is None:\n            return convolve\n\n        def _offset_to_index(offset):\n            return len(convolve) - 1 + offset - len(substring)\n\n        convolve[: _offset_to_index(-self.max_offset_samples)] = float(\"-inf\")\n        convolve[_offset_to_index(self.max_offset_samples) :] = float(\"-inf\")\n        return convolve\n\n    def _compute_argmax(self, convolve: np.ndarray, substring: np.ndarray) -> None:\n        best_idx = int(np.argmax(convolve))\n        self.best_offset_ = len(convolve) - 1 - best_idx - len(substring)\n        self.best_score_ = convolve[best_idx]\n\n    def fit(self, refstring, substring, get_score: bool = False) -> \"FFTAligner\":\n        refstring, substring = [\n            list(map(int, s)) if isinstance(s, str) else s\n            for s in [refstring, substring]\n        ]\n        refstring, substring = map(\n            lambda s: 2 * np.array(s).astype(float) - 1, [refstring, substring]\n        )\n        total_bits = 
math.log(len(substring) + len(refstring), 2)\n        total_length = int(2 ** math.ceil(total_bits))\n        extra_zeros = total_length - len(substring) - len(refstring)\n        subft = np.fft.fft(np.append(np.zeros(extra_zeros + len(refstring)), substring))\n        refft = np.fft.fft(\n            np.flip(np.append(refstring, np.zeros(len(substring) + extra_zeros)), 0)\n        )\n        convolve = np.real(np.fft.ifft(subft * refft))\n        self._compute_argmax(\n            self._eliminate_extreme_offsets_from_solutions(convolve, substring),\n            substring,\n        )\n        self.get_score_ = get_score\n        return self\n\n    def transform(self, *_) -> Union[int, Tuple[float, int]]:\n        if self.get_score_:\n            return self.best_score_, self.best_offset_\n        else:\n            return self.best_offset_\n\n\nclass MaxScoreAligner(TransformerMixin):\n    def __init__(\n        self,\n        base_aligner: Union[FFTAligner, Type[FFTAligner]],\n        srtin: Optional[str] = None,\n        sample_rate=None,\n        max_offset_seconds=None,\n    ) -> None:\n        self.srtin: Optional[str] = srtin\n        if sample_rate is None or max_offset_seconds is None:\n            self.max_offset_samples: Optional[int] = None\n        else:\n            self.max_offset_samples = abs(int(max_offset_seconds * sample_rate))\n        if isinstance(base_aligner, type):\n            self.base_aligner: FFTAligner = base_aligner(\n                max_offset_samples=self.max_offset_samples\n            )\n        else:\n            self.base_aligner = base_aligner\n        self.max_offset_seconds: Optional[int] = max_offset_seconds\n        self._scores: List[Tuple[Tuple[float, int], Pipeline]] = []\n\n    def fit_gss(self, refstring, subpipe_maker):\n        def opt_func(framerate_ratio, is_last_iter):\n            subpipe = subpipe_maker(framerate_ratio)\n            substring = subpipe.fit_transform(self.srtin)\n            score = 
self.base_aligner.fit_transform(\n                refstring, substring, get_score=True\n            )\n            logger.info(\n                \"got score %.0f (offset %d) for ratio %.3f\",\n                score[0],\n                score[1],\n                framerate_ratio,\n            )\n            if is_last_iter:\n                self._scores.append((score, subpipe))\n            return -score[0]\n\n        gss(opt_func, MIN_FRAMERATE_RATIO, MAX_FRAMERATE_RATIO)\n        return self\n\n    def fit(\n        self, refstring, subpipes: Union[Pipeline, List[Pipeline]]\n    ) -> \"MaxScoreAligner\":\n        if not isinstance(subpipes, list):\n            subpipes = [subpipes]\n        for subpipe in subpipes:\n            if callable(subpipe):\n                self.fit_gss(refstring, subpipe)\n                continue\n            elif hasattr(subpipe, \"transform\"):\n                substring = subpipe.transform(self.srtin)\n            else:\n                substring = subpipe\n            self._scores.append(\n                (\n                    self.base_aligner.fit_transform(\n                        refstring, substring, get_score=True\n                    ),\n                    subpipe,\n                )\n            )\n        return self\n\n    def transform(self, *_) -> Tuple[Tuple[float, float], Pipeline]:\n        scores = self._scores\n        if self.max_offset_samples is not None:\n            scores = list(\n                filter(lambda s: abs(s[0][1]) <= self.max_offset_samples, scores)\n            )\n        if len(scores) == 0:\n            raise FailedToFindAlignmentException(\n                \"Synchronization failed; consider passing \"\n                \"--max-offset-seconds with a number larger than \"\n                \"{}\".format(self.max_offset_seconds)\n            )\n        (score, offset), subpipe = max(scores, key=lambda x: x[0][0])\n        return (score, offset), subpipe\n"
  },
  {
    "path": "ffsubsync/constants.py",
    "content": "# -*- coding: utf-8 -*-\nfrom typing import List, Tuple\n\n\nSUBSYNC_RESOURCES_ENV_MAGIC: str = \"ffsubsync_resources_xj48gjdkl340\"\n\nSAMPLE_RATE: int = 100\n\nFRAMERATE_RATIOS: List[float] = [24.0 / 23.976, 25.0 / 23.976, 25.0 / 24.0]\n\nDEFAULT_FRAME_RATE: int = 48000\nDEFAULT_NON_SPEECH_LABEL: float = 0.0\nDEFAULT_ENCODING: str = \"infer\"\nDEFAULT_MAX_SUBTITLE_SECONDS: int = 10\nDEFAULT_START_SECONDS: int = 0\nDEFAULT_SCALE_FACTOR: float = 1\nDEFAULT_VAD: str = \"subs_then_webrtc\"\nDEFAULT_MAX_OFFSET_SECONDS: int = 60\nDEFAULT_APPLY_OFFSET_SECONDS: int = 0\n\nSUBTITLE_EXTENSIONS: Tuple[str, ...] = (\"srt\", \"ass\", \"ssa\", \"sub\")\n\nGITHUB_DEV_USER: str = \"smacke\"\nPROJECT_NAME: str = \"FFsubsync\"\nPROJECT_LICENSE: str = \"MIT\"\nCOPYRIGHT_YEAR: str = \"2019\"\nGITHUB_REPO: str = \"ffsubsync\"\nDESCRIPTION: str = \"Synchronize subtitles with video.\"\nLONG_DESCRIPTION: str = (\n    \"Automatic and language-agnostic synchronization of subtitles with video.\"\n)\nWEBSITE: str = \"https://github.com/{}/{}/\".format(GITHUB_DEV_USER, GITHUB_REPO)\nDEV_WEBSITE: str = \"https://smacke.net/\"\n\n# No trailing slash important for this one...\nAPI_RELEASE_URL: str = \"https://api.github.com/repos/{}/{}/releases/latest\".format(\n    GITHUB_DEV_USER, GITHUB_REPO\n)\nRELEASE_URL: str = \"https://github.com/{}/{}/releases/latest/\".format(\n    GITHUB_DEV_USER, GITHUB_REPO\n)\n"
  },
  {
    "path": "ffsubsync/ffmpeg_utils.py",
    "content": "# -*- coding: utf-8 -*-\nimport logging\nimport os\nimport platform\nimport subprocess\n\nfrom ffsubsync.constants import SUBSYNC_RESOURCES_ENV_MAGIC\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\n# ref: https://github.com/pyinstaller/pyinstaller/wiki/Recipe-subprocess\n# Create a set of arguments which make a ``subprocess.Popen`` (and\n# variants) call work with or without Pyinstaller, ``--noconsole`` or\n# not, on Windows and Linux. Typical use::\n#\n#   subprocess.call(['program_to_run', 'arg_1'], **subprocess_args())\n#\n# When calling ``check_output``::\n#\n#   subprocess.check_output(['program_to_run', 'arg_1'],\n#                           **subprocess_args(False))\ndef subprocess_args(include_stdout=True):\n    # The following is true only on Windows.\n    if hasattr(subprocess, \"STARTUPINFO\"):\n        # On Windows, subprocess calls will pop up a command window by default\n        # when run from Pyinstaller with the ``--noconsole`` option. Avoid this\n        # distraction.\n        si = subprocess.STARTUPINFO()\n        si.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        # Windows doesn't search the path by default. 
Pass it an environment so\n        # it will.\n        env = os.environ\n    else:\n        si = None\n        env = None\n\n    # ``subprocess.check_output`` doesn't allow specifying ``stdout``::\n    #\n    #   Traceback (most recent call last):\n    #     File \"test_subprocess.py\", line 58, in <module>\n    #       **subprocess_args(stdout=None))\n    #     File \"C:\\Python27\\lib\\subprocess.py\", line 567, in check_output\n    #       raise ValueError('stdout argument not allowed, it will be overridden.')\n    #   ValueError: stdout argument not allowed, it will be overridden.\n    #\n    # So, add it only if it's needed.\n    if include_stdout:\n        ret = {\"stdout\": subprocess.PIPE}\n    else:\n        ret = {}\n\n    # On Windows, running this from the binary produced by Pyinstaller\n    # with the ``--noconsole`` option requires redirecting everything\n    # (stdin, stdout, stderr) to avoid an OSError exception\n    # \"[Error 6] the handle is invalid.\"\n    ret.update(\n        {\n            \"stdin\": subprocess.PIPE,\n            \"stderr\": subprocess.PIPE,\n            \"startupinfo\": si,\n            \"env\": env,\n        }\n    )\n    return ret\n\n\ndef ffmpeg_bin_path(bin_name, gui_mode, ffmpeg_resources_path=None):\n    if platform.system() == \"Windows\":\n        bin_name = \"{}.exe\".format(bin_name)\n    if ffmpeg_resources_path is not None:\n        if not os.path.isdir(ffmpeg_resources_path):\n            if bin_name.lower().startswith(\"ffmpeg\"):\n                return ffmpeg_resources_path\n            ffmpeg_resources_path = os.path.dirname(ffmpeg_resources_path)\n        return os.path.join(ffmpeg_resources_path, bin_name)\n    try:\n        resource_path = os.environ[SUBSYNC_RESOURCES_ENV_MAGIC]\n        if len(resource_path) > 0:\n            return os.path.join(resource_path, \"ffmpeg-bin\", bin_name)\n    except KeyError:\n        if gui_mode:\n            logger.info(\n                \"Couldn't find resource path; 
falling back to searching system path\"\n            )\n    return bin_name\n"
  },
  {
    "path": "ffsubsync/ffsubsync.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nimport argparse\nfrom datetime import datetime\nimport logging\nimport os\nimport shutil\nimport subprocess\nimport sys\nfrom typing import cast, Any, Callable, Dict, List, Optional, Tuple, Union\n\nimport numpy as np\n\nfrom ffsubsync.aligners import FFTAligner, MaxScoreAligner\nfrom ffsubsync.constants import (\n    DEFAULT_APPLY_OFFSET_SECONDS,\n    DEFAULT_FRAME_RATE,\n    DEFAULT_MAX_OFFSET_SECONDS,\n    DEFAULT_MAX_SUBTITLE_SECONDS,\n    DEFAULT_NON_SPEECH_LABEL,\n    DEFAULT_START_SECONDS,\n    DEFAULT_VAD,\n    DEFAULT_ENCODING,\n    FRAMERATE_RATIOS,\n    SAMPLE_RATE,\n    SUBTITLE_EXTENSIONS,\n)\nfrom ffsubsync.ffmpeg_utils import ffmpeg_bin_path\nfrom ffsubsync.sklearn_shim import Pipeline, TransformerMixin\nfrom ffsubsync.speech_transformers import (\n    VideoSpeechTransformer,\n    DeserializeSpeechTransformer,\n    make_subtitle_speech_pipeline,\n)\nfrom ffsubsync.subtitle_parser import make_subtitle_parser\nfrom ffsubsync.subtitle_transformers import SubtitleMerger, SubtitleShifter\nfrom ffsubsync.version import get_version\n\n\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\ndef override(args: argparse.Namespace, **kwargs: Any) -> Dict[str, Any]:\n    args_dict = dict(args.__dict__)\n    args_dict.update(kwargs)\n    return args_dict\n\n\ndef _ref_format(ref_fname: Optional[str]) -> Optional[str]:\n    if ref_fname is None:\n        return None\n    return ref_fname[-3:]\n\n\ndef make_test_case(\n    args: argparse.Namespace, npy_savename: Optional[str], sync_was_successful: bool\n) -> int:\n    if npy_savename is None:\n        raise ValueError(\"need non-null npy_savename\")\n    tar_dir = \"{}.{}\".format(\n        args.reference, datetime.now().strftime(\"%Y-%m-%d-%H-%M-%S\")\n    )\n    logger.info(\"creating test archive {}.tar.gz...\".format(tar_dir))\n    os.mkdir(tar_dir)\n    try:\n        log_path = \"ffsubsync.log\"\n        if args.log_dir_path is not None and 
os.path.isdir(args.log_dir_path):\n            log_path = os.path.join(args.log_dir_path, log_path)\n        shutil.copy(log_path, tar_dir)\n        shutil.copy(args.srtin[0], tar_dir)\n        if sync_was_successful:\n            shutil.move(args.srtout, tar_dir)\n        if _ref_format(args.reference) in SUBTITLE_EXTENSIONS:\n            shutil.copy(args.reference, tar_dir)\n        elif args.serialize_speech or args.reference == npy_savename:\n            shutil.copy(npy_savename, tar_dir)\n        else:\n            shutil.move(npy_savename, tar_dir)\n        supported_formats = set(list(zip(*shutil.get_archive_formats()))[0])\n        preferred_formats = [\"gztar\", \"bztar\", \"xztar\", \"zip\", \"tar\"]\n        for archive_format in preferred_formats:\n            if archive_format in supported_formats:\n                shutil.make_archive(tar_dir, archive_format, os.curdir, tar_dir)\n                break\n        else:\n            logger.error(\n                \"failed to create test archive; no formats supported \"\n                \"(this should not happen)\"\n            )\n            return 1\n        logger.info(\"...done\")\n    finally:\n        shutil.rmtree(tar_dir)\n    return 0\n\n\ndef get_srt_pipe_maker(\n    args: argparse.Namespace, srtin: Optional[str]\n) -> Callable[[Optional[float]], Union[Pipeline, Callable[[float], Pipeline]]]:\n    if srtin is None:\n        srtin_format = \"srt\"\n    else:\n        srtin_format = os.path.splitext(srtin)[-1][1:]\n    parser = make_subtitle_parser(fmt=srtin_format, caching=True, **args.__dict__)\n    return lambda scale_factor: make_subtitle_speech_pipeline(\n        **override(args, scale_factor=scale_factor, parser=parser)\n    )\n\n\ndef get_framerate_ratios_to_try(args: argparse.Namespace) -> List[Optional[float]]:\n    if args.no_fix_framerate:\n        return []\n    else:\n        framerate_ratios = list(\n            np.concatenate(\n                [np.array(FRAMERATE_RATIOS), 1.0 / 
np.array(FRAMERATE_RATIOS)]\n            )\n        )\n        if args.gss:\n            framerate_ratios.append(None)\n        return framerate_ratios\n\n\ndef try_sync(\n    args: argparse.Namespace, reference_pipe: Optional[Pipeline], result: Dict[str, Any]\n) -> bool:\n    result[\"sync_was_successful\"] = False\n    sync_was_successful = True\n    logger.info(\n        \"extracting speech segments from %s...\",\n        \"stdin\" if not args.srtin else \"subtitles file(s) {}\".format(args.srtin),\n    )\n    if not args.srtin:\n        args.srtin = [None]\n    for srtin in args.srtin:\n        try:\n            skip_sync = args.skip_sync or reference_pipe is None\n            skip_infer_framerate_ratio = (\n                args.skip_infer_framerate_ratio or reference_pipe is None\n            )\n            srtout = srtin if args.overwrite_input else args.srtout\n            srt_pipe_maker = get_srt_pipe_maker(args, srtin)\n            framerate_ratios = get_framerate_ratios_to_try(args)\n            srt_pipes = [srt_pipe_maker(1.0)] + [\n                srt_pipe_maker(rat) for rat in framerate_ratios\n            ]\n            for srt_pipe in srt_pipes:\n                if callable(srt_pipe):\n                    continue\n                else:\n                    srt_pipe.fit(srtin)\n            if not skip_infer_framerate_ratio and hasattr(\n                reference_pipe[-1], \"num_frames\"\n            ):\n                inferred_framerate_ratio_from_length = (\n                    float(reference_pipe[-1].num_frames)\n                    / cast(Pipeline, srt_pipes[0])[-1].num_frames\n                )\n                logger.info(\n                    \"inferred frameratio ratio: %.3f\"\n                    % inferred_framerate_ratio_from_length\n                )\n                srt_pipes.append(\n                    cast(\n                        Pipeline, srt_pipe_maker(inferred_framerate_ratio_from_length)\n                    ).fit(srtin)\n      
          )\n                logger.info(\"...done\")\n            logger.info(\"computing alignments...\")\n            if skip_sync:\n                best_score = 0.0\n                best_srt_pipe = cast(Pipeline, srt_pipes[0])\n                offset_samples = 0\n            else:\n                (best_score, offset_samples), best_srt_pipe = MaxScoreAligner(\n                    FFTAligner, srtin, SAMPLE_RATE, args.max_offset_seconds\n                ).fit_transform(\n                    reference_pipe.transform(args.reference),\n                    srt_pipes,\n                )\n            if best_score < 0:\n                sync_was_successful = False\n            logger.info(\"...done\")\n            offset_seconds = (\n                offset_samples / float(SAMPLE_RATE) + args.apply_offset_seconds\n            )\n            scale_step = best_srt_pipe.named_steps[\"scale\"]\n            logger.info(\"score: %.3f\", best_score)\n            logger.info(\"offset seconds: %.3f\", offset_seconds)\n            logger.info(\"framerate scale factor: %.3f\", scale_step.scale_factor)\n            output_steps: List[Tuple[str, TransformerMixin]] = [\n                (\"shift\", SubtitleShifter(offset_seconds))\n            ]\n            if args.merge_with_reference:\n                output_steps.append(\n                    (\"merge\", SubtitleMerger(reference_pipe.named_steps[\"parse\"].subs_))\n                )\n            output_pipe = Pipeline(output_steps)\n            out_subs = output_pipe.fit_transform(scale_step.subs_)\n            if args.output_encoding != \"same\":\n                out_subs = out_subs.set_encoding(args.output_encoding)\n            suppress_output_thresh = args.suppress_output_if_offset_less_than\n            if offset_seconds >= (suppress_output_thresh or float(\"-inf\")):\n                logger.info(\"writing output to {}\".format(srtout or \"stdout\"))\n                out_subs.write_file(srtout)\n            else:\n              
  logger.warning(\n                    \"suppressing output because offset %s was less than suppression threshold %s\",\n                    offset_seconds,\n                    args.suppress_output_if_offset_less_than,\n                )\n        except Exception:\n            sync_was_successful = False\n            logger.exception(\"failed to sync %s\", srtin)\n        else:\n            result[\"offset_seconds\"] = offset_seconds\n            result[\"framerate_scale_factor\"] = scale_step.scale_factor\n    result[\"sync_was_successful\"] = sync_was_successful\n    return sync_was_successful\n\n\ndef make_reference_pipe(args: argparse.Namespace) -> Pipeline:\n    ref_format = _ref_format(args.reference)\n    if ref_format in SUBTITLE_EXTENSIONS:\n        if args.vad is not None:\n            logger.warning(\"Vad specified, but reference was not a movie\")\n        return cast(\n            Pipeline,\n            make_subtitle_speech_pipeline(\n                fmt=ref_format,\n                **override(args, encoding=args.reference_encoding or DEFAULT_ENCODING),\n            ),\n        )\n    elif ref_format in (\"npy\", \"npz\"):\n        if args.vad is not None:\n            logger.warning(\"Vad specified, but reference was not a movie\")\n        return Pipeline(\n            [(\"deserialize\", DeserializeSpeechTransformer(args.non_speech_label))]\n        )\n    else:\n        vad = args.vad or DEFAULT_VAD\n        if args.reference_encoding is not None:\n            logger.warning(\n                \"Reference srt encoding specified, but reference was a video file\"\n            )\n        ref_stream = args.reference_stream\n        if ref_stream is not None and not ref_stream.startswith(\"0:\"):\n            ref_stream = \"0:\" + ref_stream\n        return Pipeline(\n            [\n                (\n                    \"speech_extract\",\n                    VideoSpeechTransformer(\n                        vad=vad,\n                        
sample_rate=SAMPLE_RATE,\n                        frame_rate=args.frame_rate,\n                        non_speech_label=args.non_speech_label,\n                        start_seconds=args.start_seconds,\n                        ffmpeg_path=args.ffmpeg_path,\n                        ref_stream=ref_stream,\n                        vlc_mode=args.vlc_mode,\n                        gui_mode=args.gui_mode,\n                    ),\n                ),\n            ]\n        )\n\n\ndef extract_subtitles_from_reference(args: argparse.Namespace) -> int:\n    stream = args.extract_subs_from_stream\n    if not stream.startswith(\"0:s:\"):\n        stream = \"0:s:{}\".format(stream)\n    elif not stream.startswith(\"0:\") and stream.startswith(\"s:\"):\n        stream = \"0:{}\".format(stream)\n    if not stream.startswith(\"0:s:\"):\n        logger.error(\n            \"invalid stream for subtitle extraction: %s\", args.extract_subs_from_stream\n        )\n    ffmpeg_args = [\n        ffmpeg_bin_path(\"ffmpeg\", args.gui_mode, ffmpeg_resources_path=args.ffmpeg_path)\n    ]\n    ffmpeg_args.extend(\n        [\n            \"-y\",\n            \"-nostdin\",\n            \"-loglevel\",\n            \"fatal\",\n            \"-i\",\n            args.reference,\n            \"-map\",\n            \"{}\".format(stream),\n            \"-f\",\n            \"srt\",\n        ]\n    )\n    if args.srtout is None:\n        ffmpeg_args.append(\"-\")\n    else:\n        ffmpeg_args.append(args.srtout)\n    logger.info(\n        \"attempting to extract subtitles to {} ...\".format(\n            \"stdout\" if args.srtout is None else args.srtout\n        )\n    )\n    retcode = subprocess.call(ffmpeg_args)\n    if retcode == 0:\n        logger.info(\"...done\")\n    else:\n        logger.error(\n            \"ffmpeg unable to extract subtitles from reference; return code %d\", retcode\n        )\n    return retcode\n\n\ndef validate_args(args: argparse.Namespace) -> None:\n    if 
args.vlc_mode:\n        logger.setLevel(logging.CRITICAL)\n    if args.reference is None:\n        if args.apply_offset_seconds == 0 or not args.srtin:\n            raise ValueError(\n                \"`reference` required unless `--apply-offset-seconds` specified\"\n            )\n    if args.apply_offset_seconds != 0:\n        if not args.srtin:\n            args.srtin = [args.reference]\n        if not args.srtin:\n            raise ValueError(\n                \"at least one of `srtin` or `reference` must be specified to apply offset seconds\"\n            )\n    if args.srtin:\n        if len(args.srtin) > 1 and not args.overwrite_input:\n            raise ValueError(\n                \"cannot specify multiple input srt files without overwriting\"\n            )\n        if len(args.srtin) > 1 and args.make_test_case:\n            raise ValueError(\"cannot specify multiple input srt files for test cases\")\n        if len(args.srtin) > 1 and args.gui_mode:\n            raise ValueError(\"cannot specify multiple input srt files in GUI mode\")\n    if (\n        args.make_test_case and not args.gui_mode\n    ):  # this validation not necessary for gui mode\n        if not args.srtin or args.srtout is None:\n            raise ValueError(\n                \"need to specify input and output srt files for test cases\"\n            )\n    if args.overwrite_input:\n        if args.extract_subs_from_stream is not None:\n            raise ValueError(\n                \"input overwriting not allowed for extracting subtitles from reference\"\n            )\n        if not args.srtin:\n            raise ValueError(\n                \"need to specify input srt if --overwrite-input \"\n                \"is specified since we cannot overwrite stdin\"\n            )\n        if args.srtout is not None:\n            raise ValueError(\n                \"overwrite input set but output file specified; \"\n                \"refusing to run in case this was not intended\"\n          
  )\n    if args.extract_subs_from_stream is not None:\n        if args.make_test_case:\n            raise ValueError(\"test case is for sync and not subtitle extraction\")\n        if args.srtin:\n            raise ValueError(\n                \"stream specified for reference subtitle extraction; \"\n                \"-i flag for sync input not allowed\"\n            )\n\n\ndef validate_file_permissions(args: argparse.Namespace) -> None:\n    error_string_template = (\n        \"unable to {action} {file}; \"\n        \"try ensuring file exists and has correct permissions\"\n    )\n    if args.reference is not None and not os.access(args.reference, os.R_OK):\n        raise ValueError(\n            error_string_template.format(action=\"read reference\", file=args.reference)\n        )\n    if args.srtin:\n        for srtin in args.srtin:\n            if srtin is not None and not os.access(srtin, os.R_OK):\n                raise ValueError(\n                    error_string_template.format(\n                        action=\"read input subtitles\", file=srtin\n                    )\n                )\n    if (\n        args.srtout is not None\n        and os.path.exists(args.srtout)\n        and not os.access(args.srtout, os.W_OK)\n    ):\n        raise ValueError(\n            error_string_template.format(\n                action=\"write output subtitles\", file=args.srtout\n            )\n        )\n    if args.make_test_case or args.serialize_speech:\n        npy_savename = os.path.splitext(args.reference)[0] + \".npz\"\n        if os.path.exists(npy_savename) and not os.access(npy_savename, os.W_OK):\n            raise ValueError(\n                \"unable to write test case file archive %s (try checking permissions)\"\n                % npy_savename\n            )\n\n\ndef _setup_logging(\n    args: argparse.Namespace,\n) -> Tuple[Optional[str], Optional[logging.FileHandler]]:\n    log_handler = None\n    log_path = None\n    if args.make_test_case or 
args.log_dir_path is not None:\n        log_path = \"ffsubsync.log\"\n        if args.log_dir_path is not None and os.path.isdir(args.log_dir_path):\n            log_path = os.path.join(args.log_dir_path, log_path)\n        log_handler = logging.FileHandler(log_path)\n        logger.addHandler(log_handler)\n        logger.info(\"this log will be written to %s\", os.path.abspath(log_path))\n    return log_path, log_handler\n\n\ndef _npy_savename(args: argparse.Namespace) -> str:\n    return os.path.splitext(args.reference)[0] + \".npz\"\n\n\ndef _run_impl(args: argparse.Namespace, result: Dict[str, Any]) -> bool:\n    if args.extract_subs_from_stream is not None:\n        result[\"retval\"] = extract_subtitles_from_reference(args)\n        return True\n    if args.srtin is not None and (\n        args.reference is None\n        or (len(args.srtin) == 1 and args.srtin[0] == args.reference)\n    ):\n        return try_sync(args, None, result)\n    reference_pipe = make_reference_pipe(args)\n    logger.info(\"extracting speech segments from reference '%s'...\", args.reference)\n    reference_pipe.fit(args.reference)\n    logger.info(\"...done\")\n    if args.make_test_case or args.serialize_speech:\n        logger.info(\"serializing speech...\")\n        np.savez_compressed(\n            _npy_savename(args), speech=reference_pipe.transform(args.reference)\n        )\n        logger.info(\"...done\")\n        if not args.srtin:\n            logger.info(\n                \"unsynchronized subtitle file not specified; skipping synchronization\"\n            )\n            return False\n    return try_sync(args, reference_pipe, result)\n\n\ndef validate_and_transform_args(\n    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]\n) -> Optional[argparse.Namespace]:\n    if isinstance(parser_or_args, argparse.Namespace):\n        parser = None\n        args = parser_or_args\n    else:\n        parser = parser_or_args\n        args = parser.parse_args()\n    
try:\n        validate_args(args)\n    except ValueError as e:\n        logger.error(e)\n        if parser is not None:\n            parser.print_usage()\n        return None\n    if args.gui_mode and args.srtout is None:\n        args.srtout = \"{}.synced.srt\".format(os.path.splitext(args.srtin[0])[0])\n    try:\n        validate_file_permissions(args)\n    except ValueError as e:\n        logger.error(e)\n        return None\n    ref_format = _ref_format(args.reference)\n    if args.merge_with_reference and ref_format not in SUBTITLE_EXTENSIONS:\n        logger.error(\n            \"merging synced output with reference only valid \"\n            \"when reference composed of subtitles\"\n        )\n        return None\n    return args\n\n\ndef run(\n    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]\n) -> Dict[str, Any]:\n    sync_was_successful = False\n    result = {\n        \"retval\": 0,\n        \"offset_seconds\": None,\n        \"framerate_scale_factor\": None,\n    }\n    args = validate_and_transform_args(parser_or_args)\n    if args is None:\n        result[\"retval\"] = 1\n        return result\n    log_path, log_handler = _setup_logging(args)\n    try:\n        sync_was_successful = _run_impl(args, result)\n        result[\"sync_was_successful\"] = sync_was_successful\n        return result\n    finally:\n        if log_handler is not None and log_path is not None:\n            log_handler.close()\n            logger.removeHandler(log_handler)\n            if args.make_test_case:\n                result[\"retval\"] += make_test_case(\n                    args, _npy_savename(args), sync_was_successful\n                )\n            if args.log_dir_path is None or not os.path.isdir(args.log_dir_path):\n                os.remove(log_path)\n\n\ndef add_main_args_for_cli(parser: argparse.ArgumentParser) -> None:\n    parser.add_argument(\n        \"reference\",\n        nargs=\"?\",\n        help=(\n            \"Reference (video, 
subtitles, or a numpy array with VAD speech) \"\n            \"to which to synchronize input subtitles.\"\n        ),\n    )\n    parser.add_argument(\n        \"-i\", \"--srtin\", nargs=\"*\", help=\"Input subtitles file (default=stdin).\"\n    )\n    parser.add_argument(\n        \"-o\", \"--srtout\", help=\"Output subtitles file (default=stdout).\"\n    )\n    parser.add_argument(\n        \"--merge-with-reference\",\n        \"--merge\",\n        action=\"store_true\",\n        help=\"Merge reference subtitles with synced output subtitles.\",\n    )\n    parser.add_argument(\n        \"--make-test-case\",\n        \"--create-test-case\",\n        action=\"store_true\",\n        help=\"If specified, serialize reference speech to a numpy array, \"\n        \"and create an archive with input/output subtitles \"\n        \"and serialized speech.\",\n    )\n    parser.add_argument(\n        \"--reference-stream\",\n        \"--refstream\",\n        \"--reference-track\",\n        \"--reftrack\",\n        default=None,\n        help=(\n            \"Which stream/track in the video file to use as reference, \"\n            \"formatted according to ffmpeg conventions. For example, 0:s:0 \"\n            \"uses the first subtitle track; 0:a:3 would use the fourth audio track. \"\n            \"You can also drop the leading `0:`; i.e. use s:0 or a:3, respectively. 
\"\n            \"Example: `ffs ref.mkv -i in.srt -o out.srt --reference-stream s:2`\"\n        ),\n    )\n\n\ndef add_cli_only_args(parser: argparse.ArgumentParser) -> None:\n    parser.add_argument(\n        \"-v\",\n        \"--version\",\n        action=\"version\",\n        version=\"{package} {version}\".format(\n            package=__package__, version=get_version()\n        ),\n    )\n    parser.add_argument(\n        \"--overwrite-input\",\n        action=\"store_true\",\n        help=(\n            \"If specified, will overwrite the input srt \"\n            \"instead of writing the output to a new file.\"\n        ),\n    )\n    parser.add_argument(\n        \"--encoding\",\n        default=DEFAULT_ENCODING,\n        help=\"What encoding to use for reading input subtitles \"\n        \"(default=%s).\" % DEFAULT_ENCODING,\n    )\n    parser.add_argument(\n        \"--max-subtitle-seconds\",\n        type=float,\n        default=DEFAULT_MAX_SUBTITLE_SECONDS,\n        help=\"Maximum duration for a subtitle to appear on-screen \"\n        \"(default=%.3f seconds).\" % DEFAULT_MAX_SUBTITLE_SECONDS,\n    )\n    parser.add_argument(\n        \"--start-seconds\",\n        type=int,\n        default=DEFAULT_START_SECONDS,\n        help=\"Start time for processing \"\n        \"(default=%d seconds).\" % DEFAULT_START_SECONDS,\n    )\n    parser.add_argument(\n        \"--max-offset-seconds\",\n        type=float,\n        default=DEFAULT_MAX_OFFSET_SECONDS,\n        help=\"The max allowed offset seconds for any subtitle segment \"\n        \"(default=%d seconds).\" % DEFAULT_MAX_OFFSET_SECONDS,\n    )\n    parser.add_argument(\n        \"--apply-offset-seconds\",\n        type=float,\n        default=DEFAULT_APPLY_OFFSET_SECONDS,\n        help=\"Apply a predefined offset in seconds to all subtitle segments \"\n        \"(default=%d seconds).\" % DEFAULT_APPLY_OFFSET_SECONDS,\n    )\n    parser.add_argument(\n        \"--frame-rate\",\n        type=int,\n        
default=DEFAULT_FRAME_RATE,\n        help=\"Frame rate for audio extraction (default=%d).\" % DEFAULT_FRAME_RATE,\n    )\n    parser.add_argument(\n        \"--skip-infer-framerate-ratio\",\n        action=\"store_true\",\n        help=\"If set, do not try to infer framerate ratio based on duration ratio.\",\n    )\n    parser.add_argument(\n        \"--non-speech-label\",\n        type=float,\n        default=DEFAULT_NON_SPEECH_LABEL,\n        help=\"Label to use for frames detected as non-speech (default=%f)\"\n        % DEFAULT_NON_SPEECH_LABEL,\n    )\n    parser.add_argument(\n        \"--output-encoding\",\n        default=\"utf-8\",\n        help=\"What encoding to use for writing output subtitles \"\n        '(default=utf-8). Can indicate \"same\" to use same '\n        \"encoding as that of the input.\",\n    )\n    parser.add_argument(\n        \"--reference-encoding\",\n        help=\"What encoding to use for reading / writing reference subtitles \"\n        \"(if applicable, default=infer).\",\n    )\n    parser.add_argument(\n        \"--vad\",\n        choices=[\n            \"subs_then_webrtc\",\n            \"webrtc\",\n            \"subs_then_auditok\",\n            \"auditok\",\n            \"subs_then_silero\",\n            \"silero\",\n        ],\n        default=None,\n        help=\"Which voice activity detector to use for speech extraction \"\n        \"(if using video / audio as a reference, default={}).\".format(DEFAULT_VAD),\n    )\n    parser.add_argument(\n        \"--no-fix-framerate\",\n        action=\"store_true\",\n        help=\"If specified, subsync will not attempt to correct a framerate \"\n        \"mismatch between reference and subtitles.\",\n    )\n    parser.add_argument(\n        \"--serialize-speech\",\n        action=\"store_true\",\n        help=\"If specified, serialize reference speech to a numpy array.\",\n    )\n    parser.add_argument(\n        \"--extract-subs-from-stream\",\n        
\"--extract-subtitles-from-stream\",\n        default=None,\n        help=\"If specified, do not attempt sync; instead, just extract subtitles\"\n        \" from the specified stream using the reference.\",\n    )\n    parser.add_argument(\n        \"--suppress-output-if-offset-less-than\",\n        type=float,\n        default=None,\n        help=\"If specified, do not produce output if offset below provided threshold.\",\n    )\n    parser.add_argument(\n        \"--ffmpeg-path\",\n        \"--ffmpegpath\",\n        default=None,\n        help=\"Where to look for ffmpeg and ffprobe. Uses the system PATH by default.\",\n    )\n    parser.add_argument(\n        \"--log-dir-path\",\n        default=None,\n        help=(\n            \"If provided, will save log file ffsubsync.log to this path \"\n            \"(must be an existing directory).\"\n        ),\n    )\n    parser.add_argument(\n        \"--gss\",\n        action=\"store_true\",\n        help=\"If specified, use golden-section search to try to find \"\n        \"the optimal framerate ratio between video and subtitles.\",\n    )\n    parser.add_argument(\n        \"--strict\",\n        action=\"store_true\",\n        help=\"If specified, refuse to parse srt files with formatting issues.\",\n    )\n    parser.add_argument(\"--vlc-mode\", action=\"store_true\", help=argparse.SUPPRESS)\n    parser.add_argument(\"--gui-mode\", action=\"store_true\", help=argparse.SUPPRESS)\n    parser.add_argument(\"--skip-sync\", action=\"store_true\", help=argparse.SUPPRESS)\n\n\ndef make_parser() -> argparse.ArgumentParser:\n    parser = argparse.ArgumentParser(description=\"Synchronize subtitles with video.\")\n    add_main_args_for_cli(parser)\n    add_cli_only_args(parser)\n    return parser\n\n\ndef main() -> int:\n    parser = make_parser()\n    return run(parser)[\"retval\"]\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "ffsubsync/ffsubsync_gui.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nimport logging\nimport os\nimport sys\n\nfrom gooey import Gooey, GooeyParser\n\nfrom ffsubsync.constants import (\n    RELEASE_URL,\n    WEBSITE,\n    DEV_WEBSITE,\n    DESCRIPTION,\n    LONG_DESCRIPTION,\n    PROJECT_NAME,\n    PROJECT_LICENSE,\n    COPYRIGHT_YEAR,\n    SUBSYNC_RESOURCES_ENV_MAGIC,\n)\n\n# set the env magic so that we look for resources in the right place\nif SUBSYNC_RESOURCES_ENV_MAGIC not in os.environ:\n    os.environ[SUBSYNC_RESOURCES_ENV_MAGIC] = getattr(sys, \"_MEIPASS\", \"\")\nfrom ffsubsync.ffsubsync import run, add_cli_only_args\nfrom ffsubsync.version import get_version, update_available\n\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n\n_menu = [\n    {\n        \"name\": \"File\",\n        \"items\": [\n            {\n                \"type\": \"AboutDialog\",\n                \"menuTitle\": \"About\",\n                \"name\": PROJECT_NAME,\n                \"description\": LONG_DESCRIPTION,\n                \"version\": get_version(),\n                \"copyright\": COPYRIGHT_YEAR,\n                \"website\": WEBSITE,\n                \"developer\": DEV_WEBSITE,\n                \"license\": PROJECT_LICENSE,\n            },\n            {\n                \"type\": \"Link\",\n                \"menuTitle\": \"Download latest release\",\n                \"url\": RELEASE_URL,\n            },\n        ],\n    }\n]\n\n\n@Gooey(\n    program_name=PROJECT_NAME,\n    image_dir=os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], \"img\"),\n    menu=_menu,\n    tabbed_groups=True,\n    progress_regex=r\"(\\d+)%\",\n    hide_progress_msg=True,\n)\ndef make_parser():\n    description = DESCRIPTION\n    if update_available():\n        description += (\n            \"\\nUpdate available! 
Please go to \"\n            '\"File\" -> \"Download latest release\"'\n            \" to update FFsubsync.\"\n        )\n    parser = GooeyParser(description=description)\n    main_group = parser.add_argument_group(\"Basic\")\n    main_group.add_argument(\n        \"reference\",\n        help=\"Reference (video or subtitles file) to which to synchronize input subtitles.\",\n        widget=\"FileChooser\",\n    )\n    main_group.add_argument(\"srtin\", help=\"Input subtitles file\", widget=\"FileChooser\")\n    main_group.add_argument(\n        \"-o\",\n        \"--srtout\",\n        help=\"Output subtitles file (default=${srtin}.synced.srt).\",\n        widget=\"FileSaver\",\n    )\n    advanced_group = parser.add_argument_group(\"Advanced\")\n\n    # TODO: these are shared between gui and cli; don't duplicate this code\n    advanced_group.add_argument(\n        \"--merge-with-reference\",\n        \"--merge\",\n        action=\"store_true\",\n        help=\"Merge reference subtitles with synced output subtitles.\",\n    )\n    advanced_group.add_argument(\n        \"--make-test-case\",\n        \"--create-test-case\",\n        action=\"store_true\",\n        help=\"If specified, create a test archive a few KiB in size \"\n        \"to send to the developer as a debugging aid.\",\n    )\n    advanced_group.add_argument(\n        \"--reference-stream\",\n        \"--refstream\",\n        \"--reference-track\",\n        \"--reftrack\",\n        default=None,\n        help=\"Which stream/track in the video file to use as reference, \"\n        \"formatted according to ffmpeg conventions. 
For example, s:0 \"\n        \"uses the first subtitle track; a:3 would use the fourth audio track.\",\n    )\n    return parser\n\n\ndef main():\n    parser = make_parser()\n    _ = parser.parse_args()  # Fool Gooey into presenting the simpler menu\n    add_cli_only_args(parser)\n    args = parser.parse_args()\n    args.gui_mode = True\n    return run(args)\n\n\nif __name__ == \"__main__\":\n    sys.exit(main())\n"
  },
  {
    "path": "ffsubsync/file_utils.py",
    "content": "# -*- coding: utf-8 -*-\nimport sys\n\n\nclass open_file:\n    \"\"\"\n    Context manager that opens a filename and closes it on exit, but does\n    nothing for file-like objects.\n    \"\"\"\n\n    def __init__(self, filename, *args, **kwargs) -> None:\n        self.closing = kwargs.pop(\"closing\", False)\n        if filename is None:\n            stream = sys.stdout if \"w\" in args else sys.stdin\n            self.fh = open(stream.fileno(), *args, **kwargs)\n        elif isinstance(filename, str):\n            self.fh = open(filename, *args, **kwargs)\n            self.closing = True\n        else:\n            self.fh = filename\n\n    def __enter__(self):\n        return self.fh\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        if self.closing:\n            self.fh.close()\n\n        return False\n"
  },
  {
    "path": "ffsubsync/generic_subtitles.py",
    "content": "# -*- coding: utf-8 -*-\nimport copy\nfrom datetime import timedelta\nimport logging\nimport os\nfrom typing import cast, Any, Dict, Iterator, List, Optional\n\nimport pysubs2\nimport srt\nimport sys\n\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\nclass GenericSubtitle:\n    def __init__(self, start, end, inner):\n        self.start = start\n        self.end = end\n        self.inner = inner\n\n    def __eq__(self, other: object) -> bool:\n        if not isinstance(other, GenericSubtitle):\n            return False\n        eq = True\n        eq = eq and self.start == other.start\n        eq = eq and self.end == other.end\n        eq = eq and self.inner == other.inner\n        return eq\n\n    @property\n    def content(self) -> str:\n        if isinstance(self.inner, srt.Subtitle):\n            ret = self.inner.content\n        elif isinstance(self.inner, pysubs2.SSAEvent):\n            ret = self.inner.text\n        else:\n            raise NotImplementedError(\n                \"unsupported subtitle type: %s\" % type(self.inner)\n            )\n        return ret\n\n    def resolve_inner_timestamps(self):\n        ret = copy.deepcopy(self.inner)\n        if isinstance(self.inner, srt.Subtitle):\n            ret.start = self.start\n            ret.end = self.end\n        elif isinstance(self.inner, pysubs2.SSAEvent):\n            ret.start = pysubs2.make_time(s=self.start.total_seconds())\n            ret.end = pysubs2.make_time(s=self.end.total_seconds())\n        else:\n            raise NotImplementedError(\n                \"unsupported subtitle type: %s\" % type(self.inner)\n            )\n        return ret\n\n    def merge_with(self, other):\n        assert isinstance(self.inner, type(other.inner))\n        inner_merged = copy.deepcopy(self.inner)\n        if isinstance(self.inner, srt.Subtitle):\n            inner_merged.content = \"{}\\n{}\".format(\n                
inner_merged.content, other.inner.content\n            )\n            return self.__class__(self.start, self.end, inner_merged)\n        else:\n            raise NotImplementedError(\n                \"unsupported subtitle type: %s\" % type(self.inner)\n            )\n\n    @classmethod\n    def wrap_inner_subtitle(cls, sub) -> \"GenericSubtitle\":\n        if isinstance(sub, srt.Subtitle):\n            return cls(sub.start, sub.end, sub)\n        elif isinstance(sub, pysubs2.SSAEvent):\n            return cls(\n                timedelta(milliseconds=sub.start), timedelta(milliseconds=sub.end), sub\n            )\n        else:\n            raise NotImplementedError(\"unsupported subtitle type: %s\" % type(sub))\n\n\nclass GenericSubtitlesFile:\n    def __init__(self, subs: List[GenericSubtitle], *_, **kwargs: Any):\n        sub_format: str = cast(str, kwargs.pop(\"sub_format\", None))\n        if sub_format is None:\n            raise ValueError(\"format must be specified\")\n        encoding: str = cast(str, kwargs.pop(\"encoding\", None))\n        if encoding is None:\n            raise ValueError(\"encoding must be specified\")\n        self.subs_: List[GenericSubtitle] = subs\n        self._sub_format: str = sub_format\n        self._encoding: str = encoding\n        self._styles: Optional[Dict[str, pysubs2.SSAStyle]] = kwargs.pop(\"styles\", None)\n        self._fonts_opaque: Optional[Dict[str, Any]] = kwargs.pop(\"fonts_opaque\", None)\n        self._info: Optional[Dict[str, str]] = kwargs.pop(\"info\", None)\n\n    def set_encoding(self, encoding: str) -> \"GenericSubtitlesFile\":\n        if encoding != \"same\":\n            self._encoding = encoding\n        return self\n\n    def __len__(self) -> int:\n        return len(self.subs_)\n\n    def __getitem__(self, item: int) -> GenericSubtitle:\n        return self.subs_[item]\n\n    def __iter__(self) -> Iterator[GenericSubtitle]:\n        return iter(self.subs_)\n\n    def clone_props_for_subs(\n        
self, new_subs: List[GenericSubtitle]\n    ) -> \"GenericSubtitlesFile\":\n        return GenericSubtitlesFile(\n            new_subs,\n            sub_format=self._sub_format,\n            encoding=self._encoding,\n            styles=self._styles,\n            fonts_opaque=self._fonts_opaque,\n            info=self._info,\n        )\n\n    def gen_raw_resolved_subs(self):\n        for sub in self.subs_:\n            yield sub.resolve_inner_timestamps()\n\n    def offset(self, td: timedelta) -> \"GenericSubtitlesFile\":\n        offset_subs = []\n        for sub in self.subs_:\n            offset_subs.append(GenericSubtitle(sub.start + td, sub.end + td, sub.inner))\n        return self.clone_props_for_subs(offset_subs)\n\n    def write_file(self, fname: str) -> None:\n        # TODO: converter to go between self.subs_format and out_format\n        if fname is None:\n            out_format = self._sub_format\n        else:\n            out_format = os.path.splitext(fname)[-1][1:]\n        subs = list(self.gen_raw_resolved_subs())\n        if self._sub_format in (\"ssa\", \"ass\", \"vtt\"):\n            ssaf = pysubs2.SSAFile()\n            ssaf.events = subs\n            if self._styles is not None:\n                ssaf.styles = self._styles\n            if self._info is not None:\n                ssaf.info = self._info\n            if self._fonts_opaque is not None:\n                ssaf.fonts_opaque = self._fonts_opaque\n            to_write = ssaf.to_string(out_format)\n        elif self._sub_format == \"srt\" and out_format in (\"ssa\", \"ass\", \"vtt\"):\n            to_write = pysubs2.SSAFile.from_string(srt.compose(subs)).to_string(\n                out_format\n            )\n        elif out_format == \"srt\":\n            to_write = srt.compose(subs)\n        else:\n            raise NotImplementedError(\"unsupported output format: %s\" % out_format)\n\n        with open(fname or sys.stdout.fileno(), \"wb\") as f:\n            
f.write(to_write.encode(self._encoding))\n\n\nclass SubsMixin:\n    def __init__(self, subs: Optional[GenericSubtitlesFile] = None) -> None:\n        self.subs_: Optional[GenericSubtitlesFile] = subs\n\n    def set_encoding(self, encoding: str) -> \"SubsMixin\":\n        self.subs_.set_encoding(encoding)\n        return self\n"
  },
  {
    "path": "ffsubsync/golden_section_search.py",
    "content": "\"\"\"Python program for golden section search (straight-up copied from Wikipedia).\n   This implementation reuses function evaluations, saving 1/2 of the evaluations per\n   iteration, and returns a bounding interval.\"\"\"\nimport logging\nimport math\n\nlogging.basicConfig(level=logging.INFO)\nlogger = logging.getLogger(__name__)\n\n\ninvphi = (math.sqrt(5) - 1) / 2  # 1 / phi\ninvphi2 = (3 - math.sqrt(5)) / 2  # 1 / phi^2\n\n\ndef gss(f, a, b, tol=1e-4):\n    \"\"\"Golden-section search.\n\n    Given a function f with a single local minimum in\n    the interval [a,b], gss returns a subset interval\n    [c,d] that contains the minimum with d-c <= tol.\n\n    Example:\n    >>> f = lambda x: (x-2)**2\n    >>> a = 1\n    >>> b = 5\n    >>> tol = 1e-5\n    >>> (c,d) = gss(f, a, b, tol)\n    >>> print(c, d)\n    1.9999959837979107 2.0000050911830893\n    \"\"\"\n\n    (a, b) = (min(a, b), max(a, b))\n    h = b - a\n    if h <= tol:\n        return a, b\n\n    # Required steps to achieve tolerance\n    n = int(math.ceil(math.log(tol / h) / math.log(invphi)))\n    logger.info(\n        \"About to perform %d iterations of golden section search to find the best framerate\",\n        n,\n    )\n\n    def f_wrapped(x, is_last_iter):\n        try:\n            return f(x, is_last_iter)\n        except TypeError:\n            return f(x)\n\n    c = a + invphi2 * h\n    d = a + invphi * h\n    yc = f_wrapped(c, n == 1)\n    yd = f_wrapped(d, n == 1)\n\n    for k in range(n - 1):\n        if yc < yd:\n            b = d\n            d = c\n            yd = yc\n            h = invphi * h\n            c = a + invphi2 * h\n            yc = f_wrapped(c, k == n - 2)\n        else:\n            a = c\n            c = d\n            yc = yd\n            h = invphi * h\n            d = a + invphi * h\n            yd = f(d, k == n - 2)\n\n    if yc < yd:\n        return a, d\n    else:\n        return c, b\n"
  },
  {
    "path": "ffsubsync/sklearn_shim.py",
    "content": "# -*- coding: utf-8 -*-\n\"\"\"\nThis module borrows and adapts `Pipeline` from `sklearn.pipeline` and\n`TransformerMixin` from `sklearn.base` in the scikit-learn framework\n(commit hash d205638475ca542dc46862652e3bb0be663a8eac) to be precise).\nBoth are BSD licensed and allow for this sort of thing; attribution\nis given as a comment above each class. License reproduced below:\n\nBSD 3-Clause License\n\nCopyright (c) 2007-2022 The scikit-learn developers.\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n* Redistributions of source code must retain the above copyright notice, this\n  list of conditions and the following disclaimer.\n\n* Redistributions in binary form must reproduce the above copyright notice,\n  this list of conditions and the following disclaimer in the documentation\n  and/or other materials provided with the distribution.\n\n* Neither the name of the copyright holder nor the names of its\n  contributors may be used to endorse or promote products derived from\n  this software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\"\"\"\nfrom collections import defaultdict\nfrom itertools import islice\nfrom typing import Any, Callable, Optional\nfrom typing_extensions import Protocol\n\n\nclass TransformerProtocol(Protocol):\n    fit: Callable[..., \"TransformerProtocol\"]\n    transform: Callable[[Any], Any]\n\n\n# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>\n# License: BSD 3 clause\nclass TransformerMixin(TransformerProtocol):\n    \"\"\"Mixin class for all transformers.\"\"\"\n\n    def fit_transform(self, X: Any, y: Optional[Any] = None, **fit_params: Any) -> Any:\n        \"\"\"\n        Fit to data, then transform it.\n        Fits transformer to X and y with optional parameters fit_params\n        and returns a transformed version of X.\n        Parameters\n        ----------\n        X : ndarray of shape (n_samples, n_features)\n            Training set.\n        y : ndarray of shape (n_samples,), default=None\n            Target values.\n        **fit_params : dict\n            Additional fit parameters.\n        Returns\n        -------\n        X_new : ndarray array of shape (n_samples, n_features_new)\n            Transformed array.\n        \"\"\"\n        # non-optimized default implementation; override when a better\n        # method is possible for a given clustering algorithm\n        if y is None:\n            # fit method of arity 1 (unsupervised transformation)\n            return self.fit(X, **fit_params).transform(X)\n        else:\n            
# fit method of arity 2 (supervised transformation)\n            return self.fit(X, y, **fit_params).transform(X)\n\n\n# Author: Edouard Duchesnay\n#         Gael Varoquaux\n#         Virgile Fritsch\n#         Alexandre Gramfort\n#         Lars Buitinck\n# License: BSD\nclass Pipeline:\n    def __init__(self, steps, verbose=False):\n        self.steps = steps\n        self.verbose = verbose\n        self._validate_steps()\n\n    def _validate_steps(self):\n        names, estimators = zip(*self.steps)\n\n        # validate estimators\n        transformers = estimators[:-1]\n        estimator = estimators[-1]\n\n        for t in transformers:\n            if t is None or t == \"passthrough\":\n                continue\n            if not (hasattr(t, \"fit\") or hasattr(t, \"fit_transform\")) or not hasattr(\n                t, \"transform\"\n            ):\n                raise TypeError(\n                    \"All intermediate steps should be \"\n                    \"transformers and implement fit and transform \"\n                    \"or be the string 'passthrough' \"\n                    \"'%s' (type %s) doesn't\" % (t, type(t))\n                )\n\n        # We allow last estimator to be None as an identity transformation\n        if (\n            estimator is not None\n            and estimator != \"passthrough\"\n            and not hasattr(estimator, \"fit\")\n        ):\n            raise TypeError(\n                \"Last step of Pipeline should implement fit \"\n                \"or be the string 'passthrough'. 
\"\n                \"'%s' (type %s) doesn't\" % (estimator, type(estimator))\n            )\n\n    def _iter(self, with_final=True, filter_passthrough=True):\n        \"\"\"\n        Generate (idx, (name, trans)) tuples from self.steps\n\n        When filter_passthrough is True, 'passthrough' and None transformers\n        are filtered out.\n        \"\"\"\n        stop = len(self.steps)\n        if not with_final:\n            stop -= 1\n\n        for idx, (name, trans) in enumerate(islice(self.steps, 0, stop)):\n            if not filter_passthrough:\n                yield idx, name, trans\n            elif trans is not None and trans != \"passthrough\":\n                yield idx, name, trans\n\n    def __len__(self) -> int:\n        \"\"\"\n        Returns the length of the Pipeline\n        \"\"\"\n        return len(self.steps)\n\n    def __getitem__(self, ind):\n        \"\"\"Returns a sub-pipeline or a single esimtator in the pipeline\n\n        Indexing with an integer will return an estimator; using a slice\n        returns another Pipeline instance which copies a slice of this\n        Pipeline. 
This copy is shallow: modifying (or fitting) estimators in\n        the sub-pipeline will affect the larger pipeline and vice-versa.\n        However, replacing a value in `step` will not affect a copy.\n        \"\"\"\n        if isinstance(ind, slice):\n            if ind.step not in (1, None):\n                raise ValueError(\"Pipeline slicing only supports a step of 1\")\n            return self.__class__(self.steps[ind])\n        try:\n            name, est = self.steps[ind]\n        except TypeError:\n            # Not an int, try get step by name\n            return self.named_steps[ind]\n        return est\n\n    @property\n    def _estimator_type(self):\n        return self.steps[-1][1]._estimator_type\n\n    @property\n    def named_steps(self):\n        return dict(self.steps)\n\n    @property\n    def _final_estimator(self):\n        estimator = self.steps[-1][1]\n        return \"passthrough\" if estimator is None else estimator\n\n    def _log_message(self, step_idx):\n        if not self.verbose:\n            return None\n        name, step = self.steps[step_idx]\n\n        return \"(step %d of %d) Processing %s\" % (step_idx + 1, len(self.steps), name)\n\n    # Estimator interface\n\n    def _fit(self, X, y=None, **fit_params):\n        # shallow copy of steps - this should really be steps_\n        self.steps = list(self.steps)\n        self._validate_steps()\n\n        fit_params_steps = {name: {} for name, step in self.steps if step is not None}\n        for pname, pval in fit_params.items():\n            if \"__\" not in pname:\n                raise ValueError(\n                    \"Pipeline.fit does not accept the {} parameter. \"\n                    \"You can pass parameters to specific steps of your \"\n                    \"pipeline using the stepname__parameter format, e.g. 
\"\n                    \"`Pipeline.fit(X, y, logisticregression__sample_weight\"\n                    \"=sample_weight)`.\".format(pname)\n                )\n            step, param = pname.split(\"__\", 1)\n            fit_params_steps[step][param] = pval\n        for step_idx, name, transformer in self._iter(\n            with_final=False, filter_passthrough=False\n        ):\n            if transformer is None or transformer == \"passthrough\":\n                continue\n\n            # Fit or load from cache the current transformer\n            X, fitted_transformer = _fit_transform_one(\n                transformer, X, y, None, **fit_params_steps[name]\n            )\n            # Replace the transformer of the step with the fitted\n            # transformer. This is necessary when loading the transformer\n            # from the cache.\n            self.steps[step_idx] = (name, fitted_transformer)\n        if self._final_estimator == \"passthrough\":\n            return X, {}\n        return X, fit_params_steps[self.steps[-1][0]]\n\n    def fit(self, X, y=None, **fit_params):\n        \"\"\"Fit the model\n\n        Fit all the transforms one after the other and transform the\n        data, then fit the transformed data using the final estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. 
Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        self : Pipeline\n            This estimator\n        \"\"\"\n        Xt, fit_params = self._fit(X, y, **fit_params)\n        if self._final_estimator != \"passthrough\":\n            self._final_estimator.fit(Xt, y, **fit_params)\n        return self\n\n    def fit_transform(self, X, y=None, **fit_params):\n        \"\"\"Fit the model and transform with the final estimator\n\n        Fits all the transforms one after the other and transforms the\n        data, then uses fit_transform on transformed data with the final\n        estimator.\n\n        Parameters\n        ----------\n        X : iterable\n            Training data. Must fulfill input requirements of first step of the\n            pipeline.\n\n        y : iterable, default=None\n            Training targets. 
Must fulfill label requirements for all steps of\n            the pipeline.\n\n        **fit_params : dict of string -> object\n            Parameters passed to the ``fit`` method of each step, where\n            each parameter name is prefixed such that parameter ``p`` for step\n            ``s`` has key ``s__p``.\n\n        Returns\n        -------\n        Xt : array-like of shape  (n_samples, n_transformed_features)\n            Transformed samples\n        \"\"\"\n        last_step = self._final_estimator\n        Xt, fit_params = self._fit(X, y, **fit_params)\n        if last_step == \"passthrough\":\n            return Xt\n        if hasattr(last_step, \"fit_transform\"):\n            return last_step.fit_transform(Xt, y, **fit_params)\n        else:\n            return last_step.fit(Xt, y, **fit_params).transform(Xt)\n\n    @property\n    def transform(self):\n        \"\"\"Apply transforms, and transform with the final estimator\n\n        This also works where final estimator is ``None``: all prior\n        transformations are applied.\n\n        Parameters\n        ----------\n        X : iterable\n            Data to transform. 
Must fulfill input requirements of first step\n            of the pipeline.\n\n        Returns\n        -------\n        Xt : array-like of shape  (n_samples, n_transformed_features)\n        \"\"\"\n        # _final_estimator is None or has transform, otherwise attribute error\n        # XXX: Handling the None case means we can't use if_delegate_has_method\n        if self._final_estimator != \"passthrough\":\n            self._final_estimator.transform\n        return self._transform\n\n    def _transform(self, X):\n        Xt = X\n        for _, _, transform in self._iter():\n            Xt = transform.transform(Xt)\n        return Xt\n\n    @property\n    def classes_(self):\n        return self.steps[-1][-1].classes_\n\n    @property\n    def _pairwise(self):\n        # check if first estimator expects pairwise input\n        return getattr(self.steps[0][1], \"_pairwise\", False)\n\n    @property\n    def n_features_in_(self):\n        # delegate to first step (which will call _check_is_fitted)\n        return self.steps[0][1].n_features_in_\n\n\ndef _name_estimators(estimators):\n    \"\"\"Generate names for estimators.\"\"\"\n\n    names = [\n        estimator if isinstance(estimator, str) else type(estimator).__name__.lower()\n        for estimator in estimators\n    ]\n    namecount = defaultdict(int)\n    for est, name in zip(estimators, names):\n        namecount[name] += 1\n\n    for k, v in list(namecount.items()):\n        if v == 1:\n            del namecount[k]\n\n    for i in reversed(range(len(estimators))):\n        name = names[i]\n        if name in namecount:\n            names[i] += \"-%d\" % namecount[name]\n            namecount[name] -= 1\n\n    return list(zip(names, estimators))\n\n\ndef make_pipeline(*steps, **kwargs) -> Pipeline:\n    \"\"\"Construct a Pipeline from the given estimators.\n\n    This is a shorthand for the Pipeline constructor; it does not require, and\n    does not permit, naming the estimators. 
Instead, their names will be set\n    to the lowercase of their types automatically.\n\n    Parameters\n    ----------\n    *steps : list of estimators.\n\n    verbose : bool, default=False\n        If True, the time elapsed while fitting each step will be printed as it\n        is completed.\n\n    Returns\n    -------\n    p : Pipeline\n    \"\"\"\n    verbose = kwargs.pop(\"verbose\", False)\n    if kwargs:\n        raise TypeError(\n            'Unknown keyword arguments: \"{}\"'.format(list(kwargs.keys())[0])\n        )\n    return Pipeline(_name_estimators(steps), verbose=verbose)\n\n\ndef _transform_one(transformer, X, y, weight, **fit_params):\n    res = transformer.transform(X)\n    # if we have a weight for this transformer, multiply output\n    if weight is None:\n        return res\n    return res * weight\n\n\ndef _fit_transform_one(transformer, X, y, weight, **fit_params):\n    \"\"\"\n    Fits ``transformer`` to ``X`` and ``y``. The transformed result is returned\n    with the fitted transformer. If ``weight`` is not ``None``, the result will\n    be multiplied by ``weight``.\n    \"\"\"\n    if hasattr(transformer, \"fit_transform\"):\n        res = transformer.fit_transform(X, y, **fit_params)\n    else:\n        res = transformer.fit(X, y, **fit_params).transform(X)\n\n    if weight is None:\n        return res, transformer\n    return res * weight, transformer\n"
  },
  {
    "path": "ffsubsync/speech_transformers.py",
    "content": "# -*- coding: utf-8 -*-\nimport os\nfrom contextlib import contextmanager\nimport logging\nimport io\nimport subprocess\nimport sys\nfrom datetime import timedelta\nfrom typing import cast, Callable, Dict, List, Optional, Union\n\nimport ffmpeg\nimport numpy as np\nimport tqdm\n\nfrom ffsubsync.constants import (\n    DEFAULT_ENCODING,\n    DEFAULT_MAX_SUBTITLE_SECONDS,\n    DEFAULT_SCALE_FACTOR,\n    DEFAULT_START_SECONDS,\n    SAMPLE_RATE,\n)\nfrom ffsubsync.ffmpeg_utils import ffmpeg_bin_path, subprocess_args\nfrom ffsubsync.generic_subtitles import GenericSubtitle\nfrom ffsubsync.sklearn_shim import TransformerMixin\nfrom ffsubsync.sklearn_shim import Pipeline\nfrom ffsubsync.subtitle_parser import make_subtitle_parser\nfrom ffsubsync.subtitle_transformers import SubtitleScaler\n\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\ndef make_subtitle_speech_pipeline(\n    fmt: str = \"srt\",\n    encoding: str = DEFAULT_ENCODING,\n    caching: bool = False,\n    max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS,\n    start_seconds: int = DEFAULT_START_SECONDS,\n    scale_factor: float = DEFAULT_SCALE_FACTOR,\n    parser=None,\n    **kwargs,\n) -> Union[Pipeline, Callable[[float], Pipeline]]:\n    if parser is None:\n        parser = make_subtitle_parser(\n            fmt,\n            encoding=encoding,\n            caching=caching,\n            max_subtitle_seconds=max_subtitle_seconds,\n            start_seconds=start_seconds,\n            **kwargs,\n        )\n    assert parser.encoding == encoding\n    assert parser.max_subtitle_seconds == max_subtitle_seconds\n    assert parser.start_seconds == start_seconds\n\n    def subpipe_maker(framerate_ratio):\n        return Pipeline(\n            [\n                (\"parse\", parser),\n                (\"scale\", SubtitleScaler(framerate_ratio)),\n                (\n                    \"speech_extract\",\n                    
SubtitleSpeechTransformer(\n                        sample_rate=SAMPLE_RATE,\n                        start_seconds=start_seconds,\n                        framerate_ratio=framerate_ratio,\n                    ),\n                ),\n            ]\n        )\n\n    if scale_factor is None:\n        return subpipe_maker\n    else:\n        return subpipe_maker(scale_factor)\n\n\ndef _make_auditok_detector(\n    sample_rate: int, frame_rate: int, non_speech_label: float\n) -> Callable[[bytes], np.ndarray]:\n    try:\n        from auditok import (\n            BufferAudioSource,\n            ADSFactory,\n            AudioEnergyValidator,\n            StreamTokenizer,\n        )\n    except ImportError as e:\n        logger.error(\n            \"\"\"Error: auditok not installed!\n        Consider installing it with `pip install auditok`. Note that auditok\n        is GPLv3 licensed, which means that successfully importing it at\n        runtime creates a derivative work that is GPLv3 licensed. For personal\n        use this is fine, but note that any commercial use that relies on\n        auditok must be open source as per the GPLv3!*\n        *Not legal advice. 
Consult with a lawyer.\n        \"\"\"\n        )\n        raise e\n    bytes_per_frame = 2\n    frames_per_window = frame_rate // sample_rate\n    validator = AudioEnergyValidator(sample_width=bytes_per_frame, energy_threshold=50)\n    tokenizer = StreamTokenizer(\n        validator=validator,\n        min_length=0.2 * sample_rate,\n        max_length=int(5 * sample_rate),\n        max_continuous_silence=0.25 * sample_rate,\n    )\n\n    def _detect(asegment: bytes) -> np.ndarray:\n        asource = BufferAudioSource(\n            data_buffer=asegment,\n            sampling_rate=frame_rate,\n            sample_width=bytes_per_frame,\n            channels=1,\n        )\n        ads = ADSFactory.ads(audio_source=asource, block_dur=1.0 / sample_rate)\n        ads.open()\n        tokens = tokenizer.tokenize(ads)\n        length = (\n            len(asegment) // bytes_per_frame + frames_per_window - 1\n        ) // frames_per_window\n        media_bstring = np.zeros(length + 1)\n        for token in tokens:\n            media_bstring[token[1]] = 1.0\n            media_bstring[token[2] + 1] = non_speech_label - 1.0\n        return np.clip(np.cumsum(media_bstring)[:-1], 0.0, 1.0)\n\n    return _detect\n\n\ndef _make_webrtcvad_detector(\n    sample_rate: int, frame_rate: int, non_speech_label: float\n) -> Callable[[bytes], np.ndarray]:\n    import webrtcvad\n\n    vad = webrtcvad.Vad()\n    vad.set_mode(3)  # set non-speech pruning aggressiveness from 0 to 3\n    window_duration = 1.0 / sample_rate  # duration in seconds\n    frames_per_window = int(window_duration * frame_rate + 0.5)\n    bytes_per_frame = 2\n\n    def _detect(asegment: bytes) -> np.ndarray:\n        media_bstring = []\n        failures = 0\n        for start in range(0, len(asegment) // bytes_per_frame, frames_per_window):\n            stop = min(start + frames_per_window, len(asegment) // bytes_per_frame)\n            try:\n                is_speech = vad.is_speech(\n                    asegment[start 
* bytes_per_frame : stop * bytes_per_frame],\n                    sample_rate=frame_rate,\n                )\n            except Exception:\n                is_speech = False\n                failures += 1\n            # webrtcvad has low recall on mode 3, so treat non-speech as \"not sure\"\n            media_bstring.append(1.0 if is_speech else non_speech_label)\n        return np.array(media_bstring)\n\n    return _detect\n\n\ndef _make_silero_detector(\n    sample_rate: int, frame_rate: int, non_speech_label: float\n) -> Callable[[bytes], np.ndarray]:\n    import torch\n\n    window_duration = 1.0 / sample_rate  # duration in seconds\n    frames_per_window = int(window_duration * frame_rate + 0.5)\n    bytes_per_frame = 1\n\n    model, _ = torch.hub.load(\n        repo_or_dir=\"snakers4/silero-vad\",\n        model=\"silero_vad\",\n        force_reload=False,\n        onnx=False,\n    )\n\n    exception_logged = False\n\n    def _detect(asegment) -> np.ndarray:\n        asegment = np.frombuffer(asegment, np.int16).astype(np.float32) / (1 << 15)\n        asegment = torch.FloatTensor(asegment)\n        media_bstring = []\n        failures = 0\n        for start in range(0, len(asegment) // bytes_per_frame, frames_per_window):\n            stop = min(start + frames_per_window, len(asegment))\n            try:\n                speech_prob = model(\n                    asegment[start * bytes_per_frame : stop * bytes_per_frame],\n                    frame_rate,\n                ).item()\n            except Exception:\n                nonlocal exception_logged\n                if not exception_logged:\n                    exception_logged = True\n                    logger.exception(\"exception occurred during speech detection\")\n                speech_prob = 0.0\n                failures += 1\n            media_bstring.append(1.0 - (1.0 - speech_prob) * (1.0 - non_speech_label))\n        return np.array(media_bstring)\n\n    return _detect\n\n\nclass 
ComputeSpeechFrameBoundariesMixin:\n    def __init__(self) -> None:\n        self.start_frame_: Optional[int] = None\n        self.end_frame_: Optional[int] = None\n\n    @property\n    def num_frames(self) -> Optional[int]:\n        if self.start_frame_ is None or self.end_frame_ is None:\n            return None\n        return self.end_frame_ - self.start_frame_\n\n    def fit_boundaries(\n        self, speech_frames: np.ndarray\n    ) -> \"ComputeSpeechFrameBoundariesMixin\":\n        nz = np.nonzero(speech_frames > 0.5)[0]\n        if len(nz) > 0:\n            self.start_frame_ = int(np.min(nz))\n            self.end_frame_ = int(np.max(nz))\n        return self\n\n\nclass VideoSpeechTransformer(TransformerMixin):\n    def __init__(\n        self,\n        vad: str,\n        sample_rate: int,\n        frame_rate: int,\n        non_speech_label: float,\n        start_seconds: int = 0,\n        ffmpeg_path: Optional[str] = None,\n        ref_stream: Optional[str] = None,\n        vlc_mode: bool = False,\n        gui_mode: bool = False,\n    ) -> None:\n        super(VideoSpeechTransformer, self).__init__()\n        self.vad: str = vad\n        self.sample_rate: int = sample_rate\n        self.frame_rate: int = frame_rate\n        self._non_speech_label: float = non_speech_label\n        self.start_seconds: int = start_seconds\n        self.ffmpeg_path: Optional[str] = ffmpeg_path\n        self.ref_stream: Optional[str] = ref_stream\n        self.vlc_mode: bool = vlc_mode\n        self.gui_mode: bool = gui_mode\n        self.video_speech_results_: Optional[np.ndarray] = None\n\n    def try_fit_using_embedded_subs(self, fname: str) -> None:\n        embedded_subs = []\n        embedded_subs_times = []\n        if self.ref_stream is None:\n            # check first 5; should cover 99% of movies\n            streams_to_try: List[str] = list(map(\"0:s:{}\".format, range(5)))\n        else:\n            streams_to_try = [self.ref_stream]\n        for stream in 
streams_to_try:\n            ffmpeg_args = [\n                ffmpeg_bin_path(\n                    \"ffmpeg\", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path\n                )\n            ]\n            ffmpeg_args.extend(\n                [\n                    \"-loglevel\",\n                    \"fatal\",\n                    \"-nostdin\",\n                    \"-i\",\n                    fname,\n                    \"-map\",\n                    \"{}\".format(stream),\n                    \"-f\",\n                    \"srt\",\n                    \"-\",\n                ]\n            )\n            process = subprocess.Popen(\n                ffmpeg_args, **subprocess_args(include_stdout=True)\n            )\n            output = io.BytesIO(process.communicate()[0])\n            if process.returncode != 0:\n                break\n            pipe = cast(\n                Pipeline,\n                make_subtitle_speech_pipeline(start_seconds=self.start_seconds),\n            ).fit(output)\n            speech_step = pipe.steps[-1][1]\n            embedded_subs.append(speech_step)\n            embedded_subs_times.append(speech_step.max_time_)\n        if len(embedded_subs) == 0:\n            if self.ref_stream is None:\n                error_msg = \"Video file appears to lack subtitle stream\"\n            else:\n                error_msg = \"Stream {} not found\".format(self.ref_stream)\n            raise ValueError(error_msg)\n        # use longest set of embedded subs\n        subs_to_use = embedded_subs[int(np.argmax(embedded_subs_times))]\n        self.video_speech_results_ = subs_to_use.subtitle_speech_results_\n\n    def fit(self, fname: str, *_) -> \"VideoSpeechTransformer\":\n        if \"subs\" in self.vad and (\n            self.ref_stream is None or self.ref_stream.startswith(\"0:s:\")\n        ):\n            try:\n                logger.info(\"Checking video for subtitles stream...\")\n                
self.try_fit_using_embedded_subs(fname)\n                logger.info(\"...success!\")\n                return self\n            except Exception as e:\n                logger.info(e)\n        try:\n            total_duration = (\n                float(\n                    ffmpeg.probe(\n                        fname,\n                        cmd=ffmpeg_bin_path(\n                            \"ffprobe\",\n                            self.gui_mode,\n                            ffmpeg_resources_path=self.ffmpeg_path,\n                        ),\n                    )[\"format\"][\"duration\"]\n                )\n                - self.start_seconds\n            )\n        except Exception as e:\n            logger.warning(e)\n            total_duration = None\n        if \"webrtc\" in self.vad:\n            detector = _make_webrtcvad_detector(\n                self.sample_rate, self.frame_rate, self._non_speech_label\n            )\n        elif \"auditok\" in self.vad:\n            detector = _make_auditok_detector(\n                self.sample_rate, self.frame_rate, self._non_speech_label\n            )\n        elif \"silero\" in self.vad:\n            detector = _make_silero_detector(\n                self.sample_rate, self.frame_rate, self._non_speech_label\n            )\n        else:\n            raise ValueError(\"unknown vad: %s\" % self.vad)\n        media_bstring: List[np.ndarray] = []\n        ffmpeg_args = [\n            ffmpeg_bin_path(\n                \"ffmpeg\", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path\n            )\n        ]\n        if self.start_seconds > 0:\n            ffmpeg_args.extend(\n                [\n                    \"-ss\",\n                    str(timedelta(seconds=self.start_seconds)),\n                ]\n            )\n        ffmpeg_args.extend([\"-loglevel\", \"fatal\", \"-nostdin\", \"-i\", fname])\n        if self.ref_stream is not None and self.ref_stream.startswith(\"0:a:\"):\n            
ffmpeg_args.extend([\"-map\", self.ref_stream])\n        ffmpeg_args.extend(\n            [\n                \"-f\",\n                \"s16le\",\n                \"-ac\",\n                \"1\",\n                \"-acodec\",\n                \"pcm_s16le\",\n                \"-af\",\n                \"aresample=async=1\",\n                \"-ar\",\n                str(self.frame_rate),\n                \"-\",\n            ]\n        )\n        process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))\n        bytes_per_frame = 2\n        frames_per_window = bytes_per_frame * self.frame_rate // self.sample_rate\n        windows_per_buffer = 10000\n        simple_progress = 0.0\n\n        redirect_stderr = None\n        tqdm_extra_args = {}\n        should_print_redirected_stderr = self.gui_mode\n        if self.gui_mode:\n            try:\n                from contextlib import redirect_stderr  # type: ignore\n\n                tqdm_extra_args[\"file\"] = sys.stdout\n            except ImportError:\n                should_print_redirected_stderr = False\n        if redirect_stderr is None:\n\n            @contextmanager\n            def redirect_stderr(enter_result=None):\n                yield enter_result\n\n        assert redirect_stderr is not None\n        pbar_output = io.StringIO()\n        with redirect_stderr(pbar_output):\n            with tqdm.tqdm(\n                total=total_duration, disable=self.vlc_mode, **tqdm_extra_args\n            ) as pbar:\n                while True:\n                    in_bytes = process.stdout.read(\n                        frames_per_window * windows_per_buffer\n                    )\n                    if not in_bytes:\n                        break\n                    newstuff = len(in_bytes) / float(bytes_per_frame) / self.frame_rate\n                    if (\n                        total_duration is not None\n                        and simple_progress + newstuff > total_duration\n              
      ):\n                        newstuff = total_duration - simple_progress\n                    simple_progress += newstuff\n                    pbar.update(newstuff)\n                    if self.vlc_mode and total_duration is not None:\n                        print(\"%d\" % int(simple_progress * 100.0 / total_duration))\n                        sys.stdout.flush()\n                    if should_print_redirected_stderr:\n                        assert self.gui_mode\n                        # no need to flush since we pass -u to do unbuffered output for gui mode\n                        print(pbar_output.read())\n                    if \"silero\" not in self.vad:\n                        in_bytes = np.frombuffer(in_bytes, np.uint8)\n                    media_bstring.append(detector(in_bytes))\n        process.wait()\n        if len(media_bstring) == 0:\n            raise ValueError(\n                \"Unable to detect speech. \"\n                \"Perhaps try specifying a different stream / track, or a different vad.\"\n            )\n        self.video_speech_results_ = np.concatenate(media_bstring)\n        logger.info(\"total of speech segments: %s\", np.sum(self.video_speech_results_))\n        return self\n\n    def transform(self, *_) -> np.ndarray:\n        return self.video_speech_results_\n\n\n_PAIRED_NESTER: Dict[str, str] = {\n    \"(\": \")\",\n    \"{\": \"}\",\n    \"[\": \"]\",\n    # FIXME: False positive sometimes when there are html tags, e.g. <i> Hello? 
</i>\n    # '<': '>',\n}\n\n\n# TODO: need way better metadata detector\ndef _is_metadata(content: str, is_beginning_or_end: bool) -> bool:\n    content = content.strip()\n    if len(content) == 0:\n        return True\n    if (\n        content[0] in _PAIRED_NESTER.keys()\n        and content[-1] == _PAIRED_NESTER[content[0]]\n    ):\n        return True\n    if is_beginning_or_end:\n        if \"english\" in content.lower():\n            return True\n        if \" - \" in content:\n            return True\n    return False\n\n\nclass SubtitleSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin):\n    def __init__(\n        self, sample_rate: int, start_seconds: int = 0, framerate_ratio: float = 1.0\n    ) -> None:\n        super(SubtitleSpeechTransformer, self).__init__()\n        self.sample_rate: int = sample_rate\n        self.start_seconds: int = start_seconds\n        self.framerate_ratio: float = framerate_ratio\n        self.subtitle_speech_results_: Optional[np.ndarray] = None\n        self.max_time_: Optional[int] = None\n\n    def fit(self, subs: List[GenericSubtitle], *_) -> \"SubtitleSpeechTransformer\":\n        max_time = 0\n        for sub in subs:\n            max_time = max(max_time, sub.end.total_seconds())\n        self.max_time_ = max_time - self.start_seconds\n        samples = np.zeros(int(max_time * self.sample_rate) + 2, dtype=float)\n        start_frame = float(\"inf\")\n        end_frame = 0\n        for i, sub in enumerate(subs):\n            if _is_metadata(sub.content, i == 0 or i + 1 == len(subs)):\n                continue\n            start = int(\n                round(\n                    (sub.start.total_seconds() - self.start_seconds) * self.sample_rate\n                )\n            )\n            start_frame = min(start_frame, start)\n            duration = sub.end.total_seconds() - sub.start.total_seconds()\n            end = start + int(round(duration * self.sample_rate))\n            end_frame = 
max(end_frame, end)\n            samples[start:end] = min(1.0 / self.framerate_ratio, 1.0)\n        self.subtitle_speech_results_ = samples\n        self.fit_boundaries(self.subtitle_speech_results_)\n        return self\n\n    def transform(self, *_) -> np.ndarray:\n        assert self.subtitle_speech_results_ is not None\n        return self.subtitle_speech_results_\n\n\nclass DeserializeSpeechTransformer(TransformerMixin):\n    def __init__(self, non_speech_label: float) -> None:\n        super(DeserializeSpeechTransformer, self).__init__()\n        self._non_speech_label: float = non_speech_label\n        self.deserialized_speech_results_: Optional[np.ndarray] = None\n\n    def fit(self, fname, *_) -> \"DeserializeSpeechTransformer\":\n        speech = np.load(fname)\n        if hasattr(speech, \"files\"):\n            if \"speech\" in speech.files:\n                speech = speech[\"speech\"]\n            else:\n                raise ValueError(\n                    'could not find \"speech\" array in '\n                    \"serialized file; only contains: %s\" % speech.files\n                )\n        speech[speech < 1.0] = self._non_speech_label\n        self.deserialized_speech_results_ = speech\n        return self\n\n    def transform(self, *_) -> np.ndarray:\n        assert self.deserialized_speech_results_ is not None\n        return self.deserialized_speech_results_\n"
  },
  {
    "path": "ffsubsync/subtitle_parser.py",
    "content": "# -*- coding: utf-8 -*-\nfrom datetime import timedelta\nimport logging\nfrom typing import Any, cast, List, Optional\n\ntry:\n    import cchardet\nexcept:  # noqa: E722\n    cchardet = None\ntry:\n    import chardet\nexcept:  # noqa: E722\n    chardet = None\ntry:\n    import charset_normalizer\nexcept:  # noqa: E722\n    charset_normalizer = None\nimport pysubs2\nfrom ffsubsync.sklearn_shim import TransformerMixin\nimport srt\n\nfrom ffsubsync.constants import (\n    DEFAULT_ENCODING,\n    DEFAULT_MAX_SUBTITLE_SECONDS,\n    DEFAULT_START_SECONDS,\n)\nfrom ffsubsync.file_utils import open_file\nfrom ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\ndef _preprocess_subs(\n    subs,\n    max_subtitle_seconds: Optional[int] = None,\n    start_seconds: int = 0,\n    tolerant: bool = True,\n) -> List[GenericSubtitle]:\n    subs_list = []\n    start_time = timedelta(seconds=start_seconds)\n    max_duration = timedelta(days=1)\n    if max_subtitle_seconds is not None:\n        max_duration = timedelta(seconds=max_subtitle_seconds)\n    subs = iter(subs)\n    while True:\n        try:\n            next_sub = GenericSubtitle.wrap_inner_subtitle(next(subs))\n            if next_sub.start < start_time:\n                continue\n            next_sub.end = min(next_sub.end, next_sub.start + max_duration)\n            subs_list.append(next_sub)\n        # We don't catch SRTParseError here b/c that is typically raised when we\n        # are trying to parse with the wrong encoding, in which case we might\n        # be able to try another one on the *entire* set of subtitles elsewhere.\n        except ValueError as e:\n            if tolerant:\n                logger.warning(e)\n                continue\n            else:\n                raise\n        except StopIteration:\n            break\n    return subs_list\n\n\nclass 
GenericSubtitleParser(SubsMixin, TransformerMixin):\n    def __init__(\n        self,\n        fmt: str = \"srt\",\n        encoding: str = \"infer\",\n        caching: bool = False,\n        max_subtitle_seconds: Optional[int] = None,\n        start_seconds: int = 0,\n        skip_ssa_info: bool = False,\n        strict: bool = False,\n    ) -> None:\n        super(self.__class__, self).__init__()\n        self.sub_format: str = fmt\n        self.encoding: str = encoding\n        self.caching: bool = caching\n        self.fit_fname: Optional[str] = None\n        self.detected_encoding_: Optional[str] = None\n        self.max_subtitle_seconds: Optional[int] = max_subtitle_seconds\n        self.start_seconds: int = start_seconds\n        # FIXME: hack to get tests to pass; remove\n        self._skip_ssa_info: bool = skip_ssa_info\n        self._strict: bool = strict\n\n    def fit(self, fname: str, *_) -> \"GenericSubtitleParser\":\n        if self.caching and self.fit_fname == (\"<stdin>\" if fname is None else fname):\n            return self\n        encodings_to_try = (self.encoding,)\n        with open_file(fname, \"rb\") as f:\n            subs = f.read()\n        if self.encoding == \"infer\":\n            for chardet_lib in (cchardet, charset_normalizer, chardet):\n                if chardet_lib is not None:\n                    try:\n                        detected_encoding = cast(\n                            Optional[str], chardet_lib.detect(subs)[\"encoding\"]\n                        )\n                    except:  # noqa: E722\n                        continue\n                    if detected_encoding is not None:\n                        self.detected_encoding_ = detected_encoding\n                        encodings_to_try = (detected_encoding,)\n                        break\n            assert self.detected_encoding_ is not None\n            logger.info(\"detected encoding: %s\" % self.detected_encoding_)\n        exc = None\n        for encoding in 
encodings_to_try:\n            try:\n                decoded_subs = subs.decode(encoding, errors=\"replace\").strip()\n                if self.sub_format == \"srt\":\n                    parsed_subs = srt.parse(\n                        decoded_subs, ignore_errors=not self._strict\n                    )\n                elif self.sub_format in (\"ass\", \"ssa\", \"sub\", \"vtt\"):\n                    parsed_subs = pysubs2.SSAFile.from_string(decoded_subs)\n                else:\n                    raise NotImplementedError(\n                        \"unsupported format: %s\" % self.sub_format\n                    )\n                extra_generic_subtitle_file_kwargs = {}\n                if isinstance(parsed_subs, pysubs2.SSAFile):\n                    extra_generic_subtitle_file_kwargs.update(\n                        dict(\n                            styles=parsed_subs.styles,\n                            # pysubs2 on Python >= 3.6 doesn't support this\n                            fonts_opaque=getattr(parsed_subs, \"fonts_opaque\", None),\n                            info=parsed_subs.info if not self._skip_ssa_info else None,\n                        )\n                    )\n                self.subs_ = GenericSubtitlesFile(\n                    _preprocess_subs(\n                        parsed_subs,\n                        max_subtitle_seconds=self.max_subtitle_seconds,\n                        start_seconds=self.start_seconds,\n                    ),\n                    sub_format=self.sub_format,\n                    encoding=encoding,\n                    **extra_generic_subtitle_file_kwargs,\n                )\n                self.fit_fname = \"<stdin>\" if fname is None else fname\n                if len(encodings_to_try) > 1:\n                    self.detected_encoding_ = encoding\n                    logger.info(\"detected encoding: %s\" % self.detected_encoding_)\n                return self\n            except Exception as e:\n                exc 
= e\n                continue\n        raise exc\n\n    def transform(self, *_) -> GenericSubtitlesFile:\n        return self.subs_\n\n\ndef make_subtitle_parser(\n    fmt: str,\n    encoding: str = DEFAULT_ENCODING,\n    caching: bool = False,\n    max_subtitle_seconds: int = DEFAULT_MAX_SUBTITLE_SECONDS,\n    start_seconds: int = DEFAULT_START_SECONDS,\n    **kwargs: Any,\n) -> GenericSubtitleParser:\n    return GenericSubtitleParser(\n        fmt=fmt,\n        encoding=encoding,\n        caching=caching,\n        max_subtitle_seconds=max_subtitle_seconds,\n        start_seconds=start_seconds,\n        skip_ssa_info=kwargs.get(\"skip_ssa_info\", False),\n        strict=kwargs.get(\"strict\", False),\n    )\n"
  },
  {
    "path": "ffsubsync/subtitle_transformers.py",
    "content": "# -*- coding: utf-8 -*-\nfrom datetime import timedelta\nimport logging\nimport numbers\n\nfrom ffsubsync.generic_subtitles import GenericSubtitle, GenericSubtitlesFile, SubsMixin\nfrom ffsubsync.sklearn_shim import TransformerMixin\n\nlogging.basicConfig(level=logging.INFO)\nlogger: logging.Logger = logging.getLogger(__name__)\n\n\nclass SubtitleShifter(SubsMixin, TransformerMixin):\n    def __init__(self, td_seconds):\n        super(SubsMixin, self).__init__()\n        if not isinstance(td_seconds, timedelta):\n            self.td_seconds = timedelta(seconds=td_seconds)\n        else:\n            self.td_seconds = td_seconds\n\n    def fit(self, subs: GenericSubtitlesFile, *_):\n        self.subs_ = subs.offset(self.td_seconds)\n        return self\n\n    def transform(self, *_):\n        return self.subs_\n\n\nclass SubtitleScaler(SubsMixin, TransformerMixin):\n    def __init__(self, scale_factor):\n        assert isinstance(scale_factor, numbers.Number)\n        super(SubsMixin, self).__init__()\n        self.scale_factor = scale_factor\n\n    def fit(self, subs: GenericSubtitlesFile, *_):\n        scaled_subs = []\n        for sub in subs:\n            scaled_subs.append(\n                GenericSubtitle(\n                    # py2 doesn't support direct multiplication of timedelta w/ float\n                    timedelta(seconds=sub.start.total_seconds() * self.scale_factor),\n                    timedelta(seconds=sub.end.total_seconds() * self.scale_factor),\n                    sub.inner,\n                )\n            )\n        self.subs_ = subs.clone_props_for_subs(scaled_subs)\n        return self\n\n    def transform(self, *_):\n        return self.subs_\n\n\nclass SubtitleMerger(SubsMixin, TransformerMixin):\n    def __init__(self, reference_subs, first=\"reference\"):\n        assert first in (\"reference\", \"output\")\n        super(SubsMixin, self).__init__()\n        self.reference_subs = reference_subs\n        self.first = 
first\n\n    def fit(self, output_subs: GenericSubtitlesFile, *_):\n        def _merger_gen(a, b):\n            ita, itb = iter(a), iter(b)\n            cur_a = next(ita, None)\n            cur_b = next(itb, None)\n            while True:\n                if cur_a is None and cur_b is None:\n                    return\n                elif cur_a is None:\n                    while cur_b is not None:\n                        yield cur_b\n                        cur_b = next(itb, None)\n                    return\n                elif cur_b is None:\n                    while cur_a is not None:\n                        yield cur_a\n                        cur_a = next(ita, None)\n                    return\n                # else: neither are None\n                if cur_a.start < cur_b.start:\n                    swapped = False\n                else:\n                    swapped = True\n                    cur_a, cur_b = cur_b, cur_a\n                    ita, itb = itb, ita\n                prev_a = cur_a\n                while prev_a is not None and cur_a.start < cur_b.start:\n                    cur_a = next(ita, None)\n                    if cur_a is None or cur_a.start < cur_b.start:\n                        yield prev_a\n                        prev_a = cur_a\n                if prev_a is None:\n                    while cur_b is not None:\n                        yield cur_b\n                        cur_b = next(itb, None)\n                    return\n                if cur_b.start - prev_a.start < cur_a.start - cur_b.start:\n                    if swapped:\n                        yield cur_b.merge_with(prev_a)\n                        ita, itb = itb, ita\n                        cur_a, cur_b = cur_b, cur_a\n                        cur_a = next(ita, None)\n                    else:\n                        yield prev_a.merge_with(cur_b)\n                        cur_b = next(itb, None)\n                else:\n                    if swapped:\n                  
      yield cur_b.merge_with(cur_a)\n                        ita, itb = itb, ita\n                    else:\n                        yield cur_a.merge_with(cur_b)\n                    cur_a = next(ita, None)\n                    cur_b = next(itb, None)\n\n        merged_subs = []\n        if self.first == \"reference\":\n            first, second = self.reference_subs, output_subs\n        else:\n            first, second = output_subs, self.reference_subs\n        for merged in _merger_gen(first, second):\n            merged_subs.append(merged)\n        self.subs_ = output_subs.clone_props_for_subs(merged_subs)\n        return self\n\n    def transform(self, *_):\n        return self.subs_\n"
  },
  {
    "path": "ffsubsync/version.py",
    "content": "# -*- coding: utf-8 -*-\nimport os\nfrom ffsubsync.constants import SUBSYNC_RESOURCES_ENV_MAGIC\nfrom ffsubsync._version import get_versions\n\n__version__ = get_versions()[\"version\"]\ndel get_versions\n\n\ndef get_version():\n    if \"unknown\" in __version__.lower():\n        with open(\n            os.path.join(os.environ[SUBSYNC_RESOURCES_ENV_MAGIC], \"__version__\")\n        ) as f:\n            return f.read().strip()\n    else:\n        return __version__\n\n\ndef make_version_tuple(vstr=None):\n    if vstr is None:\n        vstr = __version__\n    if vstr[0] == \"v\":\n        vstr = vstr[1:]\n    components = []\n    for component in vstr.split(\"+\")[0].split(\".\"):\n        try:\n            components.append(int(component))\n        except ValueError:\n            break\n    return tuple(components)\n\n\ndef update_available():\n    import requests\n    from requests.exceptions import Timeout\n    from .constants import API_RELEASE_URL\n\n    try:\n        resp = requests.get(API_RELEASE_URL, timeout=1)\n        latest_vstr = resp.json()[\"tag_name\"]\n    except Timeout:\n        return False\n    except KeyError:\n        return False\n    if not resp.ok:\n        return False\n    return make_version_tuple(get_version()) < make_version_tuple(latest_vstr)\n"
  },
  {
    "path": "gui/.gitignore",
    "content": "build\ndist\n"
  },
  {
    "path": "gui/Makefile",
    "content": ".PHONY: macos\n\nmacos: clean app pkg\n\napp:\n\t./build-macos.sh\n\npkg:\n\t./package-macos.sh\n\nclean:\n\trm -r dist/ build/\n"
  },
  {
    "path": "gui/README.md",
    "content": "== Note on platform-specific PyInstaller version in requirements.txt ==\n\nPyInstaller>=3.6 introduces a webrtcvad hook that seems to not play nicely\nwith the webrtcvad-wheels package. This package contains prebuilt wheels\nand is needed for Windows (unless I can get a working C compiler in my\nWindows build environment, which is doubtful). For MacOS this isn't a\nproblem since I can use the vanilla webrtcvad package and leverage the\npreexisting hook in PyInstaller>=3.6, but for Windows I need to use the\nold version of PyInstaller without the hook and introduce my own (in the\n'hooks' directory).\n\n== Note on Scikit-Learn ==\nThere is some DLL that wasn't getting bundled in the Windows PyInstaller\nbuild, which caused the built exe to complain. My solution was to remove\nthe dependency and include a shim for the Pipeline / Transformer functionality.\n"
  },
  {
    "path": "gui/build-macos.sh",
    "content": "#!/usr/bin/env bash\npython3 -m PyInstaller --clean -y --dist ./dist/macos build.spec\n# ref: https://github.com/chriskiehl/Gooey/issues/259#issuecomment-522432026\nmkdir -p ./dist/macos/Contents\n"
  },
  {
    "path": "gui/build-windows.sh",
    "content": "#!/usr/bin/env bash\nnbits=${1:-64}\ntag=\"python3\"\nif [[ \"$nbits\" == 32 ]]; then\n    tag=\"${tag}-32bit\"\nfi\ndocker run -v \"$(pwd):/src/\" -v \"$(pwd)/..:/ffsubsync/\" --entrypoint /bin/sh \"cdrx/pyinstaller-windows:${tag}\" -c \"pip install -e /ffsubsync && /ffsubsync/gui/entrypoint-windows.sh\"\nrm -r \"./dist/win${nbits}\"\nmv ./dist/windows \"./dist/win${nbits}\"\n"
  },
  {
    "path": "gui/build.spec",
    "content": "# -*- mode: python -*-\n\nimport os\nimport platform\nimport gooey\n\n\nroot = '..'\nhookspath = None\nif platform.system() == 'Windows':\n    root = '/ffsubsync'\n    hookspath = [os.path.join(os.curdir, 'hooks')]\n\nffmpeg_bin = os.path.join(root, 'resources/ffmpeg-bin')\ndatas = [(os.path.join(root, 'resources/img/program_icon.png'), './img')]\ndatas.append((os.path.join(root, 'resources/img/config_icon.png'), './img'))\ndatas.append((os.path.join(root, '__version__'), '.'))\nif platform.system() == 'Darwin':\n    ffmpeg_bin = os.path.join(ffmpeg_bin, 'macos')\nelif platform.system() == 'Windows':\n    arch_bits = int(platform.architecture()[0][:2])\n    ffmpeg_bin = os.path.join(ffmpeg_bin, 'win{}'.format(arch_bits))\n    if arch_bits == 64:\n        datas.append((os.path.join(root, 'resources/lib/win64/VCRUNTIME140_1.dll'), '.'))\nelse:\n    raise Exception('ffmpeg not available for {}'.format(platform.system()))\n\ngooey_root = os.path.dirname(gooey.__file__)\ngooey_languages = Tree(os.path.join(gooey_root, 'languages'), prefix = 'gooey/languages')\ngooey_images = Tree(os.path.join(gooey_root, 'images'), prefix = 'gooey/images')\na = Analysis([os.path.join(os.curdir, 'ffsubsync-gui.py')],\n             datas=datas,\n             hiddenimports=['pkg_resources.py2_warn'],  # ref: https://github.com/pypa/setuptools/issues/1963\n             hookspath=hookspath,\n             runtime_hooks=None,\n             binaries=[(ffmpeg_bin, 'ffmpeg-bin')],\n             )\npyz = PYZ(a.pure)\n\n# runtime options to pass to interpreter -- '-u' is for unbuffered io\noptions = [('u', None, 'OPTION')]\n\nexe = EXE(pyz,\n          a.scripts,\n          a.binaries,\n          a.zipfiles,\n          a.datas,\n          options,\n          gooey_languages, # Add them in to collected files\n          gooey_images, # Same here.\n          name='FFsubsync',\n          debug=False,\n          strip=None,\n          upx=True,\n          console=False,\n          
windowed=True,\n          icon=os.path.join(root, 'resources', 'img', 'program_icon.ico')\n          )\n\n\nif platform.system() == 'Darwin':\n    # info_plist = {'addition_prop': 'additional_value'}\n    info_plist = {}\n    app = BUNDLE(exe,\n                 icon=os.path.join(root, 'resources', 'img', 'program_icon.icns'),\n                 name='FFsubsync.app',\n                 bundle_identifier=None,\n                 info_plist=info_plist\n                )\n"
  },
  {
    "path": "gui/entrypoint-windows.sh",
    "content": "#!/bin/bash\n\n# Fail on errors.\nset -e\n\n# Make sure .bashrc is sourced\n. /root/.bashrc\n\n# Allow the workdir to be set using an env var.\n# Useful for CI pipelines which use docker for their build steps\n# and don't allow that much flexibility to mount volumes\nWORKDIR=${SRCDIR:-/src}\n\n#\n# In case the user specified a custom URL for PYPI, then use\n# that one, instead of the default one.\n#\nif [[ \"$PYPI_URL\" != \"https://pypi.python.org/\" ]] || \\\n   [[ \"$PYPI_INDEX_URL\" != \"https://pypi.python.org/simple\" ]]; then\n    # the funky looking regexp just extracts the hostname, excluding port\n    # to be used as a trusted-host.\n    mkdir -p /wine/drive_c/users/root/pip\n    echo \"[global]\" > /wine/drive_c/users/root/pip/pip.ini\n    echo \"index = $PYPI_URL\" >> /wine/drive_c/users/root/pip/pip.ini\n    echo \"index-url = $PYPI_INDEX_URL\" >> /wine/drive_c/users/root/pip/pip.ini\n    echo \"trusted-host = $(echo $PYPI_URL | perl -pe 's|^.*?://(.*?)(:.*?)?/.*$|$1|')\" >> /wine/drive_c/users/root/pip/pip.ini\n\n    echo \"Using custom pip.ini: \"\n    cat /wine/drive_c/users/root/pip/pip.ini\nfi\n\ncd $WORKDIR\n\nif [ -f requirements.txt ]; then\n    pip install -r requirements.txt\nfi # [ -f requirements.txt ]\n\nrm /wine/drive_c/Python37/Lib/site-packages/PyInstaller/hooks/hook-webrtcvad.py\n\necho \"$@\"\n\nif [[ \"$@\" == \"\" ]]; then\n    pyinstaller --clean -y --dist ./dist/windows --workpath /tmp *.spec\n    chown -R --reference=. ./dist/windows\nelse\n    sh -c \"$@\"\nfi # [[ \"$@\" == \"\" ]]\n"
  },
  {
    "path": "gui/ffsubsync-gui.py",
    "content": "from ffsubsync.ffsubsync_gui import main\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "gui/hooks/hook-webrtcvad.py",
    "content": "from PyInstaller.utils.hooks import copy_metadata\n\ndatas = copy_metadata('webrtcvad-wheels')\n"
  },
  {
    "path": "gui/package-macos.sh",
    "content": "#!/usr/bin/env bash\n\nset -Eeuxo pipefail\n\nBASE=.\nDIST=\"$BASE/dist\"\nBUILD=\"$BASE/build/dmg\"\nVERSION=$(python3 -c \"from subsync.version import __version__; print(__version__)\")\nAPP=\"Subsync.app\"\nTARGET=\"$DIST/subsync-${VERSION}-mac-x86_64.dmg\"\n\ntest -e \"$BUILD\" && rm -rf \"$BUILD\"\ntest -e \"$TARGET\" && rm -f \"$TARGET\"\nmkdir -p \"$BUILD\"\ncp -r \"$DIST/$APP\" \"$BUILD\"\n\ncreate-dmg \\\n    --volname \"subsync installer\" \\\n    `#--volicon \"icon.icns\"` \\\n    --window-pos 300 200 \\\n    --window-size 700 500 \\\n    --icon-size 150 \\\n    --icon \"$APP\" 200 200 \\\n    --hide-extension \"$APP\" \\\n    --app-drop-link 450 200 \\\n    --no-internet-enable \\\n    \"$TARGET\" \"$BUILD\"\n"
  },
  {
    "path": "gui/requirements.txt",
    "content": "gooey\npyinstaller>=3.6\nrequests\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"ffsubsync\"\ndynamic = [\"authors\", \"classifiers\", \"dependencies\", \"license\", \"readme\", \"scripts\", \"version\"]\n\n[tool.black]\nline-length = 88\ntarget-version = ['py39']\nextend-exclude = '(^/versioneer|_version)\\.py'\n\n"
  },
  {
    "path": "pytest.ini",
    "content": "[pytest]\nmarkers =\n    integration: mark a test as an integration test.\n#filterwarnings =\n#    ignore::DeprecationWarning\n"
  },
  {
    "path": "requirements-dev.txt",
    "content": "black\nflake8\nmypy\npytest\npytest-cov\npyyaml\ntwine\ntypes-requests\nversioneer\n"
  },
  {
    "path": "requirements.txt",
    "content": "auditok==0.1.5\nchardet;python_version>='3.7'\ncharset_normalizer\nfaust-cchardet;python_version<'3.13'\nffmpeg-python\nnumpy>=1.12.0\npysubs2;python_version<'3.7'\npysubs2>=1.2.0;python_version>='3.7'\nrich\nsetuptools\nsrt>=3.0.0\ntqdm\ntyping_extensions\nwebrtcvad;platform_system!='Windows'\nwebrtcvad-wheels;platform_system=='Windows'\n"
  },
  {
    "path": "scripts/blacken.sh",
    "content": "#!/usr/bin/env bash\n\n# ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/\nset -euxo pipefail\n\nDIRS=\"./ffsubsync ./tests\"\nblack $DIRS $@\n"
  },
  {
    "path": "scripts/bump-version.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nimport argparse\nimport subprocess\nimport sys\n\nfrom ffsubsync.version import make_version_tuple\n\n\ndef main(*_):\n    components = list(make_version_tuple())\n    components[-1] += 1\n    version = '.'.join(str(c) for c in components)\n    subprocess.check_output(['git', 'tag', version])\n    return 0\n\n\nif __name__ == '__main__':\n    parser = argparse.ArgumentParser(description='Bump version and create git tag.')\n    args = parser.parse_args()\n    sys.exit(main(args))\n"
  },
  {
    "path": "scripts/deploy.sh",
    "content": "#!/usr/bin/env bash\n\n# ref: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/\nset -euxo pipefail\n\nif ! git diff-index --quiet HEAD --; then\n    echo \"dirty working tree; please clean or commit changes\"\n    exit 1\nfi\n\nif ! git describe --exact-match --tags HEAD > /dev/null; then\n    echo \"current revision not tagged; please deploy from a tagged revision\"\n    exit 1\nfi\n\ncurrent=\"$(python -c 'import versioneer; print(versioneer.get_version())')\"\n[[ $? -eq 1 ]] && exit 1\n\nlatest=\"$(git describe --tags $(git rev-list --tags --max-count=1))\"\n[[ $? -eq 1 ]] && exit 1\n\nif [[ \"$current\" != \"$latest\" ]]; then\n    echo \"current revision is not the latest version; please deploy from latest version\"\n    exit 1\nfi\n\nexpect <<EOF\nset timeout -1\n\nspawn twine upload dist/*\n\nexpect \"Enter your API token:\"\nsend -- \"$(lpass show 937494930560669633 --password)\\r\"\nexpect\nEOF\n\nbranch=\"$(git branch --show-current)\"\ngit checkout latest\ngit rebase \"$branch\"\ngit push -f\ngit checkout \"$branch\"\n\ngit push --tags\n"
  },
  {
    "path": "scripts/write-version.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nfrom ffsubsync.version import __version__\n\n\nif __name__ == '__main__':\n    with open('__version__', 'w') as f:\n        f.write(__version__.strip() + '\\n')\n"
  },
  {
    "path": "setup.cfg",
    "content": "# See the docstring in versioneer.py for instructions. Note that you must\n# re-run 'versioneer.py install' after changing this section, and commit the\n# resulting files.\n\n[versioneer]\nVCS = git\nstyle = pep440\nversionfile_source = ffsubsync/_version.py\nversionfile_build = ffsubsync/_version.py\ntag_prefix =\nparentdir_prefix = ffsubsync-\n\n[metadata]\ndescription_file = README.md\n\n[flake8]\nmax-line-length = 100\nexclude = .git,__pycache__,old,build,dist,docs,versioneer.py,ffsubsync/_version.py\n\n[bdist_wheel]\nuniversal = 1\n\n[tool:pytest]\nfilterwarnings = ignore::DeprecationWarning\n\n[mypy]\nno_strict_optional = True\nignore_missing_imports = True\n\n[mypy-ffsubsync._version]\nignore_errors = True\n"
  },
  {
    "path": "setup.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\nimport os\n\nfrom setuptools import setup, find_packages\nimport versioneer\n\npkg_name = 'ffsubsync'\n\n__version__ = versioneer.get_version()\nif 'unknown' in __version__.lower():\n    with open(os.path.join(pkg_name, '__version__')) as f:\n        __version__ = f.read().strip()\n\n\ndef read_file(fname):\n    with open(fname, 'r') as f:\n        return f.read()\n\n\nhistory = read_file('HISTORY.rst')\nrequirements = read_file('requirements.txt').strip().split()\nsetup(\n    name=pkg_name,\n    version=__version__,\n    cmdclass=versioneer.get_cmdclass(),\n    author='Stephen Macke',\n    author_email='stephen.macke@gmail.com',\n    description='Language-agnostic synchronization of subtitles with video.',\n    long_description=read_file('README.md'),\n    long_description_content_type='text/markdown',\n    url='https://github.com/smacke/ffsubsync',\n    packages=find_packages(exclude=['docs']),\n    include_package_data=True,\n    install_requires=requirements,\n    entry_points={\n        'console_scripts': [\n            'ffs = ffsubsync:main',\n            'subsync = ffsubsync:main',\n            'ffsubsync = ffsubsync:main'\n        ],\n    },\n    license='MIT',\n    zip_safe=False,\n    classifiers=[\n        'Development Status :: 3 - Alpha',\n        'Intended Audience :: End Users/Desktop',\n        'License :: OSI Approved :: MIT License',\n        'Natural Language :: English',\n        'Programming Language :: Python :: 3.6',\n        'Programming Language :: Python :: 3.7',\n        'Programming Language :: Python :: 3.8',\n        'Programming Language :: Python :: 3.9',\n        'Programming Language :: Python :: 3.10',\n        'Programming Language :: Python :: 3.11',\n        'Programming Language :: Python :: 3.12',\n        'Programming Language :: Python :: 3.13',\n        'Programming Language :: Python :: 3.14',\n        'Topic :: Multimedia :: Sound/Audio :: Analysis',\n       
 'Topic :: Multimedia :: Sound/Audio :: Speech',\n    ],\n)\n\n# python setup.py sdist\n# twine upload dist/*\n"
  },
  {
    "path": "tests/test_alignment.py",
    "content": "# -*- coding: utf-8 -*-\nimport pytest\nfrom ffsubsync.aligners import FFTAligner, MaxScoreAligner\n\n\n@pytest.mark.parametrize(\n    \"s1, s2, true_offset\",\n    [(\"111001\", \"11001\", -1), (\"1001\", \"1001\", 0), (\"10010\", \"01001\", 1)],\n)\ndef test_fft_alignment(s1, s2, true_offset):\n    assert FFTAligner().fit_transform(s2, s1) == true_offset\n    assert MaxScoreAligner(FFTAligner).fit_transform(s2, s1)[0][1] == true_offset\n    assert MaxScoreAligner(FFTAligner()).fit_transform(s2, s1)[0][1] == true_offset\n"
  },
  {
    "path": "tests/test_integration.py",
    "content": "# -*- coding: utf-8 -*-\nimport os\nimport shutil\nimport tempfile\n\nimport numpy as np\nimport pytest\n\ntry:\n    import yaml\nexcept ImportError:  # pyyaml does not work with py3.4\n    pass\n\nfrom ffsubsync import ffsubsync\nfrom ffsubsync.sklearn_shim import make_pipeline\nfrom ffsubsync.speech_transformers import SubtitleSpeechTransformer\nfrom ffsubsync.subtitle_parser import GenericSubtitleParser\n\nINTEGRATION = \"INTEGRATION\"\nSYNC_TESTS = \"sync_tests\"\nREF = \"reference\"\nSYNCED = \"synchronized\"\nUNSYNCED = \"unsynchronized\"\nSKIP = \"skip\"\nSHOULD_DETECT_ENCODING = \"should_detect_encoding\"\nEXTRA_ARGS = \"extra_args\"\nEXTRA_NO_VALUE_ARGS = \"extra_no_value_args\"\n\n\ndef gen_synctest_configs():\n    def test_path(fname):\n        return os.path.join(\"test-data\", fname)\n\n    if INTEGRATION not in os.environ or os.environ[INTEGRATION] == 0:\n        return\n    with open(\"test-data/integration-testing-config.yaml\", \"r\") as f:\n        config = yaml.load(f, yaml.SafeLoader)\n    parser = ffsubsync.make_parser()\n    for test in config[SYNC_TESTS]:\n        if SKIP in test and test[SKIP]:\n            continue\n        unparsed_args = [test_path(test[REF]), \"-i\", test_path(test[UNSYNCED])]\n        if EXTRA_ARGS in test:\n            for extra_key, extra_value in test[EXTRA_ARGS].items():\n                unparsed_args.extend([\"--{}\".format(extra_key), str(extra_value)])\n        if EXTRA_NO_VALUE_ARGS in test:\n            for extra_key in test[EXTRA_NO_VALUE_ARGS]:\n                unparsed_args.append(\"--{}\".format(extra_key))\n        args = parser.parse_args(unparsed_args)\n        truth = test_path(test[SYNCED])\n        should_detect_encoding = None\n        if SHOULD_DETECT_ENCODING in test:\n            should_detect_encoding = test[SHOULD_DETECT_ENCODING]\n        yield args, truth, should_detect_encoding\n\n\ndef timestamps_roughly_match(f1, f2):\n    parser = GenericSubtitleParser(skip_ssa_info=True)\n 
   extractor = SubtitleSpeechTransformer(sample_rate=ffsubsync.DEFAULT_FRAME_RATE)\n    pipe = make_pipeline(parser, extractor)\n    f1_bitstring = pipe.fit_transform(f1).astype(bool)\n    f2_bitstring = pipe.fit_transform(f2).astype(bool)\n    return np.sum(f1_bitstring == f2_bitstring) / len(f1_bitstring) >= 0.99\n\n\ndef detected_encoding(fname):\n    parser = GenericSubtitleParser(skip_ssa_info=True)\n    parser.fit(fname)\n    return parser.detected_encoding_\n\n\n@pytest.mark.integration\n@pytest.mark.parametrize(\"args,truth,should_detect_encoding\", gen_synctest_configs())\ndef test_sync_matches_ground_truth(args, truth, should_detect_encoding):\n    # context manager TemporaryDirectory not available on py2\n    dirpath = tempfile.mkdtemp()\n    try:\n        args.srtout = os.path.join(\n            dirpath, \"test\" + os.path.splitext(args.srtin[0])[-1]\n        )\n        args.skip_ssa_info = True\n        assert ffsubsync.run(args)[\"retval\"] == 0\n        assert timestamps_roughly_match(args.srtout, truth)\n        if should_detect_encoding is not None:\n            assert detected_encoding(args.srtin[0]) == should_detect_encoding\n    finally:\n        shutil.rmtree(dirpath)\n"
  },
  {
    "path": "tests/test_misc.py",
    "content": "# -*- coding: utf-8 -*-\nimport pytest\nfrom ffsubsync.version import make_version_tuple\n\n\n@pytest.mark.parametrize(\n    \"vstr, expected\",\n    [(\"v0.1.1\", (0, 1, 1)), (\"v1.2.3\", (1, 2, 3)), (\"4.5.6.1\", (4, 5, 6, 1))],\n)\ndef test_version_tuple_from_string(vstr, expected):\n    assert make_version_tuple(vstr) == expected\n"
  },
  {
    "path": "tests/test_subtitles.py",
    "content": "# -*- coding: utf-8 -*-\nimport itertools\nfrom io import BytesIO\nfrom datetime import timedelta\n\nimport pytest\nimport numpy as np\n\nfrom ffsubsync.sklearn_shim import make_pipeline\nfrom ffsubsync.speech_transformers import SubtitleSpeechTransformer\nfrom ffsubsync.subtitle_parser import GenericSubtitleParser\nfrom ffsubsync.subtitle_transformers import SubtitleShifter\n\nfake_srt = b\"\"\"1\n00:00:00,178 --> 00:00:01,1416\n<i>Previously on \"Your favorite TV show...\"</i>\n\n2\n00:00:01,1828 --> 00:00:04,549\nOh hi, Mark.\n\n3\n00:00:04,653 --> 00:00:03,3062\nYou are tearing me apart, Lisa!\n\"\"\"\n\n# Occasionally some srt files have timestamps whose 'milliseconds'\n# field has more than 3 digits... Ideally we should test that these\n# are handled properly with dedicated tests, but in the interest of\n# development speed I've opted to sprinkle in a few >3 digit\n# millisecond fields into the dummy string above in order to exercise\n# this case integration-test style in the below unit tests.\n\n\n@pytest.mark.parametrize(\"start_seconds\", [0, 2, 4, 6])\ndef test_start_seconds(start_seconds):\n    parser_zero = GenericSubtitleParser(start_seconds=0)\n    parser_zero.fit(BytesIO(fake_srt))\n    parser = GenericSubtitleParser(start_seconds=start_seconds)\n    parser.fit(BytesIO(fake_srt))\n    expected = [\n        sub\n        for sub in parser_zero.subs_\n        if sub.start >= timedelta(seconds=start_seconds)\n    ]\n    assert all(esub == psub for esub, psub in zip(expected, parser.subs_))\n\n\n@pytest.mark.parametrize(\"max_seconds\", [1, 1.5, 2.0, 2.5])\ndef test_max_seconds(max_seconds):\n    parser = GenericSubtitleParser(max_subtitle_seconds=max_seconds)\n    parser.fit(BytesIO(fake_srt))\n    assert max(sub.end - sub.start for sub in parser.subs_) <= timedelta(\n        seconds=max_seconds\n    )\n\n\n@pytest.mark.parametrize(\"encoding\", [\"utf-8\", \"ascii\", \"latin-1\"])\ndef test_same_encoding(encoding):\n    parser = 
GenericSubtitleParser(encoding=encoding)\n    offseter = SubtitleShifter(1)\n    pipe = make_pipeline(parser, offseter)\n    pipe.fit(BytesIO(fake_srt))\n    assert parser.subs_._encoding == encoding\n    assert offseter.subs_._encoding == parser.subs_._encoding\n    assert offseter.subs_.set_encoding(\"same\")._encoding == encoding\n    assert offseter.subs_.set_encoding(\"utf-8\")._encoding == \"utf-8\"\n\n\n@pytest.mark.parametrize(\"offset\", [1, 1.5, -2.3])\ndef test_offset(offset):\n    parser = GenericSubtitleParser()\n    offseter = SubtitleShifter(offset)\n    pipe = make_pipeline(parser, offseter)\n    pipe.fit(BytesIO(fake_srt))\n    for sub_orig, sub_offset in zip(parser.subs_, offseter.subs_):\n        assert (\n            abs(\n                sub_offset.start.total_seconds()\n                - sub_orig.start.total_seconds()\n                - offset\n            )\n            < 1e-6\n        )\n        assert (\n            abs(sub_offset.end.total_seconds() - sub_orig.end.total_seconds() - offset)\n            < 1e-6\n        )\n\n\n@pytest.mark.parametrize(\n    \"sample_rate,start_seconds\", itertools.product([10, 20, 100, 300], [0, 2, 4, 6])\n)\ndef test_speech_extraction(sample_rate, start_seconds):\n    parser = GenericSubtitleParser(start_seconds=start_seconds)\n    extractor = SubtitleSpeechTransformer(\n        sample_rate=sample_rate, start_seconds=start_seconds\n    )\n    pipe = make_pipeline(parser, extractor)\n    bitstring = pipe.fit_transform(BytesIO(fake_srt)).astype(bool)\n    bitstring_shifted_left = np.append(bitstring[1:], [False])\n    bitstring_shifted_right = np.append([False], bitstring[:-1])\n    bitstring_cumsum = np.cumsum(bitstring)\n    consec_ones_end_pos = np.nonzero(\n        bitstring_cumsum\n        * (bitstring ^ bitstring_shifted_left)\n        * (bitstring_cumsum != np.cumsum(bitstring_shifted_right))\n    )[0]\n    prev = 0\n    for pos, sub in zip(consec_ones_end_pos, parser.subs_):\n        start = 
int(round(sub.start.total_seconds() * sample_rate))\n        duration = sub.end.total_seconds() - sub.start.total_seconds()\n        stop = start + int(round(duration * sample_rate))\n        assert bitstring_cumsum[pos] - prev == stop - start\n        prev = bitstring_cumsum[pos]\n\n\ndef test_max_time_found():\n    parser = GenericSubtitleParser()\n    extractor = SubtitleSpeechTransformer(sample_rate=100)\n    pipe = make_pipeline(parser, extractor)\n    pipe.fit(BytesIO(fake_srt))\n    assert extractor.max_time_ == 6.062\n"
  },
  {
    "path": "versioneer.py",
    "content": "\n# Version: 0.22\n\n\"\"\"The Versioneer - like a rocketeer, but for versions.\n\nThe Versioneer\n==============\n\n* like a rocketeer, but for versions!\n* https://github.com/python-versioneer/python-versioneer\n* Brian Warner\n* License: Public Domain\n* Compatible with: Python 3.6, 3.7, 3.8, 3.9, 3.10 and pypy3\n* [![Latest Version][pypi-image]][pypi-url]\n* [![Build Status][travis-image]][travis-url]\n\nThis is a tool for managing a recorded version number in distutils/setuptools-based\npython projects. The goal is to remove the tedious and error-prone \"update\nthe embedded version string\" step from your release process. Making a new\nrelease should be as easy as recording a new tag in your version-control\nsystem, and maybe making new tarballs.\n\n\n## Quick Install\n\n* `pip install versioneer` to somewhere in your $PATH\n* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md))\n* run `versioneer install` in your source tree, commit the results\n* Verify version information with `python setup.py version`\n\n## Version Identifiers\n\nSource trees come from a variety of places:\n\n* a version-control system checkout (mostly used by developers)\n* a nightly tarball, produced by build automation\n* a snapshot tarball, produced by a web-based VCS browser, like github's\n  \"tarball from tag\" feature\n* a release tarball, produced by \"setup.py sdist\", distributed through PyPI\n\nWithin each source tree, the version identifier (either a string or a number,\nthis tool is format-agnostic) can come from a variety of places:\n\n* ask the VCS tool itself, e.g. \"git describe\" (for checkouts), which knows\n  about recent \"tags\" and an absolute revision-id\n* the name of the directory into which the tarball was unpacked\n* an expanded VCS keyword ($Id$, etc)\n* a `_version.py` created by some earlier build step\n\nFor released software, the version identifier is closely related to a VCS\ntag. 
Some projects use tag names that include more than just the version\nstring (e.g. \"myproject-1.2\" instead of just \"1.2\"), in which case the tool\nneeds to strip the tag prefix to extract the version identifier. For\nunreleased software (between tags), the version identifier should provide\nenough information to help developers recreate the same tree, while also\ngiving them an idea of roughly how old the tree is (after version 1.2, before\nversion 1.3). Many VCS systems can report a description that captures this,\nfor example `git describe --tags --dirty --always` reports things like\n\"0.7-1-g574ab98-dirty\" to indicate that the checkout is one revision past the\n0.7 tag, has a unique revision id of \"574ab98\", and is \"dirty\" (it has\nuncommitted changes).\n\nThe version identifier is used for multiple purposes:\n\n* to allow the module to self-identify its version: `myproject.__version__`\n* to choose a name and prefix for a 'setup.py sdist' tarball\n\n## Theory of Operation\n\nVersioneer works by adding a special `_version.py` file into your source\ntree, where your `__init__.py` can import it. This `_version.py` knows how to\ndynamically ask the VCS tool for version information at import time.\n\n`_version.py` also contains `$Revision$` markers, and the installation\nprocess marks `_version.py` to have this marker rewritten with a tag name\nduring the `git archive` command. As a result, generated tarballs will\ncontain enough information to get the proper version.\n\nTo allow `setup.py` to compute a version too, a `versioneer.py` is added to\nthe top level of your source tree, next to `setup.py` and the `setup.cfg`\nthat configures it. 
This overrides several distutils/setuptools commands to\ncompute the version when invoked, and changes `setup.py build` and `setup.py\nsdist` to replace `_version.py` with a small static file that contains just\nthe generated version data.\n\n## Installation\n\nSee [INSTALL.md](./INSTALL.md) for detailed installation instructions.\n\n## Version-String Flavors\n\nCode which uses Versioneer can learn about its version string at runtime by\nimporting `_version` from your main `__init__.py` file and running the\n`get_versions()` function. From the \"outside\" (e.g. in `setup.py`), you can\nimport the top-level `versioneer.py` and run `get_versions()`.\n\nBoth functions return a dictionary with different flavors of version\ninformation:\n\n* `['version']`: A condensed version string, rendered using the selected\n  style. This is the most commonly used value for the project's version\n  string. The default \"pep440\" style yields strings like `0.11`,\n  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the \"Styles\" section\n  below for alternative styles.\n\n* `['full-revisionid']`: detailed revision identifier. For Git, this is the\n  full SHA1 commit id, e.g. \"1076c978a8d3cfc70f408fe5974aa6c092c949ac\".\n\n* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the\n  commit date in ISO 8601 format. This will be None if the date is not\n  available.\n\n* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that\n  this is only accurate if run in a VCS checkout, otherwise it is likely to\n  be False or None\n\n* `['error']`: if the version string could not be computed, this will be set\n  to a string describing the problem, otherwise it will be None. It may be\n  useful to throw an exception in setup.py if this is set, to avoid e.g.\n  creating tarballs with a version string of \"unknown\".\n\nSome variants are more useful than others. 
Including `full-revisionid` in a\nbug report should allow developers to reconstruct the exact code being tested\n(or indicate the presence of local changes that should be shared with the\ndevelopers). `version` is suitable for display in an \"about\" box or a CLI\n`--version` output: it can be easily compared against release notes and lists\nof bugs fixed in various releases.\n\nThe installer adds the following text to your `__init__.py` to place a basic\nversion in `YOURPROJECT.__version__`:\n\n    from ._version import get_versions\n    __version__ = get_versions()['version']\n    del get_versions\n\n## Styles\n\nThe setup.cfg `style=` configuration controls how the VCS information is\nrendered into a version string.\n\nThe default style, \"pep440\", produces a PEP440-compliant string, equal to the\nun-prefixed tag name for actual releases, and containing an additional \"local\nversion\" section with more detail for in-between builds. For Git, this is\nTAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags\n--dirty --always`. For example \"0.11+2.g1076c97.dirty\" indicates that the\ntree is like the \"1076c97\" commit but has uncommitted changes (\".dirty\"), and\nthat this commit is two revisions (\"+2\") beyond the \"0.11\" tag. For released\nsoftware (exactly equal to a known tag), the identifier will only contain the\nstripped tag, e.g. \"0.11\".\n\nOther styles are available. See [details.md](details.md) in the Versioneer\nsource tree for descriptions.\n\n## Debugging\n\nVersioneer tries to avoid fatal errors: if something goes wrong, it will tend\nto return a version of \"0+unknown\". To investigate the problem, run `setup.py\nversion`, which will run the version-lookup code in a verbose mode, and will\ndisplay the full contents of `get_versions()` (including the `error` string,\nwhich may help identify what went wrong).\n\n## Known Limitations\n\nSome situations are known to cause problems for Versioneer. 
This details the\nmost significant ones. More can be found on Github\n[issues page](https://github.com/python-versioneer/python-versioneer/issues).\n\n### Subprojects\n\nVersioneer has limited support for source trees in which `setup.py` is not in\nthe root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are\ntwo common reasons why `setup.py` might not be in the root:\n\n* Source trees which contain multiple subprojects, such as\n  [Buildbot](https://github.com/buildbot/buildbot), which contains both\n  \"master\" and \"slave\" subprojects, each with their own `setup.py`,\n  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI\n  distributions (and upload multiple independently-installable tarballs).\n* Source trees whose main purpose is to contain a C library, but which also\n  provide bindings to Python (and perhaps other languages) in subdirectories.\n\nVersioneer will look for `.git` in parent directories, and most operations\nshould get the right version string. However `pip` and `setuptools` have bugs\nand implementation details which frequently cause `pip install .` from a\nsubproject directory to fail to find a correct version string (so it usually\ndefaults to `0+unknown`).\n\n`pip install --editable .` should work correctly. `setup.py install` might\nwork too.\n\nPip-8.1.1 is known to have this problem, but hopefully it will get fixed in\nsome later version.\n\n[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking\nthis issue. 
The discussion in\n[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the\nissue from the Versioneer side in more detail.\n[pip PR#3176](https://github.com/pypa/pip/pull/3176) and\n[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve\npip to let Versioneer work correctly.\n\nVersioneer-0.16 and earlier only looked for a `.git` directory next to the\n`setup.cfg`, so subprojects were completely unsupported with those releases.\n\n### Editable installs with setuptools <= 18.5\n\n`setup.py develop` and `pip install --editable .` allow you to install a\nproject into a virtualenv once, then continue editing the source code (and\ntest) without re-installing after every change.\n\n\"Entry-point scripts\" (`setup(entry_points={\"console_scripts\": ..})`) are a\nconvenient way to specify executable scripts that should be installed along\nwith the python package.\n\nThese both work as expected when using modern setuptools. When using\nsetuptools-18.5 or earlier, however, certain operations will cause\n`pkg_resources.DistributionNotFound` errors when running the entrypoint\nscript, which must be resolved by re-installing the package. This happens\nwhen the install happens with one version, then the egg_info data is\nregenerated while a different version is checked out. Many setup.py commands\ncause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into\na different virtualenv), so this can be surprising.\n\n[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes\nthis one, but upgrading to a newer version of setuptools should probably\nresolve it.\n\n\n## Updating Versioneer\n\nTo upgrade your project to a new release of Versioneer, do the following:\n\n* install the new Versioneer (`pip install -U versioneer` or equivalent)\n* edit `setup.cfg`, if necessary, to include any new configuration settings\n  indicated by the release notes. 
See [UPGRADING](./UPGRADING.md) for details.\n* re-run `versioneer install` in your source tree, to replace\n  `SRC/_version.py`\n* commit any changed files\n\n## Future Directions\n\nThis tool is designed to make it easily extended to other version-control\nsystems: all VCS-specific components are in separate directories like\nsrc/git/ . The top-level `versioneer.py` script is assembled from these\ncomponents by running make-versioneer.py . In the future, make-versioneer.py\nwill take a VCS name as an argument, and will construct a version of\n`versioneer.py` that is specific to the given VCS. It might also take the\nconfiguration arguments that are currently provided manually during\ninstallation by editing setup.py . Alternatively, it might go the other\ndirection and include code from all supported VCS systems, reducing the\nnumber of intermediate scripts.\n\n## Similar projects\n\n* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time\n  dependency\n* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of\n  versioneer\n* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools\n  plugin\n\n## License\n\nTo make Versioneer easier to embed, all its code is dedicated to the public\ndomain. 
The `_version.py` that it creates is also in the public domain.\nSpecifically, both are released under the Creative Commons \"Public Domain\nDedication\" license (CC0-1.0), as described in\nhttps://creativecommons.org/publicdomain/zero/1.0/ .\n\n[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg\n[pypi-url]: https://pypi.python.org/pypi/versioneer/\n[travis-image]:\nhttps://img.shields.io/travis/com/python-versioneer/python-versioneer.svg\n[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer\n\n\"\"\"\n# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring\n# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements\n# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error\n# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with\n# pylint:disable=attribute-defined-outside-init,too-many-arguments\n\nimport configparser\nimport errno\nimport json\nimport os\nimport re\nimport subprocess\nimport sys\nfrom typing import Callable, Dict\nimport functools\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n\ndef get_root():\n    \"\"\"Get the project root directory.\n\n    We require that all commands are run from the project root, i.e. 
the\n    directory that contains setup.py, setup.cfg, and versioneer.py .\n    \"\"\"\n    root = os.path.realpath(os.path.abspath(os.getcwd()))\n    setup_py = os.path.join(root, \"setup.py\")\n    versioneer_py = os.path.join(root, \"versioneer.py\")\n    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):\n        # allow 'python path/to/setup.py COMMAND'\n        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))\n        setup_py = os.path.join(root, \"setup.py\")\n        versioneer_py = os.path.join(root, \"versioneer.py\")\n    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):\n        err = (\"Versioneer was unable to run the project root directory. \"\n               \"Versioneer requires setup.py to be executed from \"\n               \"its immediate directory (like 'python setup.py COMMAND'), \"\n               \"or in a way that lets it use sys.argv[0] to find the root \"\n               \"(like 'python path/to/setup.py COMMAND').\")\n        raise VersioneerBadRootError(err)\n    try:\n        # Certain runtime workflows (setup.py install/develop in a setuptools\n        # tree) execute all dependencies in a single python process, so\n        # \"versioneer\" may be imported multiple times, and python's shared\n        # module-import table will cache the first one. 
So we can't use\n        # os.path.dirname(__file__), as that will find whichever\n        # versioneer.py was first imported, even in later projects.\n        my_path = os.path.realpath(os.path.abspath(__file__))\n        me_dir = os.path.normcase(os.path.splitext(my_path)[0])\n        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])\n        if me_dir != vsr_dir:\n            print(\"Warning: build in %s is using versioneer.py from %s\"\n                  % (os.path.dirname(my_path), versioneer_py))\n    except NameError:\n        pass\n    return root\n\n\ndef get_config_from_root(root):\n    \"\"\"Read the project setup.cfg file to determine Versioneer config.\"\"\"\n    # This might raise OSError (if setup.cfg is missing), or\n    # configparser.NoSectionError (if it lacks a [versioneer] section), or\n    # configparser.NoOptionError (if it lacks \"VCS=\"). See the docstring at\n    # the top of versioneer.py for instructions on writing your setup.cfg .\n    setup_cfg = os.path.join(root, \"setup.cfg\")\n    parser = configparser.ConfigParser()\n    with open(setup_cfg, \"r\") as cfg_file:\n        parser.read_file(cfg_file)\n    VCS = parser.get(\"versioneer\", \"VCS\")  # mandatory\n\n    # Dict-like interface for non-mandatory entries\n    section = parser[\"versioneer\"]\n\n    cfg = VersioneerConfig()\n    cfg.VCS = VCS\n    cfg.style = section.get(\"style\", \"\")\n    cfg.versionfile_source = section.get(\"versionfile_source\")\n    cfg.versionfile_build = section.get(\"versionfile_build\")\n    cfg.tag_prefix = section.get(\"tag_prefix\")\n    if cfg.tag_prefix in (\"''\", '\"\"'):\n        cfg.tag_prefix = \"\"\n    cfg.parentdir_prefix = section.get(\"parentdir_prefix\")\n    cfg.verbose = section.get(\"verbose\")\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current scenario.\"\"\"\n\n\n# these dictionaries contain VCS-specific tools\nLONG_VERSION_PY: Dict[str, str] = 
{}\nHANDLERS: Dict[str, Dict[str, Callable]] = {}\n\n\ndef register_vcs_handler(vcs, method):  # decorator\n    \"\"\"Create decorator to mark a method as the handler of a VCS.\"\"\"\n    def decorate(f):\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        HANDLERS.setdefault(vcs, {})[method] = f\n        return f\n    return decorate\n\n\ndef run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,\n                env=None):\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    process = None\n\n    popen_kwargs = {}\n    if sys.platform == \"win32\":\n        # This hides the console window if pythonw.exe is used\n        startupinfo = subprocess.STARTUPINFO()\n        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        popen_kwargs[\"startupinfo\"] = startupinfo\n\n    for command in commands:\n        try:\n            dispcmd = str([command] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            process = subprocess.Popen([command] + args, cwd=cwd, env=env,\n                                       stdout=subprocess.PIPE,\n                                       stderr=(subprocess.PIPE if hide_stderr\n                                               else None), **popen_kwargs)\n            break\n        except OSError:\n            e = sys.exc_info()[1]\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %s\" % dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %s\" % (commands,))\n        return None, None\n    stdout = process.communicate()[0].strip().decode()\n    if process.returncode != 0:\n        if verbose:\n            print(\"unable to run %s (error)\" % dispcmd)\n            print(\"stdout was %s\" % stdout)\n        return None, process.returncode\n    return 
stdout, process.returncode\n\n\nLONG_VERSION_PY['git'] = r'''\n# This file helps to compute a version number in source trees obtained from\n# git-archive tarball (such as those provided by githubs download-from-tag\n# feature). Distribution tarballs (built by setup.py sdist) and build\n# directories (produced by setup.py build) will contain a much shorter file\n# that just contains the computed version number.\n\n# This file is released into the public domain. Generated by\n# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer)\n\n\"\"\"Git implementation of _version.py.\"\"\"\n\nimport errno\nimport os\nimport re\nimport subprocess\nimport sys\nfrom typing import Callable, Dict\nimport functools\n\n\ndef get_keywords():\n    \"\"\"Get the keywords needed to look up the version information.\"\"\"\n    # these strings will be replaced by git during git-archive.\n    # setup.py/versioneer.py will grep for the variable names, so they must\n    # each be defined on a line of their own. 
_version.py will just call\n    # get_keywords().\n    git_refnames = \"%(DOLLAR)sFormat:%%d%(DOLLAR)s\"\n    git_full = \"%(DOLLAR)sFormat:%%H%(DOLLAR)s\"\n    git_date = \"%(DOLLAR)sFormat:%%ci%(DOLLAR)s\"\n    keywords = {\"refnames\": git_refnames, \"full\": git_full, \"date\": git_date}\n    return keywords\n\n\nclass VersioneerConfig:\n    \"\"\"Container for Versioneer configuration parameters.\"\"\"\n\n\ndef get_config():\n    \"\"\"Create, populate and return the VersioneerConfig() object.\"\"\"\n    # these strings are filled in when 'setup.py versioneer' creates\n    # _version.py\n    cfg = VersioneerConfig()\n    cfg.VCS = \"git\"\n    cfg.style = \"%(STYLE)s\"\n    cfg.tag_prefix = \"%(TAG_PREFIX)s\"\n    cfg.parentdir_prefix = \"%(PARENTDIR_PREFIX)s\"\n    cfg.versionfile_source = \"%(VERSIONFILE_SOURCE)s\"\n    cfg.verbose = False\n    return cfg\n\n\nclass NotThisMethod(Exception):\n    \"\"\"Exception raised if a method is not valid for the current scenario.\"\"\"\n\n\nLONG_VERSION_PY: Dict[str, str] = {}\nHANDLERS: Dict[str, Dict[str, Callable]] = {}\n\n\ndef register_vcs_handler(vcs, method):  # decorator\n    \"\"\"Create decorator to mark a method as the handler of a VCS.\"\"\"\n    def decorate(f):\n        \"\"\"Store f in HANDLERS[vcs][method].\"\"\"\n        if vcs not in HANDLERS:\n            HANDLERS[vcs] = {}\n        HANDLERS[vcs][method] = f\n        return f\n    return decorate\n\n\ndef run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,\n                env=None):\n    \"\"\"Call the given command(s).\"\"\"\n    assert isinstance(commands, list)\n    process = None\n\n    popen_kwargs = {}\n    if sys.platform == \"win32\":\n        # This hides the console window if pythonw.exe is used\n        startupinfo = subprocess.STARTUPINFO()\n        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW\n        popen_kwargs[\"startupinfo\"] = startupinfo\n\n    for command in commands:\n        try:\n            
dispcmd = str([command] + args)\n            # remember shell=False, so use git.cmd on windows, not just git\n            process = subprocess.Popen([command] + args, cwd=cwd, env=env,\n                                       stdout=subprocess.PIPE,\n                                       stderr=(subprocess.PIPE if hide_stderr\n                                               else None), **popen_kwargs)\n            break\n        except OSError:\n            e = sys.exc_info()[1]\n            if e.errno == errno.ENOENT:\n                continue\n            if verbose:\n                print(\"unable to run %%s\" %% dispcmd)\n                print(e)\n            return None, None\n    else:\n        if verbose:\n            print(\"unable to find command, tried %%s\" %% (commands,))\n        return None, None\n    stdout = process.communicate()[0].strip().decode()\n    if process.returncode != 0:\n        if verbose:\n            print(\"unable to run %%s (error)\" %% dispcmd)\n            print(\"stdout was %%s\" %% stdout)\n        return None, process.returncode\n    return stdout, process.returncode\n\n\ndef versions_from_parentdir(parentdir_prefix, root, verbose):\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. 
We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for _ in range(3):\n        for prefix in [parentdir_prefix, \"\"]:\n            prefix = prefix.replace(\"-\", \"_\")\n            for dirname in [os.path.basename(root)] + os.listdir(root):\n                dirname = dirname.replace(\"-\", \"_\")\n                if not dirname.startswith(prefix):\n                    continue\n                components = dirname[len(prefix):].split(\".\")\n                components = [\n                    comp for comp in components\n                    if all(c.isdigit() for c in comp)\n                ]\n                if len(components) <= 1:\n                    continue\n                return {\"version\": \".\".join(components),\n                        \"full-revisionid\": None,\n                        \"dirty\": False, \"error\": None, \"date\": None}\n        rootdirs.append(root)\n        root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\"Tried directories %%s but none started with prefix %%s\" %%\n              (str(rootdirs), parentdir_prefix))\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs):\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords = {}\n    try:\n        with open(versionfile_abs, \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(\"git_refnames =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"refnames\"] = mo.group(1)\n                if line.strip().startswith(\"git_full =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"full\"] = mo.group(1)\n                if line.strip().startswith(\"git_date =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"date\"] = mo.group(1)\n    except OSError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(keywords, tag_prefix, verbose):\n    \"\"\"Get version information from git keywords.\"\"\"\n    if \"refnames\" not in keywords:\n        raise NotThisMethod(\"Short version file found\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # Use only the last line.  Previous lines may contain GPG signature\n        # information.\n        date = date.splitlines()[-1]\n\n        # git-2.2.0 added \"%%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = {r.strip() for r in refnames.strip(\"()\").split(\",\")}\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %%d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = {r for r in refs if re.search(r'\\d', r)}\n        if verbose:\n            print(\"discarding '%%s', no digits\" %% \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %%s\" %% \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix):]\n            # Filter out refs that exactly match prefix or that don't start\n            # with a number once the prefix is stripped (mostly a concern\n            # when prefix is '')\n            if not re.match(r'\\d', r):\n                continue\n            if verbose:\n                print(\"picking %%s\" %% r)\n            return {\"version\": r,\n                    \"full-revisionid\": keywords[\"full\"].strip(),\n                    \"dirty\": False, \"error\": None,\n                    \"date\": date}\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\"version\": \"0+unknown\",\n            \"full-revisionid\": keywords[\"full\"].strip(),\n            \"dirty\": False, \"error\": \"no suitable tags\", \"date\": None}\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    # GIT_DIR can interfere with correct operation of Versioneer.\n    # It may be intended to be passed to the Versioneer-versioned project,\n    # but that should not change where we get our version from.\n    env = os.environ.copy()\n    env.pop(\"GIT_DIR\", None)\n    runner = functools.partial(runner, env=env)\n\n    _, rc = runner(GITS, [\"rev-parse\", \"--git-dir\"], cwd=root,\n                   hide_stderr=True)\n    if rc != 0:\n        if verbose:\n            
print(\"Directory %%s not under git control\" %% root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    MATCH_ARGS = [\"--match\", \"%%s*\" %% tag_prefix] if tag_prefix else []\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = runner(GITS, [\"describe\", \"--tags\", \"--dirty\",\n                                     \"--always\", \"--long\", *MATCH_ARGS],\n                              cwd=root)\n    # --long was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = runner(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    branch_name, rc = runner(GITS, [\"rev-parse\", \"--abbrev-ref\", \"HEAD\"],\n                             cwd=root)\n    # --abbrev-ref was added in git-1.6.3\n    if rc != 0 or branch_name is None:\n        raise NotThisMethod(\"'git rev-parse --abbrev-ref' returned error\")\n    branch_name = branch_name.strip()\n\n    if branch_name == \"HEAD\":\n        # If we aren't exactly on a branch, pick a branch which represents\n        # the current commit. 
If all else fails, we are on a branchless\n        # commit.\n        branches, rc = runner(GITS, [\"branch\", \"--contains\"], cwd=root)\n        # --contains was added in git-1.5.4\n        if rc != 0 or branches is None:\n            raise NotThisMethod(\"'git branch --contains' returned error\")\n        branches = branches.split(\"\\n\")\n\n        # Remove the first line if we're running detached\n        if \"(\" in branches[0]:\n            branches.pop(0)\n\n        # Strip off the leading \"* \" from the list of branches.\n        branches = [branch[2:] for branch in branches]\n        if \"master\" in branches:\n            branch_name = \"master\"\n        elif not branches:\n            branch_name = None\n        else:\n            # Pick the first branch that is returned. Good or bad.\n            branch_name = branches[0]\n\n    pieces[\"branch\"] = branch_name\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[:git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r'^(.+)-(\\d+)-g([0-9a-f]+)$', git_describe)\n        if not mo:\n            # unparsable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = (\"unable to parse git-describe output: '%%s'\"\n                               %% describe_out)\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%%s' doesn't start with prefix '%%s'\"\n                print(fmt %% (full_tag, tag_prefix))\n            pieces[\"error\"] = (\"tag '%%s' doesn't start with prefix '%%s'\"\n                               %% (full_tag, tag_prefix))\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix):]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        count_out, rc = runner(GITS, [\"rev-list\", \"HEAD\", \"--count\"], cwd=root)\n        pieces[\"distance\"] = int(count_out)  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = runner(GITS, [\"show\", \"-s\", \"--format=%%ci\", \"HEAD\"], cwd=root)[0].strip()\n    # Use only the last line.  Previous lines may contain GPG signature\n    # information.\n    date = date.splitlines()[-1]\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef plus_or_dot(pieces):\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces):\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. 
git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%%d.g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%%d.g%%s\" %% (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_branch(pieces):\n    \"\"\"TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch. Note that .dev0 sorts backwards\n    (a feature branch will appear \"older\" than the master branch).\n\n    Exceptions:\n    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"%%d.g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0\"\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+untagged.%%d.g%%s\" %% (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef pep440_split_post(ver):\n    \"\"\"Split pep440 version string at the post-release segment.\n\n    Returns the release segments before the post-release and the\n    post-release version number (or None if no post-release 
segment is present).\n    \"\"\"\n    vc = str.split(ver, \".post\")\n    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None\n\n\ndef render_pep440_pre(pieces):\n    \"\"\"TAG[.postN.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 0.post0.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        if pieces[\"distance\"]:\n            # update the post release segment\n            tag_version, post_version = pep440_split_post(pieces[\"closest-tag\"])\n            rendered = tag_version\n            if post_version is not None:\n                rendered += \".post%%d.dev%%d\" %% (post_version+1, pieces[\"distance\"])\n            else:\n                rendered += \".post0.dev%%d\" %% (pieces[\"distance\"])\n        else:\n            # no commits, use the tag as the version\n            rendered = pieces[\"closest-tag\"]\n    else:\n        # exception #1\n        rendered = \"0.post0.dev%%d\" %% pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%%s\" %% pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%%s\" %% pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_post_branch(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%%s\" %% pieces[\"short\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+g%%s\" %% pieces[\"short\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_old(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%%d\" %% pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%%d\" %% pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces):\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%%d-g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces):\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always --long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%%d-g%%s\" %% (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces, style):\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\"version\": \"unknown\",\n                \"full-revisionid\": pieces.get(\"long\"),\n                \"dirty\": None,\n                \"error\": pieces[\"error\"],\n                \"date\": None}\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-branch\":\n        rendered = render_pep440_branch(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-post-branch\":\n        rendered = render_pep440_post_branch(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == \"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%%s'\" %% style)\n\n    return {\"version\": rendered, \"full-revisionid\": pieces[\"long\"],\n            \"dirty\": pieces[\"dirty\"], \"error\": None,\n            \"date\": pieces.get(\"date\")}\n\n\ndef get_versions():\n    \"\"\"Get version information or return default if unable to do so.\"\"\"\n    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have\n    # __file__, we can work backwards from there to the root. 
Some\n    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which\n    # case we can only use expanded keywords.\n\n    cfg = get_config()\n    verbose = cfg.verbose\n\n    try:\n        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,\n                                          verbose)\n    except NotThisMethod:\n        pass\n\n    try:\n        root = os.path.realpath(__file__)\n        # versionfile_source is the relative path from the top of the source\n        # tree (where the .git directory might live) to this file. Invert\n        # this to find the root from __file__.\n        for _ in cfg.versionfile_source.split('/'):\n            root = os.path.dirname(root)\n    except NameError:\n        return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n                \"dirty\": None,\n                \"error\": \"unable to find root of source tree\",\n                \"date\": None}\n\n    try:\n        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)\n        return render(pieces, cfg.style)\n    except NotThisMethod:\n        pass\n\n    try:\n        if cfg.parentdir_prefix:\n            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n    except NotThisMethod:\n        pass\n\n    return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n            \"dirty\": None,\n            \"error\": \"unable to compute version\", \"date\": None}\n'''\n\n\n@register_vcs_handler(\"git\", \"get_keywords\")\ndef git_get_keywords(versionfile_abs):\n    \"\"\"Extract version information from the given file.\"\"\"\n    # the code embedded in _version.py can just fetch the value of these\n    # keywords. When used from setup.py, we don't want to import _version.py,\n    # so we do it with a regexp instead. 
This function is not used from\n    # _version.py.\n    keywords = {}\n    try:\n        with open(versionfile_abs, \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(\"git_refnames =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"refnames\"] = mo.group(1)\n                if line.strip().startswith(\"git_full =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"full\"] = mo.group(1)\n                if line.strip().startswith(\"git_date =\"):\n                    mo = re.search(r'=\\s*\"(.*)\"', line)\n                    if mo:\n                        keywords[\"date\"] = mo.group(1)\n    except OSError:\n        pass\n    return keywords\n\n\n@register_vcs_handler(\"git\", \"keywords\")\ndef git_versions_from_keywords(keywords, tag_prefix, verbose):\n    \"\"\"Get version information from git keywords.\"\"\"\n    if \"refnames\" not in keywords:\n        raise NotThisMethod(\"Short version file found\")\n    date = keywords.get(\"date\")\n    if date is not None:\n        # Use only the last line.  Previous lines may contain GPG signature\n        # information.\n        date = date.splitlines()[-1]\n\n        # git-2.2.0 added \"%cI\", which expands to an ISO-8601 -compliant\n        # datestamp. 
However we prefer \"%ci\" (which expands to an \"ISO-8601\n        # -like\" string, which we must then edit to make compliant), because\n        # it's been around since git-1.5.3, and it's too difficult to\n        # discover which version we're using, or to work around using an\n        # older one.\n        date = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n    refnames = keywords[\"refnames\"].strip()\n    if refnames.startswith(\"$Format\"):\n        if verbose:\n            print(\"keywords are unexpanded, not using\")\n        raise NotThisMethod(\"unexpanded keywords, not a git-archive tarball\")\n    refs = {r.strip() for r in refnames.strip(\"()\").split(\",\")}\n    # starting in git-1.8.3, tags are listed as \"tag: foo-1.0\" instead of\n    # just \"foo-1.0\". If we see a \"tag: \" prefix, prefer those.\n    TAG = \"tag: \"\n    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}\n    if not tags:\n        # Either we're using git < 1.8.3, or there really are no tags. We use\n        # a heuristic: assume all version tags have a digit. The old git %d\n        # expansion behaves like git log --decorate=short and strips out the\n        # refs/heads/ and refs/tags/ prefixes that would let us distinguish\n        # between branches and tags. By ignoring refnames without digits, we\n        # filter out many common branch names like \"release\" and\n        # \"stabilization\", as well as \"HEAD\" and \"master\".\n        tags = {r for r in refs if re.search(r'\\d', r)}\n        if verbose:\n            print(\"discarding '%s', no digits\" % \",\".join(refs - tags))\n    if verbose:\n        print(\"likely tags: %s\" % \",\".join(sorted(tags)))\n    for ref in sorted(tags):\n        # sorting will prefer e.g. 
\"2.0\" over \"2.0rc1\"\n        if ref.startswith(tag_prefix):\n            r = ref[len(tag_prefix):]\n            # Filter out refs that exactly match prefix or that don't start\n            # with a number once the prefix is stripped (mostly a concern\n            # when prefix is '')\n            if not re.match(r'\\d', r):\n                continue\n            if verbose:\n                print(\"picking %s\" % r)\n            return {\"version\": r,\n                    \"full-revisionid\": keywords[\"full\"].strip(),\n                    \"dirty\": False, \"error\": None,\n                    \"date\": date}\n    # no suitable tags, so version is \"0+unknown\", but full hex is still there\n    if verbose:\n        print(\"no suitable tags, using unknown + full revision id\")\n    return {\"version\": \"0+unknown\",\n            \"full-revisionid\": keywords[\"full\"].strip(),\n            \"dirty\": False, \"error\": \"no suitable tags\", \"date\": None}\n\n\n@register_vcs_handler(\"git\", \"pieces_from_vcs\")\ndef git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):\n    \"\"\"Get version from 'git describe' in the root of the source tree.\n\n    This only gets called if the git-archive 'subst' keywords were *not*\n    expanded, and _version.py hasn't already been rewritten with a short\n    version string, meaning we're inside a checked out source tree.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n\n    # GIT_DIR can interfere with correct operation of Versioneer.\n    # It may be intended to be passed to the Versioneer-versioned project,\n    # but that should not change where we get our version from.\n    env = os.environ.copy()\n    env.pop(\"GIT_DIR\", None)\n    runner = functools.partial(runner, env=env)\n\n    _, rc = runner(GITS, [\"rev-parse\", \"--git-dir\"], cwd=root,\n                   hide_stderr=True)\n    if rc != 0:\n        if verbose:\n            
print(\"Directory %s not under git control\" % root)\n        raise NotThisMethod(\"'git rev-parse --git-dir' returned error\")\n\n    MATCH_ARGS = [\"--match\", \"%s*\" % tag_prefix] if tag_prefix else []\n\n    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]\n    # if there isn't one, this yields HEX[-dirty] (no NUM)\n    describe_out, rc = runner(GITS, [\"describe\", \"--tags\", \"--dirty\",\n                                     \"--always\", \"--long\", *MATCH_ARGS],\n                              cwd=root)\n    # --long was added in git-1.5.5\n    if describe_out is None:\n        raise NotThisMethod(\"'git describe' failed\")\n    describe_out = describe_out.strip()\n    full_out, rc = runner(GITS, [\"rev-parse\", \"HEAD\"], cwd=root)\n    if full_out is None:\n        raise NotThisMethod(\"'git rev-parse' failed\")\n    full_out = full_out.strip()\n\n    pieces = {}\n    pieces[\"long\"] = full_out\n    pieces[\"short\"] = full_out[:7]  # maybe improved later\n    pieces[\"error\"] = None\n\n    branch_name, rc = runner(GITS, [\"rev-parse\", \"--abbrev-ref\", \"HEAD\"],\n                             cwd=root)\n    # --abbrev-ref was added in git-1.6.3\n    if rc != 0 or branch_name is None:\n        raise NotThisMethod(\"'git rev-parse --abbrev-ref' returned error\")\n    branch_name = branch_name.strip()\n\n    if branch_name == \"HEAD\":\n        # If we aren't exactly on a branch, pick a branch which represents\n        # the current commit. 
If all else fails, we are on a branchless\n        # commit.\n        branches, rc = runner(GITS, [\"branch\", \"--contains\"], cwd=root)\n        # --contains was added in git-1.5.4\n        if rc != 0 or branches is None:\n            raise NotThisMethod(\"'git branch --contains' returned error\")\n        branches = branches.split(\"\\n\")\n\n        # Remove the first line if we're running detached\n        if \"(\" in branches[0]:\n            branches.pop(0)\n\n        # Strip off the leading \"* \" from the list of branches.\n        branches = [branch[2:] for branch in branches]\n        if \"master\" in branches:\n            branch_name = \"master\"\n        elif not branches:\n            branch_name = None\n        else:\n            # Pick the first branch that is returned. Good or bad.\n            branch_name = branches[0]\n\n    pieces[\"branch\"] = branch_name\n\n    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]\n    # TAG might have hyphens.\n    git_describe = describe_out\n\n    # look for -dirty suffix\n    dirty = git_describe.endswith(\"-dirty\")\n    pieces[\"dirty\"] = dirty\n    if dirty:\n        git_describe = git_describe[:git_describe.rindex(\"-dirty\")]\n\n    # now we have TAG-NUM-gHEX or HEX\n\n    if \"-\" in git_describe:\n        # TAG-NUM-gHEX\n        mo = re.search(r'^(.+)-(\\d+)-g([0-9a-f]+)$', git_describe)\n        if not mo:\n            # unparsable. 
Maybe git-describe is misbehaving?\n            pieces[\"error\"] = (\"unable to parse git-describe output: '%s'\"\n                               % describe_out)\n            return pieces\n\n        # tag\n        full_tag = mo.group(1)\n        if not full_tag.startswith(tag_prefix):\n            if verbose:\n                fmt = \"tag '%s' doesn't start with prefix '%s'\"\n                print(fmt % (full_tag, tag_prefix))\n            pieces[\"error\"] = (\"tag '%s' doesn't start with prefix '%s'\"\n                               % (full_tag, tag_prefix))\n            return pieces\n        pieces[\"closest-tag\"] = full_tag[len(tag_prefix):]\n\n        # distance: number of commits since tag\n        pieces[\"distance\"] = int(mo.group(2))\n\n        # commit: short hex revision ID\n        pieces[\"short\"] = mo.group(3)\n\n    else:\n        # HEX: no tags\n        pieces[\"closest-tag\"] = None\n        count_out, rc = runner(GITS, [\"rev-list\", \"HEAD\", \"--count\"], cwd=root)\n        pieces[\"distance\"] = int(count_out)  # total number of commits\n\n    # commit date: see ISO-8601 comment in git_versions_from_keywords()\n    date = runner(GITS, [\"show\", \"-s\", \"--format=%ci\", \"HEAD\"], cwd=root)[0].strip()\n    # Use only the last line.  
Previous lines may contain GPG signature\n    # information.\n    date = date.splitlines()[-1]\n    pieces[\"date\"] = date.strip().replace(\" \", \"T\", 1).replace(\" \", \"\", 1)\n\n    return pieces\n\n\ndef do_vcs_install(manifest_in, versionfile_source, ipy):\n    \"\"\"Git-specific installation logic for Versioneer.\n\n    For Git, this means creating/changing .gitattributes to mark _version.py\n    for export-subst keyword substitution.\n    \"\"\"\n    GITS = [\"git\"]\n    if sys.platform == \"win32\":\n        GITS = [\"git.cmd\", \"git.exe\"]\n    files = [manifest_in, versionfile_source]\n    if ipy:\n        files.append(ipy)\n    try:\n        my_path = __file__\n        if my_path.endswith(\".pyc\") or my_path.endswith(\".pyo\"):\n            my_path = os.path.splitext(my_path)[0] + \".py\"\n        versioneer_file = os.path.relpath(my_path)\n    except NameError:\n        versioneer_file = \"versioneer.py\"\n    files.append(versioneer_file)\n    present = False\n    try:\n        with open(\".gitattributes\", \"r\") as fobj:\n            for line in fobj:\n                if line.strip().startswith(versionfile_source):\n                    if \"export-subst\" in line.strip().split()[1:]:\n                        present = True\n                        break\n    except OSError:\n        pass\n    if not present:\n        with open(\".gitattributes\", \"a+\") as fobj:\n            fobj.write(f\"{versionfile_source} export-subst\\n\")\n        files.append(\".gitattributes\")\n    run_command(GITS, [\"add\", \"--\"] + files)\n\n\ndef versions_from_parentdir(parentdir_prefix, root, verbose):\n    \"\"\"Try to determine the version from the parent directory name.\n\n    Source tarballs conventionally unpack into a directory that includes both\n    the project name and a version string. 
We will also support searching up\n    two directory levels for an appropriately named parent directory\n    \"\"\"\n    rootdirs = []\n\n    for _ in range(3):\n        for dirname in [os.path.basename(root)] + os.listdir(root):\n            if dirname.startswith(parentdir_prefix):\n                return {\"version\": dirname[len(parentdir_prefix):],\n                        \"full-revisionid\": None,\n                        \"dirty\": False, \"error\": None, \"date\": None}\n        rootdirs.append(root)\n        root = os.path.dirname(root)  # up a level\n\n    if verbose:\n        print(\"Tried directories %s but none started with prefix %s\" %\n              (str(rootdirs), parentdir_prefix))\n    raise NotThisMethod(\"rootdir doesn't start with parentdir_prefix\")\n\n\nSHORT_VERSION_PY = \"\"\"\n# This file was generated by 'versioneer.py' (0.22) from\n# revision-control system data, or from the parent directory name of an\n# unpacked source archive. Distribution tarballs contain a pre-generated copy\n# of this file.\n\nimport json\n\nversion_json = '''\n%s\n'''  # END VERSION_JSON\n\n\ndef get_versions():\n    return json.loads(version_json)\n\"\"\"\n\n\ndef versions_from_file(filename):\n    \"\"\"Try to determine the version from _version.py if present.\"\"\"\n    try:\n        with open(filename) as f:\n            contents = f.read()\n    except OSError:\n        raise NotThisMethod(\"unable to read _version.py\")\n    mo = re.search(r\"version_json = '''\\n(.*)'''  # END VERSION_JSON\",\n                   contents, re.M | re.S)\n    if not mo:\n        mo = re.search(r\"version_json = '''\\r\\n(.*)'''  # END VERSION_JSON\",\n                       contents, re.M | re.S)\n    if not mo:\n        raise NotThisMethod(\"no version_json in _version.py\")\n    return json.loads(mo.group(1))\n\n\ndef write_to_version_file(filename, versions):\n    \"\"\"Write the given version number to the given _version.py file.\"\"\"\n    os.unlink(filename)\n    
contents = json.dumps(versions, sort_keys=True,\n                          indent=1, separators=(\",\", \": \"))\n    with open(filename, \"w\") as f:\n        f.write(SHORT_VERSION_PY % contents)\n\n    print(\"set %s to '%s'\" % (filename, versions[\"version\"]))\n\n\ndef plus_or_dot(pieces):\n    \"\"\"Return a + if we don't already have one, else return a .\"\"\"\n    if \"+\" in pieces.get(\"closest-tag\", \"\"):\n        return \".\"\n    return \"+\"\n\n\ndef render_pep440(pieces):\n    \"\"\"Build up version string, with post-release \"local version identifier\".\n\n    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you\n    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty\n\n    Exceptions:\n    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0+untagged.%d.g%s\" % (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_branch(pieces):\n    \"\"\"TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch. Note that .dev0 sorts backwards\n    (a feature branch will appear \"older\" than the master branch).\n\n    Exceptions:\n    1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"%d.g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0\"\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+untagged.%d.g%s\" % (pieces[\"distance\"],\n                                          pieces[\"short\"])\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef pep440_split_post(ver):\n    \"\"\"Split pep440 version string at the post-release segment.\n\n    Returns the release segments before the post-release and the\n    post-release version number (or None if no post-release segment is present).\n    \"\"\"\n    vc = str.split(ver, \".post\")\n    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None\n\n\ndef render_pep440_pre(pieces):\n    \"\"\"TAG[.postN.devDISTANCE] -- No -dirty.\n\n    Exceptions:\n    1: no tags. 
0.post0.devDISTANCE\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        if pieces[\"distance\"]:\n            # update the post release segment\n            tag_version, post_version = pep440_split_post(pieces[\"closest-tag\"])\n            rendered = tag_version\n            if post_version is not None:\n                rendered += \".post%d.dev%d\" % (post_version+1, pieces[\"distance\"])\n            else:\n                rendered += \".post0.dev%d\" % (pieces[\"distance\"])\n        else:\n            # no commits, use the tag as the version\n            rendered = pieces[\"closest-tag\"]\n    else:\n        # exception #1\n        rendered = \"0.post0.dev%d\" % pieces[\"distance\"]\n    return rendered\n\n\ndef render_pep440_post(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX] .\n\n    The \".dev0\" means dirty. Note that .dev0 sorts backwards\n    (a dirty tree will appear \"older\" than the corresponding clean one),\n    but you shouldn't be releasing software with -dirty anyways.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n    return rendered\n\n\ndef render_pep440_post_branch(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .\n\n    The \".dev0\" means not master branch.\n\n    Exceptions:\n    1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"branch\"] != \"master\":\n                rendered += \".dev0\"\n            rendered += plus_or_dot(pieces)\n            rendered += \"g%s\" % pieces[\"short\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dirty\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"branch\"] != \"master\":\n            rendered += \".dev0\"\n        rendered += \"+g%s\" % pieces[\"short\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dirty\"\n    return rendered\n\n\ndef render_pep440_old(pieces):\n    \"\"\"TAG[.postDISTANCE[.dev0]] .\n\n    The \".dev0\" means dirty.\n\n    Exceptions:\n    1: no tags. 0.postDISTANCE[.dev0]\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"] or pieces[\"dirty\"]:\n            rendered += \".post%d\" % pieces[\"distance\"]\n            if pieces[\"dirty\"]:\n                rendered += \".dev0\"\n    else:\n        # exception #1\n        rendered = \"0.post%d\" % pieces[\"distance\"]\n        if pieces[\"dirty\"]:\n            rendered += \".dev0\"\n    return rendered\n\n\ndef render_git_describe(pieces):\n    \"\"\"TAG[-DISTANCE-gHEX][-dirty].\n\n    Like 'git describe --tags --dirty --always'.\n\n    Exceptions:\n    1: no tags. 
HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        if pieces[\"distance\"]:\n            rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render_git_describe_long(pieces):\n    \"\"\"TAG-DISTANCE-gHEX[-dirty].\n\n    Like 'git describe --tags --dirty --always -long'.\n    The distance/hash is unconditional.\n\n    Exceptions:\n    1: no tags. HEX[-dirty]  (note: no 'g' prefix)\n    \"\"\"\n    if pieces[\"closest-tag\"]:\n        rendered = pieces[\"closest-tag\"]\n        rendered += \"-%d-g%s\" % (pieces[\"distance\"], pieces[\"short\"])\n    else:\n        # exception #1\n        rendered = pieces[\"short\"]\n    if pieces[\"dirty\"]:\n        rendered += \"-dirty\"\n    return rendered\n\n\ndef render(pieces, style):\n    \"\"\"Render the given version pieces into the requested style.\"\"\"\n    if pieces[\"error\"]:\n        return {\"version\": \"unknown\",\n                \"full-revisionid\": pieces.get(\"long\"),\n                \"dirty\": None,\n                \"error\": pieces[\"error\"],\n                \"date\": None}\n\n    if not style or style == \"default\":\n        style = \"pep440\"  # the default\n\n    if style == \"pep440\":\n        rendered = render_pep440(pieces)\n    elif style == \"pep440-branch\":\n        rendered = render_pep440_branch(pieces)\n    elif style == \"pep440-pre\":\n        rendered = render_pep440_pre(pieces)\n    elif style == \"pep440-post\":\n        rendered = render_pep440_post(pieces)\n    elif style == \"pep440-post-branch\":\n        rendered = render_pep440_post_branch(pieces)\n    elif style == \"pep440-old\":\n        rendered = render_pep440_old(pieces)\n    elif style == \"git-describe\":\n        rendered = render_git_describe(pieces)\n    elif 
style == \"git-describe-long\":\n        rendered = render_git_describe_long(pieces)\n    else:\n        raise ValueError(\"unknown style '%s'\" % style)\n\n    return {\"version\": rendered, \"full-revisionid\": pieces[\"long\"],\n            \"dirty\": pieces[\"dirty\"], \"error\": None,\n            \"date\": pieces.get(\"date\")}\n\n\nclass VersioneerBadRootError(Exception):\n    \"\"\"The project root directory is unknown or missing key files.\"\"\"\n\n\ndef get_versions(verbose=False):\n    \"\"\"Get the project version from whatever source is available.\n\n    Returns a dict with the keys 'version', 'full-revisionid', 'dirty',\n    'error' and 'date'.\n    \"\"\"\n    if \"versioneer\" in sys.modules:\n        # see the discussion in cmdclass.py:get_cmdclass()\n        del sys.modules[\"versioneer\"]\n\n    root = get_root()\n    cfg = get_config_from_root(root)\n\n    assert cfg.VCS is not None, \"please set [versioneer]VCS= in setup.cfg\"\n    handlers = HANDLERS.get(cfg.VCS)\n    assert handlers, \"unrecognized VCS '%s'\" % cfg.VCS\n    verbose = verbose or cfg.verbose\n    assert cfg.versionfile_source is not None, \\\n        \"please set versioneer.versionfile_source\"\n    assert cfg.tag_prefix is not None, \"please set versioneer.tag_prefix\"\n\n    versionfile_abs = os.path.join(root, cfg.versionfile_source)\n\n    # extract version from first of: _version.py, VCS command (e.g. 'git\n    # describe'), parentdir. 
This is meant to work for developers using a\n    # source checkout, for users of a tarball created by 'setup.py sdist',\n    # and for users of a tarball/zipball created by 'git archive' or github's\n    # download-from-tag feature or the equivalent in other VCSes.\n\n    get_keywords_f = handlers.get(\"get_keywords\")\n    from_keywords_f = handlers.get(\"keywords\")\n    if get_keywords_f and from_keywords_f:\n        try:\n            keywords = get_keywords_f(versionfile_abs)\n            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)\n            if verbose:\n                print(\"got version from expanded keyword %s\" % ver)\n            return ver\n        except NotThisMethod:\n            pass\n\n    try:\n        ver = versions_from_file(versionfile_abs)\n        if verbose:\n            print(\"got version from file %s %s\" % (versionfile_abs, ver))\n        return ver\n    except NotThisMethod:\n        pass\n\n    from_vcs_f = handlers.get(\"pieces_from_vcs\")\n    if from_vcs_f:\n        try:\n            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)\n            ver = render(pieces, cfg.style)\n            if verbose:\n                print(\"got version from VCS %s\" % ver)\n            return ver\n        except NotThisMethod:\n            pass\n\n    try:\n        if cfg.parentdir_prefix:\n            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)\n            if verbose:\n                print(\"got version from parentdir %s\" % ver)\n            return ver\n    except NotThisMethod:\n        pass\n\n    if verbose:\n        print(\"unable to compute version\")\n\n    return {\"version\": \"0+unknown\", \"full-revisionid\": None,\n            \"dirty\": None, \"error\": \"unable to compute version\",\n            \"date\": None}\n\n\ndef get_version():\n    \"\"\"Get the short version string for this project.\"\"\"\n    return get_versions()[\"version\"]\n\n\ndef get_cmdclass(cmdclass=None):\n    \"\"\"Get 
the custom setuptools/distutils subclasses used by Versioneer.\n\n    If the package uses a different cmdclass (e.g. one from numpy), it\n    should be provided as an argument.\n    \"\"\"\n    if \"versioneer\" in sys.modules:\n        del sys.modules[\"versioneer\"]\n        # this fixes the \"python setup.py develop\" case (also 'install' and\n        # 'easy_install .'), in which subdependencies of the main project are\n        # built (using setup.py bdist_egg) in the same python process. Assume\n        # a main project A and a dependency B, which use different versions\n        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in\n        # sys.modules by the time B's setup.py is executed, causing B to run\n        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a\n        # sandbox that restores sys.modules to its pre-build state, so the\n        # parent is protected against the child's \"import versioneer\". By\n        # removing ourselves from sys.modules here, before the child build\n        # happens, we protect the child from the parent's versioneer too.\n        # Also see https://github.com/python-versioneer/python-versioneer/issues/52\n\n    cmds = {} if cmdclass is None else cmdclass.copy()\n\n    # we add \"version\" to both distutils and setuptools\n    try:\n        from setuptools import Command\n    except ImportError:\n        from distutils.core import Command\n\n    class cmd_version(Command):\n        description = \"report generated version string\"\n        user_options = []\n        boolean_options = []\n\n        def initialize_options(self):\n            pass\n\n        def finalize_options(self):\n            pass\n\n        def run(self):\n            vers = get_versions(verbose=True)\n            print(\"Version: %s\" % vers[\"version\"])\n            print(\" full-revisionid: %s\" % vers.get(\"full-revisionid\"))\n            print(\" dirty: %s\" % vers.get(\"dirty\"))\n            print(\" 
date: %s\" % vers.get(\"date\"))\n            if vers[\"error\"]:\n                print(\" error: %s\" % vers[\"error\"])\n    cmds[\"version\"] = cmd_version\n\n    # we override \"build_py\" in both distutils and setuptools\n    #\n    # most invocation pathways end up running build_py:\n    #  distutils/build -> build_py\n    #  distutils/install -> distutils/build ->..\n    #  setuptools/bdist_wheel -> distutils/install ->..\n    #  setuptools/bdist_egg -> distutils/install_lib -> build_py\n    #  setuptools/install -> bdist_egg ->..\n    #  setuptools/develop -> ?\n    #  pip install:\n    #   copies source tree to a tempdir before running egg_info/etc\n    #   if .git isn't copied too, 'git describe' will fail\n    #   then does setup.py bdist_wheel, or sometimes setup.py install\n    #  setup.py egg_info -> ?\n\n    # we override different \"build_py\" commands for both environments\n    if 'build_py' in cmds:\n        _build_py = cmds['build_py']\n    elif \"setuptools\" in sys.modules:\n        from setuptools.command.build_py import build_py as _build_py\n    else:\n        from distutils.command.build_py import build_py as _build_py\n\n    class cmd_build_py(_build_py):\n        def run(self):\n            root = get_root()\n            cfg = get_config_from_root(root)\n            versions = get_versions()\n            _build_py.run(self)\n            # now locate _version.py in the new build/ directory and replace\n            # it with an updated value\n            if cfg.versionfile_build:\n                target_versionfile = os.path.join(self.build_lib,\n                                                  cfg.versionfile_build)\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n    cmds[\"build_py\"] = cmd_build_py\n\n    if 'build_ext' in cmds:\n        _build_ext = cmds['build_ext']\n    elif \"setuptools\" in sys.modules:\n        from setuptools.command.build_ext 
import build_ext as _build_ext\n    else:\n        from distutils.command.build_ext import build_ext as _build_ext\n\n    class cmd_build_ext(_build_ext):\n        def run(self):\n            root = get_root()\n            cfg = get_config_from_root(root)\n            versions = get_versions()\n            _build_ext.run(self)\n            if self.inplace:\n                # build_ext --inplace will only build extensions in\n                # build/lib<..> dir with no _version.py to write to.\n                # As in place builds will already have a _version.py\n                # in the module dir, we do not need to write one.\n                return\n            # now locate _version.py in the new build/ directory and replace\n            # it with an updated value\n            target_versionfile = os.path.join(self.build_lib,\n                                              cfg.versionfile_build)\n            print(\"UPDATING %s\" % target_versionfile)\n            write_to_version_file(target_versionfile, versions)\n    cmds[\"build_ext\"] = cmd_build_ext\n\n    if \"cx_Freeze\" in sys.modules:  # cx_freeze enabled?\n        from cx_Freeze.dist import build_exe as _build_exe\n        # nczeczulin reports that py2exe won't like the pep440-style string\n        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.\n        # setup(console=[{\n        #   \"version\": versioneer.get_version().split(\"+\", 1)[0], # FILEVERSION\n        #   \"product_version\": versioneer.get_version(),\n        #   ...\n\n        class cmd_build_exe(_build_exe):\n            def run(self):\n                root = get_root()\n                cfg = get_config_from_root(root)\n                versions = get_versions()\n                target_versionfile = cfg.versionfile_source\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n\n                _build_exe.run(self)\n                
os.unlink(target_versionfile)\n                with open(cfg.versionfile_source, \"w\") as f:\n                    LONG = LONG_VERSION_PY[cfg.VCS]\n                    f.write(LONG %\n                            {\"DOLLAR\": \"$\",\n                             \"STYLE\": cfg.style,\n                             \"TAG_PREFIX\": cfg.tag_prefix,\n                             \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                             \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n                             })\n        cmds[\"build_exe\"] = cmd_build_exe\n        del cmds[\"build_py\"]\n\n    if 'py2exe' in sys.modules:  # py2exe enabled?\n        from py2exe.distutils_buildexe import py2exe as _py2exe\n\n        class cmd_py2exe(_py2exe):\n            def run(self):\n                root = get_root()\n                cfg = get_config_from_root(root)\n                versions = get_versions()\n                target_versionfile = cfg.versionfile_source\n                print(\"UPDATING %s\" % target_versionfile)\n                write_to_version_file(target_versionfile, versions)\n\n                _py2exe.run(self)\n                os.unlink(target_versionfile)\n                with open(cfg.versionfile_source, \"w\") as f:\n                    LONG = LONG_VERSION_PY[cfg.VCS]\n                    f.write(LONG %\n                            {\"DOLLAR\": \"$\",\n                             \"STYLE\": cfg.style,\n                             \"TAG_PREFIX\": cfg.tag_prefix,\n                             \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                             \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n                             })\n        cmds[\"py2exe\"] = cmd_py2exe\n\n    # we override different \"sdist\" commands for both environments\n    if 'sdist' in cmds:\n        _sdist = cmds['sdist']\n    elif \"setuptools\" in sys.modules:\n        from setuptools.command.sdist import sdist as _sdist\n    else:\n        from 
distutils.command.sdist import sdist as _sdist\n\n    class cmd_sdist(_sdist):\n        def run(self):\n            versions = get_versions()\n            self._versioneer_generated_versions = versions\n            # unless we update this, the command will keep using the old\n            # version\n            self.distribution.metadata.version = versions[\"version\"]\n            return _sdist.run(self)\n\n        def make_release_tree(self, base_dir, files):\n            root = get_root()\n            cfg = get_config_from_root(root)\n            _sdist.make_release_tree(self, base_dir, files)\n            # now locate _version.py in the new base_dir directory\n            # (remembering that it may be a hardlink) and replace it with an\n            # updated value\n            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)\n            print(\"UPDATING %s\" % target_versionfile)\n            write_to_version_file(target_versionfile,\n                                  self._versioneer_generated_versions)\n    cmds[\"sdist\"] = cmd_sdist\n\n    return cmds\n\n\nCONFIG_ERROR = \"\"\"\nsetup.cfg is missing the necessary Versioneer configuration. You need\na section like:\n\n [versioneer]\n VCS = git\n style = pep440\n versionfile_source = src/myproject/_version.py\n versionfile_build = myproject/_version.py\n tag_prefix =\n parentdir_prefix = myproject-\n\nYou will also need to edit your setup.py to use the results:\n\n import versioneer\n setup(version=versioneer.get_version(),\n       cmdclass=versioneer.get_cmdclass(), ...)\n\nPlease read the docstring in ./versioneer.py for configuration instructions,\nedit setup.cfg, and re-run the installer or 'python versioneer.py setup'.\n\"\"\"\n\nSAMPLE_CONFIG = \"\"\"\n# See the docstring in versioneer.py for instructions. 
Note that you must\n# re-run 'versioneer.py setup' after changing this section, and commit the\n# resulting files.\n\n[versioneer]\n#VCS = git\n#style = pep440\n#versionfile_source =\n#versionfile_build =\n#tag_prefix =\n#parentdir_prefix =\n\n\"\"\"\n\nOLD_SNIPPET = \"\"\"\nfrom ._version import get_versions\n__version__ = get_versions()['version']\ndel get_versions\n\"\"\"\n\nINIT_PY_SNIPPET = \"\"\"\nfrom . import {0}\n__version__ = {0}.get_versions()['version']\n\"\"\"\n\n\ndef do_setup():\n    \"\"\"Do main VCS-independent setup function for installing Versioneer.\"\"\"\n    root = get_root()\n    try:\n        cfg = get_config_from_root(root)\n    except (OSError, configparser.NoSectionError,\n            configparser.NoOptionError) as e:\n        if isinstance(e, (OSError, configparser.NoSectionError)):\n            print(\"Adding sample versioneer config to setup.cfg\",\n                  file=sys.stderr)\n            with open(os.path.join(root, \"setup.cfg\"), \"a\") as f:\n                f.write(SAMPLE_CONFIG)\n        print(CONFIG_ERROR, file=sys.stderr)\n        return 1\n\n    print(\" creating %s\" % cfg.versionfile_source)\n    with open(cfg.versionfile_source, \"w\") as f:\n        LONG = LONG_VERSION_PY[cfg.VCS]\n        f.write(LONG % {\"DOLLAR\": \"$\",\n                        \"STYLE\": cfg.style,\n                        \"TAG_PREFIX\": cfg.tag_prefix,\n                        \"PARENTDIR_PREFIX\": cfg.parentdir_prefix,\n                        \"VERSIONFILE_SOURCE\": cfg.versionfile_source,\n                        })\n\n    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),\n                       \"__init__.py\")\n    if os.path.exists(ipy):\n        try:\n            with open(ipy, \"r\") as f:\n                old = f.read()\n        except OSError:\n            old = \"\"\n        module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0]\n        snippet = INIT_PY_SNIPPET.format(module)\n        if OLD_SNIPPET in 
old:\n            print(\" replacing boilerplate in %s\" % ipy)\n            with open(ipy, \"w\") as f:\n                f.write(old.replace(OLD_SNIPPET, snippet))\n        elif snippet not in old:\n            print(\" appending to %s\" % ipy)\n            with open(ipy, \"a\") as f:\n                f.write(snippet)\n        else:\n            print(\" %s unmodified\" % ipy)\n    else:\n        print(\" %s doesn't exist, ok\" % ipy)\n        ipy = None\n\n    # Make sure both the top-level \"versioneer.py\" and versionfile_source\n    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so\n    # they'll be copied into source distributions. Pip won't be able to\n    # install the package without this.\n    manifest_in = os.path.join(root, \"MANIFEST.in\")\n    simple_includes = set()\n    try:\n        with open(manifest_in, \"r\") as f:\n            for line in f:\n                if line.startswith(\"include \"):\n                    for include in line.split()[1:]:\n                        simple_includes.add(include)\n    except OSError:\n        pass\n    # That doesn't cover everything MANIFEST.in can do\n    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so\n    # it might give some false negatives. Appending redundant 'include'\n    # lines is safe, though.\n    if \"versioneer.py\" not in simple_includes:\n        print(\" appending 'versioneer.py' to MANIFEST.in\")\n        with open(manifest_in, \"a\") as f:\n            f.write(\"include versioneer.py\\n\")\n    else:\n        print(\" 'versioneer.py' already in MANIFEST.in\")\n    if cfg.versionfile_source not in simple_includes:\n        print(\" appending versionfile_source ('%s') to MANIFEST.in\" %\n              cfg.versionfile_source)\n        with open(manifest_in, \"a\") as f:\n            f.write(\"include %s\\n\" % cfg.versionfile_source)\n    else:\n        print(\" versionfile_source already in MANIFEST.in\")\n\n    # Make VCS-specific changes. 
For git, this means creating/changing\n    # .gitattributes to mark _version.py for export-subst keyword\n    # substitution.\n    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)\n    return 0\n\n\ndef scan_setup_py():\n    \"\"\"Validate the contents of setup.py against Versioneer's expectations.\"\"\"\n    found = set()\n    setters = False\n    errors = 0\n    with open(\"setup.py\", \"r\") as f:\n        for line in f.readlines():\n            if \"import versioneer\" in line:\n                found.add(\"import\")\n            if \"versioneer.get_cmdclass()\" in line:\n                found.add(\"cmdclass\")\n            if \"versioneer.get_version()\" in line:\n                found.add(\"get_version\")\n            if \"versioneer.VCS\" in line:\n                setters = True\n            if \"versioneer.versionfile_source\" in line:\n                setters = True\n    if len(found) != 3:\n        print(\"\")\n        print(\"Your setup.py appears to be missing some important items\")\n        print(\"(but I might be wrong). Please make sure it has something\")\n        print(\"roughly like the following:\")\n        print(\"\")\n        print(\" import versioneer\")\n        print(\" setup( version=versioneer.get_version(),\")\n        print(\"        cmdclass=versioneer.get_cmdclass(),  ...)\")\n        print(\"\")\n        errors += 1\n    if setters:\n        print(\"You should remove lines like 'versioneer.VCS = ' and\")\n        print(\"'versioneer.versionfile_source = ' . This configuration\")\n        print(\"now lives in setup.cfg, and should be removed from setup.py\")\n        print(\"\")\n        errors += 1\n    return errors\n\n\nif __name__ == \"__main__\":\n    cmd = sys.argv[1]\n    if cmd == \"setup\":\n        errors = do_setup()\n        errors += scan_setup_py()\n        if errors:\n            sys.exit(1)\n"
  }
]