[
  {
    "path": ".gitignore",
    "content": "*~\ndecaptcha/output/*\n\n*.py[co]\n\n# Packages\n*.egg\n*.egg-info\ndist\nbuild\neggs\nparts\nbin\nvar\nsdist\ndevelop-eggs\n.installed.cfg\n\n# Installer logs\npip-log.txt\n\n# Unit test / coverage reports\n.coverage\n.tox\n\n#Translations\n*.mo\n\n#Mr Developer\n.mr.developer.cfg\n\n# PyCharm\n.idea/"
  },
  {
    "path": ".travis.yml",
    "content": "language: python\npython:\n  - \"2.7\"\n  - \"3.4\"\nmatrix:\n  allow_failures:\n    - python: \"2.7\"\ninstall:\n  - pip install flake8 --use-mirrors\n  - pip install pep8 --use-mirrors\n  - pip install -q -e . --use-mirrors\nscript:\n  - nosetests\n  - if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then flake8 decaptcha; fi\ncache: apt\nsudo: false"
  },
  {
    "path": "AUTHORS",
    "content": "Mek <michael.karpeles@gmail.com> @mekarpeles (maintainer)\nBen Boyter <bboyte01@gmail.com> @boyter (original author)\nNate Urwin <nateurwin@gmail.com> @nateurwin\nAbel Molina\n\n\n"
  },
  {
    "path": "CHANGES",
    "content": "v0.0.1, Tue Jul 28 17:43:00 2015 -- Initial Release.\n"
  },
  {
    "path": "LICENSE",
    "content": "THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS\nCREATIVE COMMONS PUBLIC LICENSE (\"CCPL\" OR \"LICENSE\"). THE WORK IS\nPROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE\nWORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS\nPROHIBITED.\n\nBY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND\nAGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS\nLICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU\nTHE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH\nTERMS AND CONDITIONS.\n\n1. Definitions\n\n\"Adaptation\" means a work based upon the Work, or upon the Work and\nother pre-existing works, such as a translation, adaptation,\nderivative work, arrangement of music or other alterations of a\nliterary or artistic work, or phonogram or performance and includes\ncinematographic adaptations or any other form in which the Work may be\nrecast, transformed, or adapted including in any form recognizably\nderived from the original, except that a work that constitutes a\nCollection will not be considered an Adaptation for the purpose of\nthis License. For the avoidance of doubt, where the Work is a musical\nwork, performance or phonogram, the synchronization of the Work in\ntimed-relation with a moving image (\"synching\") will be considered an\nAdaptation for the purpose of this License.  \"Collection\" means a\ncollection of literary or artistic works, such as encyclopedias and\nanthologies, or performances, phonograms or broadcasts, or other works\nor subject matter other than works listed in Section 1(f) below,\nwhich, by reason of the selection and arrangement of their contents,\nconstitute intellectual creations, in which the Work is included in\nits entirety in unmodified form along with one or more other\ncontributions, each constituting separate and independent works in\nthemselves, which together are assembled into a collective whole. 
A\nwork that constitutes a Collection will not be considered an\nAdaptation (as defined below) for the purposes of this License.\n\"Creative Commons Compatible License\" means a license that is listed\nat http://creativecommons.org/compatiblelicenses that has been\napproved by Creative Commons as being essentially equivalent to this\nLicense, including, at a minimum, because that license: (i) contains\nterms that have the same purpose, meaning and effect as the License\nElements of this License; and, (ii) explicitly permits the relicensing\nof adaptations of works made available under that license under this\nLicense or a Creative Commons jurisdiction license with the same\nLicense Elements as this License.  \"Distribute\" means to make\navailable to the public the original and copies of the Work or\nAdaptation, as appropriate, through sale or other transfer of\nownership.  \"License Elements\" means the following high-level license\nattributes as selected by Licensor and indicated in the title of this\nLicense: Attribution, ShareAlike.  \"Licensor\" means the individual,\nindividuals, entity or entities that offer(s) the Work under the terms\nof this License.  
\"Original Author\" means, in the case of a literary\nor artistic work, the individual, individuals, entity or entities who\ncreated the Work or if no individual or entity can be identified, the\npublisher; and in addition (i) in the case of a performance the\nactors, singers, musicians, dancers, and other persons who act, sing,\ndeliver, declaim, play in, interpret or otherwise perform literary or\nartistic works or expressions of folklore; (ii) in the case of a\nphonogram the producer being the person or legal entity who first\nfixes the sounds of a performance or other sounds; and, (iii) in the\ncase of broadcasts, the organization that transmits the broadcast.\n\"Work\" means the literary and/or artistic work offered under the terms\nof this License including without limitation any production in the\nliterary, scientific and artistic domain, whatever may be the mode or\nform of its expression including digital form, such as a book,\npamphlet and other writing; a lecture, address, sermon or other work\nof the same nature; a dramatic or dramatico-musical work; a\nchoreographic work or entertainment in dumb show; a musical\ncomposition with or without words; a cinematographic work to which are\nassimilated works expressed by a process analogous to cinematography;\na work of drawing, painting, architecture, sculpture, engraving or\nlithography; a photographic work to which are assimilated works\nexpressed by a process analogous to photography; a work of applied\nart; an illustration, map, plan, sketch or three-dimensional work\nrelative to geography, topography, architecture or science; a\nperformance; a broadcast; a phonogram; a compilation of data to the\nextent it is protected as a copyrightable work; or a work performed by\na variety or circus performer to the extent it is not otherwise\nconsidered a literary or artistic work.  
\"You\" means an individual or\nentity exercising rights under this License who has not previously\nviolated the terms of this License with respect to the Work, or who\nhas received express permission from the Licensor to exercise rights\nunder this License despite a previous violation.  \"Publicly Perform\"\nmeans to perform public recitations of the Work and to communicate to\nthe public those public recitations, by any means or process,\nincluding by wire or wireless means or public digital performances; to\nmake available to the public Works in such a way that members of the\npublic may access these Works from a place and at a place individually\nchosen by them; to perform the Work to the public by any means or\nprocess and the communication to the public of the performances of the\nWork, including by public digital performance; to broadcast and\nrebroadcast the Work by any means including signs, sounds or images.\n\"Reproduce\" means to make copies of the Work by any means including\nwithout limitation by sound or visual recordings and the right of\nfixation and reproducing fixations of the Work, including storage of a\nprotected performance or phonogram in digital form or other electronic\nmedium.  2. Fair Dealing Rights. Nothing in this License is intended\nto reduce, limit, or restrict any uses free from copyright or rights\narising from limitations or exceptions that are provided for in\nconnection with the copyright protection under copyright law or other\napplicable laws.\n\n3. License Grant. 
Subject to the terms and conditions of this License,\nLicensor hereby grants You a worldwide, royalty-free, non-exclusive,\nperpetual (for the duration of the applicable copyright) license to\nexercise the rights in the Work as stated below:\n\nto Reproduce the Work, to incorporate the Work into one or more\nCollections, and to Reproduce the Work as incorporated in the\nCollections; to create and Reproduce Adaptations provided that any\nsuch Adaptation, including any translation in any medium, takes\nreasonable steps to clearly label, demarcate or otherwise identify\nthat changes were made to the original Work. For example, a\ntranslation could be marked \"The original work was translated from\nEnglish to Spanish,\" or a modification could indicate \"The original\nwork has been modified.\"; to Distribute and Publicly Perform the Work\nincluding as incorporated in Collections; and, to Distribute and\nPublicly Perform Adaptations.  For the avoidance of doubt:\n\nNon-waivable Compulsory License Schemes. In those jurisdictions in\nwhich the right to collect royalties through any statutory or\ncompulsory licensing scheme cannot be waived, the Licensor reserves\nthe exclusive right to collect such royalties for any exercise by You\nof the rights granted under this License; Waivable Compulsory License\nSchemes. In those jurisdictions in which the right to collect\nroyalties through any statutory or compulsory licensing scheme can be\nwaived, the Licensor waives the exclusive right to collect such\nroyalties for any exercise by You of the rights granted under this\nLicense; and, Voluntary License Schemes. The Licensor waives the right\nto collect royalties, whether individually or, in the event that the\nLicensor is a member of a collecting society that administers\nvoluntary licensing schemes, via that society, from any exercise by\nYou of the rights granted under this License.  
The above rights may be\nexercised in all media and formats whether now known or hereafter\ndevised. The above rights include the right to make such modifications\nas are technically necessary to exercise the rights in other media and\nformats. Subject to Section 8(f), all rights not expressly granted by\nLicensor are hereby reserved.\n\n4. Restrictions. The license granted in Section 3 above is expressly\nmade subject to and limited by the following restrictions:\n\nYou may Distribute or Publicly Perform the Work only under the terms\nof this License. You must include a copy of, or the Uniform Resource\nIdentifier (URI) for, this License with every copy of the Work You\nDistribute or Publicly Perform. You may not offer or impose any terms\non the Work that restrict the terms of this License or the ability of\nthe recipient of the Work to exercise the rights granted to that\nrecipient under the terms of the License. You may not sublicense the\nWork. You must keep intact all notices that refer to this License and\nto the disclaimer of warranties with every copy of the Work You\nDistribute or Publicly Perform. When You Distribute or Publicly\nPerform the Work, You may not impose any effective technological\nmeasures on the Work that restrict the ability of a recipient of the\nWork from You to exercise the rights granted to that recipient under\nthe terms of the License. This Section 4(a) applies to the Work as\nincorporated in a Collection, but this does not require the Collection\napart from the Work itself to be made subject to the terms of this\nLicense. If You create a Collection, upon notice from any Licensor You\nmust, to the extent practicable, remove from the Collection any credit\nas required by Section 4(c), as requested. If You create an\nAdaptation, upon notice from any Licensor You must, to the extent\npracticable, remove from the Adaptation any credit as required by\nSection 4(c), as requested.  
You may Distribute or Publicly Perform an\nAdaptation only under the terms of: (i) this License; (ii) a later\nversion of this License with the same License Elements as this\nLicense; (iii) a Creative Commons jurisdiction license (either this or\na later license version) that contains the same License Elements as\nthis License (e.g., Attribution-ShareAlike 3.0 US)); (iv) a Creative\nCommons Compatible License. If you license the Adaptation under one of\nthe licenses mentioned in (iv), you must comply with the terms of that\nlicense. If you license the Adaptation under the terms of any of the\nlicenses mentioned in (i), (ii) or (iii) (the \"Applicable License\"),\nyou must comply with the terms of the Applicable License generally and\nthe following provisions: (I) You must include a copy of, or the URI\nfor, the Applicable License with every copy of each Adaptation You\nDistribute or Publicly Perform; (II) You may not offer or impose any\nterms on the Adaptation that restrict the terms of the Applicable\nLicense or the ability of the recipient of the Adaptation to exercise\nthe rights granted to that recipient under the terms of the Applicable\nLicense; (III) You must keep intact all notices that refer to the\nApplicable License and to the disclaimer of warranties with every copy\nof the Work as included in the Adaptation You Distribute or Publicly\nPerform; (IV) when You Distribute or Publicly Perform the Adaptation,\nYou may not impose any effective technological measures on the\nAdaptation that restrict the ability of a recipient of the Adaptation\nfrom You to exercise the rights granted to that recipient under the\nterms of the Applicable License. This Section 4(b) applies to the\nAdaptation as incorporated in a Collection, but this does not require\nthe Collection apart from the Adaptation itself to be made subject to\nthe terms of the Applicable License.  
If You Distribute, or Publicly\nPerform the Work or any Adaptations or Collections, You must, unless a\nrequest has been made pursuant to Section 4(a), keep intact all\ncopyright notices for the Work and provide, reasonable to the medium\nor mans You are utilizing: (i) the name of the Original Author (or\npseudonym, if applicable) if supplied, and/or if the Original Author\nand/or Licensor designate another party or parties (e.g., a sponsor\ninstitute, publishing entity, journal) for attribution (\"Attribution\nParties\") in Licensor's copyright notice, terms of service or by other\nreasonable means, the name of such party or parties; (ii) the title of\nthe Work if supplied; (iii) to the extent reasonably practicable, the\nURI, if any, that Licensor specifies to be associated with the Work,\nunless such URI does not refer to the copyright notice or licensing\ninformation for the Work; and (iv) , consistent with Ssection 3(b), in\nthe case of an Adaptation, a credit identifying the use of the Work in\nthe Adaptation (e.g., \"French translation of the Work by Original\nAuthor,\" or \"Screenplay based on original Work by Original\nAuthor\"). The credit required by this Section 4(c) may be implemented\nin any reasonable manner; provided, however, that in the case of a\nAdaptation or Collection, at a minimum such credit will appear, if a\ncredit for all contributing authors of the Adaptation or Collection\nappears, then as part of these credits and in a manner at least as\nprominent as the credits for the other contributing authors. 
For the\navoidance of doubt, You may only use the credit required by this\nSection for the purpose of attribution in the manner set out above\nand, by exercising Your rights under this License, You may not\nimplicitly or explicitly assert or imply any connection with,\nsponsorship or endorsement by the Original Author, Licensor and/or\nAttribution Parties, as appropriate, of You or Your use of the Work,\nwithout the separate, express prior written permission of the Original\nAuthor, Licensor and/or Attribution Parties.  Except as otherwise\nagreed in writing by the Licensor or as may be otherwise permitted by\napplicable law, if You Reproduce, Distribute or Publicly Perform the\nWork either by itself or as part of any Adaptations or Collections,\nYou must not distort, mutilate, modify or take other derogatory action\nin relation to the Work which would be prejudicial to the Original\nAuthor's honor or reputation. Licensor agrees that in those\njurisdictions (e.g. Japan), in which any exercise of the right granted\nin Section 3(b) of this License (the right to make Adaptations) would\nbe deemed to be a distortion, mutilation, modification or other\nderogatory action prejudicial to the Original Author's honor and\nreputation, the Licensor will waive or not assert, as appropriate,\nthis Section, to the fullest extent permitted by the applicable\nnational law, to enable You to reasonably exercise Your right under\nSection 3(b) of this License (right to make Adaptations) but not\notherwise.  5. 
Representations, Warranties and Disclaimer\n\nUNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING,\nLICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR\nWARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED,\nSTATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF\nTITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR PURPOSE,\nNONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY,\nOR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT\nDISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED\nWARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU.\n\n6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY\nAPPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY\nLEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR\nEXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK,\nEVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.\n\n7. Termination\n\nThis License and the rights granted hereunder will terminate\nautomatically upon any breach by You of the terms of this\nLicense. Individuals or entities who have received Adaptations or\nCollections from You under this License, however, will not have their\nlicenses terminated provided such individuals or entities remain in\nfull compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8\nwill survive any termination of this License.  Subject to the above\nterms and conditions, the license granted here is perpetual (for the\nduration of the applicable copyright in the Work). 
Notwithstanding the\nabove, Licensor reserves the right to release the Work under different\nlicense terms or to stop distributing the Work at any time; provided,\nhowever that any such election will not serve to withdraw this License\n(or any other license that has been, or is required to be, granted\nunder the terms of this License), and this License will continue in\nfull force and effect unless terminated as stated above.\n\n8. Miscellaneous\n\nEach time You Distribute or Publicly Perform the Work or a Collection,\nthe Licensor offers to the recipient a license to the Work on the same\nterms and conditions as the license granted to You under this License.\nEach time You Distribute or Publicly Perform an Adaptation, Licensor\noffers to the recipient a license to the original Work on the same\nterms and conditions as the license granted to You under this License.\nIf any provision of this License is invalid or unenforceable under\napplicable law, it shall not affect the validity or enforceability of\nthe remainder of the terms of this License, and without further action\nby the parties to this agreement, such provision shall be reformed to\nthe minimum extent necessary to make such provision valid and\nenforceable.  No term or provision of this License shall be deemed\nwaived and no breach consented to unless such waiver or consent shall\nbe in writing and signed by the party to be charged with such waiver\nor consent.  This License constitutes the entire agreement between the\nparties with respect to the Work licensed here. There are no\nunderstandings, agreements or representations with respect to the Work\nnot specified here. Licensor shall not be bound by any additional\nprovisions that may appear in any communication from You. This License\nmay not be modified without the mutual written agreement of the\nLicensor and You.  
The rights granted under, and the subject matter\nreferenced, in this License were drafted utilizing the terminology of\nthe Berne Convention for the Protection of Literary and Artistic Works\n(as amended on September 28, 1979), the Rome Convention of 1961, the\nWIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms\nTreaty of 1996 and the Universal Copyright Convention (as revised on\nJuly 24, 1971). These rights and subject matter take effect in the\nrelevant jurisdiction in which the License terms are sought to be\nenforced according to the corresponding provisions of the\nimplementation of those treaty provisions in the applicable national\nlaw. If the standard suite of rights granted under applicable\ncopyright law includes additional rights not granted under this\nLicense, such additional rights are deemed to be included in the\nLicense; this License is not intended to restrict the license of any\nrights under applicable law.  Creative Commons Notice\n\nCreative Commons is not a party to this License, and makes no warranty\nwhatsoever in connection with the Work. Creative Commons will not be\nliable to You or any party on any legal theory for any damages\nwhatsoever, including without limitation any general, special,\nincidental or consequential damages arising in connection to this\nlicense. Notwithstanding the foregoing two (2) sentences, if Creative\nCommons has expressly identified itself as the Licensor hereunder, it\nshall have all rights and obligations of Licensor.\n\nExcept for the limited purpose of indicating to the public that the\nWork is licensed under the CCPL, Creative Commons does not authorize\nthe use by either party of the trademark \"Creative Commons\" or any\nrelated trademark or logo of Creative Commons without the prior\nwritten consent of Creative Commons. 
Any permitted use will be in\ncompliance with Creative Commons' then-current trademark usage\nguidelines, as may be published on its website or otherwise made\navailable upon request from time to time. For the avoidance of doubt,\nthis trademark restriction does not form part of the License.\n\nCreative Commons may be contacted at http://creativecommons.org/."
  },
  {
    "path": "MANIFEST.in",
    "content": "recursive-include decaptcha/iconset *\nrecursive-include examples *\n"
  },
  {
    "path": "README.md",
    "content": "captcha-decoder\n===============\n\n![Build Status](https://travis-ci.org/mekarpeles/captcha-decoder.png)\n\nThis module takes a captcha (image) as input, attempts to partition it into discrete segments, each (it hopes) containing a single symbol, and then runs basic vector space search to determine the similarity of each symbol against known characters (whose reference images are included). The objective of this project is to (a) make bboyte's code more accessible and (b) illustrate, in a readable way, the fundamentals of captcha cracking. Its primary goal is clarity, and it makes no claims or attempts at efficiency, accuracy, or practicality.\n\nThis work is a derivation of an original work by @boyter\n<bboyte01@gmail.com>, http://www.boyter.org/decoding-captchas/ (see\noriginal tutorial at\nhttps://web.archive.org/web/20121012023114/http://www.wausita.com/captcha/)\n\n## Installation\n\nOn Ubuntu, libjpeg-dev and libpng-dev may be system requirements for the Python Pillow (PIL) library \n\n    sudo apt-get install libjpeg-dev\n    sudo apt-get install libpng-dev\n    \nNext, fetch and build the decaptcha library\n\n    pip install git+https://github.com/mekarpeles/captcha-decoder.git\n\n## Usage\n\nThe decaptcha library comes with a command line utility called `decaptcha`. Running the command with `-h` will show a list of options. 
The <img> argument can be provided a filepath or a url:\n\n    usage: decaptcha [-h] [-v] [-l LIMIT] [-c CHANNELS] [-t THRESHOLD] [--min MIN]\n                     [--max MAX] [-o TOLERANCE]\n                     [<img>]\n    \n    Python captcha cracking utility\n    \n    positional arguments:\n      <img>                 Enter the filesystem path or url of a captcha image\n    \n    optional arguments:\n      -h, --help            show this help message and exit\n      -v                    Displays the decaptcha version\n      -l LIMIT, --limit LIMIT\n                            Package url\n      -c CHANNELS, --channels CHANNELS\n                            The number of prominant color channels to keep\n      -t THRESHOLD, --threshold THRESHOLD\n                            Accuracy threshold for matching decimal [0-1]\n      --min MIN             Filter out colors darker than this [0-256]\n      --max MAX             Filter out colors light than this [0-256]\n      -o TOLERANCE, --tolerance TOLERANCE\n                            Pixel tolerance for character segmentation. 
Higher is\n                            more lenient/greedy, lower is strict.\n\n## Example\n\n    $ decaptcha http://www.mondor.org/img/capex.jpg  --min 0 --max 20 --limit 5 --channels 5 --tolerance 7\n    \n    Character 0 of 7:\n            t (confidence of 0.839150063096)\n            e (confidence of 0.827405543276)\n    Character 1 of 7:\n            0 (confidence of 0.834057656228)\n            l (confidence of 0.771064160322)\n    Character 2 of 7:\n            t (confidence of 0.309437274354)\n            e (confidence of 0.303227199152)\n    Character 3 of 7:\n    Character 4 of 7:\n            t (confidence of 0.267644230239)\n            7 (confidence of 0.266067912114)\n    Character 5 of 7:\n            0 (confidence of 0.834057656228)\n            l (confidence of 0.789422830806)\n    Character 6 of 7:\n            t (confidence of 0.835510535512)\n            e (confidence of 0.835221298415)\n\n\n## Further Reading\n\nThe following implementations and techniques are recommended as more practical and accurate alternatives to this project:\n\n1. http://www.codeproject.com/Articles/106583/Handwriting-Recognition-Revisited-Kernel-Support-V\n"
  },
  {
    "path": "decaptcha/__init__.py",
    "content": "# -*- coding: utf-8 -*-\n\n\"\"\"\n   decaptcha\n   ~~~~~~~~~\n\n   Basic Captcha Cracker\n\"\"\"\n\n__version__ = '0.0.1'\n__author__ = [\n    'Mek <michael.karpeles@gmail.com>'\n]\n__license__ = 'see LICENSE (creative commons)'\n__contributors__ = 'see AUTHORS'\n__title__ = 'Python captcha cracking utility'\n\nimport sys\nfrom .decoder import Captcha  # NOQA\nfrom .decoder import trim, channel, monochrome, regions, decode  # NOQA\nfrom .cli import main\n\nif __name__ == '__main__':\n    sys.exit(main())\n"
  },
  {
    "path": "decaptcha/cli.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n\nimport argparse\nfrom . import Captcha, __title__, __version__\n\n\ndef threshold(x):\n    x = float(x)\n    if 1 > x < 0:\n        raise argparse.ArgumentTypeError(\n            \"Threshold must be a value between 0 and 1.\")\n    return x\n\n\ndef argparser():\n    \"\"\"Creates a command line ArgumentParser for decaptcha.\"\"\"\n    parser = argparse.ArgumentParser(description=__title__)\n    parser.add_argument('-v', help='Displays the decaptcha version',\n                        action='version', version='%s v%s'\n                        % (__title__, __version__))\n    parser.add_argument('captcha', nargs='?', metavar='<img>',\n                        help='Enter the filesystem path or url '\n                        'of a captcha image')\n    parser.add_argument('-l', '--limit', dest='limit', help='Package url',\n                        type=int, default=3)\n    parser.add_argument('-c', '--channels', dest='channels',\n                        help='The number of prominant color channels to keep',\n                        type=int, default=3)\n    parser.add_argument('-t', '--threshold', dest='threshold',\n                        help='Accuracy threshold for matching decimal [0-1]',\n                        type=threshold, default=0)\n    parser.add_argument('--min', dest='min', type=int, default=0,\n                        help='Filter out colors darker than this [0-256]')\n    parser.add_argument('--max', dest='max', type=int, default=230,\n                        help='Filter out colors light than this [0-256]')\n    parser.add_argument('-o', '--tolerance', dest='tolerance', type=int,\n                        default=3, help='Pixel tolerance for character '\n                        'segmentation. 
Higher is more lenient/greedy, '\n                        'lower is strict.')\n    return parser\n\n\ndef prettyprint(guesses):\n    regions = len(guesses)\n    for i, guess in enumerate(guesses):\n        print('Character %s of %s:' % (i + 1, regions))\n        for result in guess:\n            confidence, symbol = result\n            print('\\t%s (%s confidence)' % (symbol, confidence))\n\n\ndef main():\n    parser = argparser()\n    args = parser.parse_args()\n\n    if not args.captcha:\n        raise ValueError('No captcha input image provided')\n\n    prettyprint(Captcha(args.captcha).decode(channels=args.channels,\n                                             limit=args.limit,\n                                             threshold=args.threshold,\n                                             tolerance=args.tolerance,\n                                             _min=args.min,\n                                             _max=args.max))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "decaptcha/decoder.py",
    "content": "# -*- coding: utf-8 -*-\r\n\r\n\"\"\"\r\n    crack.py\r\n    ~~~~~~~~\r\n\r\n    This module takes captcha images as input and partitions them into\r\n    n new images, 1 image per character found within the captcha.\r\n\r\n    Original Code (http://tinyurl.com/puq6alb) by bboyte01@gmail.com\r\n    https://web.archive.org/web/20121012023114/http://www.wausita.com/captcha/\r\n    http://www.wausita.com/captcha/\r\n\r\n    :copyright: (c) 2012 by Mek\r\n    :license: see LICENSE for more details.\r\n\"\"\"\r\n\r\nimport os\r\nimport string\r\nfrom operator import itemgetter\r\nfrom math import sqrt\r\nfrom PIL import Image, ImageChops\r\nfrom io import BytesIO\r\n\r\ntry:\r\n    from urllib.request import urlopen\r\nexcept ImportError:\r\n    from urllib import urlopen\r\n\r\nSYMBOLS = list(string.ascii_lowercase + string.digits)\r\nICONS_PATH = os.path.abspath(\r\n    os.path.join(os.path.dirname(__file__), 'iconset'))\r\nIMAGESET = []\r\nWHITE = 255\r\n\r\n\r\ndef imageset():\r\n    \"\"\"Loads icons of various characters\"\"\"\r\n    imageset = []\r\n    for symbol in SYMBOLS:\r\n        for imfile in os.listdir(os.path.join(ICONS_PATH, symbol)):\r\n            path = os.path.join(ICONS_PATH, symbol, imfile)\r\n            imageset.append({symbol: Image.open(path)})\r\n    return imageset\r\n\r\n\r\ndef trim(im, color=WHITE):\r\n    \"\"\"Tims image to remove excess color (default: whitespace)\"\"\"\r\n    bg = Image.new(im.mode, im.size, WHITE)\r\n    diff = ImageChops.difference(im, bg)\r\n    diff = ImageChops.add(diff, diff, 2.0, -100)\r\n    return im.crop(diff.getbbox())\r\n\r\n\r\ndef channel(im, *colors, **kwargs):\r\n    \"\"\"Composes an new image with the same dimensions as `im` but\r\n    draws only pixels of the specified color channels on a `bg`\r\n    colored background.\r\n    \"\"\"\r\n    bg = kwargs.get('bg', WHITE)\r\n    sample = Image.new('P', im.size, bg)\r\n    width, height = im.size\r\n    for col in range(width):\r\n      
  for row in range(height):\r\n            pixel = im.getpixel((col, row))\r\n            if pixel in colors:\r\n                sample.putpixel((col, row), pixel)\r\n    return sample\r\n\r\n\r\ndef monochrome(im, threshold=255):\r\n    \"\"\"Converts all colors in gif image which are less than threshold\r\n    to black\"\"\"\r\n    return im.point(lambda x: 0 if x < 255 else 255, '1')\r\n\r\n\r\ndef regions(im, threshold=1):\r\n    \"\"\"Iterates over the columns of an image from left-to-right and\r\n    composes an ordered list of (start, end) column ranges referring\r\n    to discrete, contiguous columns which contain at least `threshold`\r\n    non-white pixel.\r\n    \"\"\"\r\n    regions = []\r\n    start = None\r\n    width, height = im.size\r\n    for col in range(width):\r\n        # if column contains at least one pixel\r\n        if sum([im.getpixel((col, row)) is not WHITE\r\n                for row in range(height)]) >= threshold:\r\n            start = start if start else col\r\n        elif start:\r\n            regions.append((start, col))\r\n            start = None  # reset start\r\n    return regions\r\n\r\n\r\ndef similarity(im1, im2, equalize=False):\r\n    \"\"\"Takes in two images, vectorizes them into concordance\r\n    dictionaries and spits out a number from 0 to 1 indicating how\r\n    related they are. 
0 means no relation and 1 indicates they are the\r\n    same.\r\n\r\n    params:\r\n        equalize - if true, resize the taller image to the size of\r\n                   the shorter one before comparing\r\n    \"\"\"\r\n    def scale(im1, im2):\r\n        \"\"\"Scales the image with the greater height to match the one\r\n        with the smaller height\r\n        \"\"\"\r\n        if im1.size[1] > im2.size[1]:\r\n            return im1.resize(im2.size, Image.ANTIALIAS), im2\r\n        elif im1.size[1] < im2.size[1]:\r\n            return im1, im2.resize(im1.size, Image.ANTIALIAS)\r\n        return im1, im2\r\n\r\n    def vectorize(im):\r\n        \"\"\"im.getdata returns the contents of an image as a flattened\r\n        sequence object containing pixel values.\r\n        \"\"\"\r\n        d1 = {}\r\n        for count, i in enumerate(im.getdata()):\r\n            d1[count] = i\r\n        return d1\r\n\r\n    def magnitude(concordance):\r\n        return sqrt(sum(count ** 2 for word, count in concordance.items()))\r\n\r\n    c1, c2 = [vectorize(im) for im in\r\n              (scale(im1, im2) if equalize else (im1, im2))]\r\n    topvalue = 0\r\n    for word, count in c1.items():\r\n        if word in c2:\r\n            topvalue += count * c2[word]\r\n    return topvalue / (magnitude(c1) * magnitude(c2))\r\n\r\n\r\nclass Captcha(object):\r\n\r\n    def __init__(self, imgpath):\r\n        self.imgpath = imgpath\r\n\r\n    @property\r\n    def im(self):\r\n        \"\"\"Fetches captcha's image from disk or url\"\"\"\r\n        try:\r\n            im = Image.open(self.imgpath)\r\n        except:\r\n            im = Image.open(BytesIO(urlopen(self.imgpath).read()))\r\n        return self.gif(im)\r\n\r\n    @property\r\n    def histogram(self):\r\n        with self.im as im:\r\n            return im.histogram()\r\n\r\n    def decode(self, channels=3, limit=3, threshold=0, tolerance=3,\r\n               _min=0, _max=245):\r\n        \"\"\"Attempts to decode a captcha by:\r\n\r\n        - Finding the `n` most prominent colors in 
the image\r\n        - Sampling the captcha into `n` images, each discretely composed\r\n          of a different prominent color.\r\n        - Segmenting each sample into regions of contiguous columns\r\n          containing any pixels (which hopefully equate to\r\n          individual alphanumeric characters), and finally\r\n        - Guessing which character appears in each segment\r\n\r\n        XXX Prettier output and organizing of results required\r\n        \"\"\"\r\n        colors = [color for color, _ in self\r\n                  .prominant_colors(n=channels, _min=_min, _max=_max)]\r\n        sample = monochrome(self.channel(*colors))\r\n        return [self.guess_character(segment, limit=limit, threshold=threshold)\r\n                for segment in self.segments(sample, tolerance=tolerance)]\r\n\r\n    def prominant_colors(self, n=5, _min=0, _max=256):\r\n        \"\"\"Calculates the n most prominent colors of an image as an\r\n        ordered list of (color, frequency) tuples.\r\n\r\n        params:\r\n            n - limit the number of colors to `n`\r\n            _min - exclude any colors below this number (filter\r\n                   out dark colors, like black/0)\r\n            _max - exclude any colors at or above this number (filter\r\n                   out light colors, like white/255)\r\n\r\n        XXX consider sorted(im.getcolors(n), reverse=True)\r\n        \"\"\"\r\n        hist = self.histogram[_min:_max]\r\n        return sorted([(c, f) for c, f in enumerate(hist)],\r\n                      key=itemgetter(1), reverse=True)[:n]\r\n\r\n    def channel(self, *colors, **kwargs):\r\n        \"\"\"Composes an image with the same dimensions as `im` but\r\n        draws only pixels of the specified colors on a `bg` colored\r\n        background.\r\n        \"\"\"\r\n        with self.im as im:\r\n            return channel(im, *colors, **kwargs)\r\n\r\n    @staticmethod\r\n    def gif(im):\r\n        \"\"\"Converts captcha to a GIF 
(makes things easier since it has\r\n        256 colors) and finds the most prominent colors in the image\r\n        \"\"\"\r\n        return im if im.mode is 'P' else im.convert('P')\r\n\r\n    @classmethod\r\n    def segments(cls, im, tolerance=3, crop=True):\r\n        \"\"\"Returns one image segment per region of contiguous non-white\r\n        columns found in `im` (see `regions`)\r\n        \"\"\"\r\n        return [cls.segment(im, region, crop=crop) for\r\n                region in regions(im, threshold=tolerance)]\r\n\r\n    @classmethod\r\n    def segment(cls, im, region, crop=True):\r\n        \"\"\"Returns a cropped image segment (hopefully of an\r\n        alphanumeric character) within the range of the region\r\n        \"\"\"\r\n        start, end = 0, 1\r\n        segment = im.crop((region[start], 0, region[end], im.size[1]))\r\n        return trim(segment) if crop else segment\r\n\r\n    @staticmethod\r\n    def guess_character(im, threshold=0, limit=None):\r\n        \"\"\"Guess alphanumeric character in image using Basic Vector\r\n        Space Search algorithm.\r\n\r\n        http://la2600.org/talks/files/20040102/Vector_Space_Search_Engine_Theory.pdf\r\n        \"\"\"\r\n        global IMAGESET  # lazy-ish style iconset loading\r\n        if not IMAGESET:\r\n            IMAGESET = imageset()\r\n\r\n        guesses = []\r\n        for icon in IMAGESET:\r\n            for symbol, im2 in icon.items():\r\n                guess = similarity(im, im2, equalize=True)\r\n                if guess >= threshold:\r\n                    guesses.append((guess, symbol))\r\n        return sorted(guesses, reverse=True)[:limit]\r\n\r\n\r\ndef decode(captcha, channels=1, limit=3, threshold=0, tolerance=3,\r\n           _min=0, _max=256):\r\n    \"\"\"Backward compatible method for decoding a captcha\"\"\"\r\n    return Captcha(captcha).decode(\r\n        channels=channels,\r\n        limit=limit,\r\n        threshold=threshold,\r\n        tolerance=tolerance,\r\n        _min=_min, _max=_max)\r\n"
  },
  {
    "path": "setup.cfg",
    "content": "[bdist_wheel]\nuniversal=1\n"
  },
  {
    "path": "setup.py",
    "content": "# -*- coding: utf-8 -*-\n\n\"\"\"\n    decaptcha\n    ~~~~~~~~~\n\n\"\"\"\n\nimport codecs\nimport os\nimport re\nfrom setuptools import setup\n\nhere = os.path.abspath(os.path.dirname(__file__))\n\n\ndef read(*parts):\n    \"\"\"Taken from pypa pip setup.py:\n    intentionally *not* adding an encoding option to open, See:\n    https://github.com/pypa/virtualenv/issues/201#issuecomment-3145690\n    \"\"\"\n    return codecs.open(os.path.join(here, *parts), 'r').read()\n\n\ndef find_version(*file_paths):\n    version_file = read(*file_paths)\n    version_match = re.search(r\"^__version__ = ['\\\"]([^'\\\"]*)['\\\"]\",\n                              version_file, re.M)\n    if version_match:\n        return version_match.group(1)\n    raise RuntimeError(\"Unable to find version string.\")\n\n\nsetup(\n    name='decaptcha',\n    version=find_version(\"decaptcha\", \"__init__.py\"),\n    description='Python captcha cracking utility',\n    long_description=read('README.md'),\n    url='http://github.com/mekarpeles/captcha-decoder',\n    author='mek',\n    author_email='michael.karpeles@gmail.com',\n    packages=[\n        'decaptcha',\n        ],\n    platforms='any',\n    license='LICENSE',\n    classifiers=[\n        'Development Status :: 2 - Pre-Alpha',\n        \"Intended Audience :: Developers\",\n        \"Programming Language :: Python :: 2.7\",\n        \"Programming Language :: Python :: 3\"\n        ],\n    install_requires=[\n        'Pillow >= 2.9.0'\n        ],\n    entry_points={\n        'console_scripts': ['decaptcha=decaptcha.cli:main'],\n        },\n    extras_require={\n        ':python_version==\"2.7\"': ['argparse']\n        },\n    include_package_data=True,\n    package_data={'': ['iconset/**/*.gif']},\n)\n"
  },
  {
    "path": "tests/__init__.py",
    "content": ""
  },
  {
    "path": "tests/test_captcha.py",
    "content": "# -*- coding: utf-8 -*-\n\n\"\"\"\n    tests\n    ~~~~~\n    Test cases for the decaptcha package\n\n    :copyright: (c) 2012 by Mek\n    :license: see LICENSE for more details.\n\"\"\"\n\nimport os.path\nimport unittest\nfrom PIL import Image\nimport decaptcha\n\nTEST_IMG_DIR = os.path.abspath(\n    os.path.join(\n        os.path.dirname(os.path.realpath(__file__)),\n        os.pardir, 'tests', 'images'))\nTEST_CAPTCHA_IMG = os.path.join(TEST_IMG_DIR, 'captcha.gif')\nTEST_CHANNEL_IMG = os.path.join(TEST_IMG_DIR, 'channel.gif')\nTEST_SEGMENT_IMG = lambda i: os.path.join(TEST_IMG_DIR, 'segment%s.gif' % i)\nEXPECTED_HISTOGRAM = [\n    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n    0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0,\n    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0,\n    2, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n    0, 1, 2, 0, 0, 0, 1, 2, 0, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 2, 0, 0, 0,\n    0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 3, 1, 3, 3, 0, 0, 0, 0, 0, 0, 1, 0, 3,\n    2, 132, 1, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 15, 0, 1, 0, 1, 0, 0,\n    8, 1, 0, 0, 0, 0, 1, 6, 0, 2, 0, 0, 0, 0, 18, 1, 1, 1, 1, 1, 2, 365,\n    115, 0, 1, 0, 0, 0, 135, 186, 0, 0, 1, 0, 0, 0, 116, 3, 0, 0, 0, 0, 0,\n    21, 1, 1, 0, 0, 0, 2, 10, 2, 0, 0, 0, 0, 2, 10, 0, 0, 0, 0, 1, 0, 625\n    ]\nEXPECTED_DOMINANT_COLORS = [\n    (255, 625), (212, 365), (220, 186), (219, 135), (169, 132), (227, 116),\n    (213, 115), (234, 21), (205, 18), (184, 15)\n    ]\nEXPECTED_REGIONS = [\n    (6, 14), (15, 25), (27, 35), (37, 46), (48, 56), (57, 67)\n    ]\nEXPECTED_OUTPUT = (1.0, '7')\n\n\nclass CaptchaDecoderTest(unittest.TestCase):\n\n    def setUp(self):\n        self.captcha = decaptcha.Captcha(TEST_CAPTCHA_IMG)\n\n    def test_histogram(self):\n        self.assertTrue(self.captcha.histogram 
== EXPECTED_HISTOGRAM,\n                        \"Captcha histogram different from expected\")\n\n    def test_prominant_colors(self):\n        self.assertTrue(self.captcha.prominant_colors(n=10) ==\n                        EXPECTED_DOMINANT_COLORS,\n                        \"Captcha's dominant colors differ from expected\")\n\n    def test_channels(self):\n        channel = decaptcha.monochrome(self.captcha.channel(220, 227))\n        print(channel.histogram())\n        self.assertTrue(channel.histogram() ==\n                        Image.open(TEST_CHANNEL_IMG).histogram(),\n                        \"Channel sample did not match expected image\")\n\n    def test_regions(self):\n        sample = decaptcha.monochrome(self.captcha.channel(220, 227))\n        regions = decaptcha.regions(sample, threshold=1)\n        self.assertTrue(regions == EXPECTED_REGIONS,\n                        \"Expected regions %s, instead got %s\"\n                        % (EXPECTED_REGIONS, regions))\n\n    def test_segmentation(self):\n        \"\"\"Note, in the test cases of the original publication\n        (http://tinyurl.com/phvggox), output segments #3 and #5 (which\n        are both the number 9) are mistakenly swapped.\n        \"\"\"\n        sample = decaptcha.monochrome(self.captcha.channel(220, 227))\n        segments = self.captcha.segments(sample, crop=False, tolerance=1)\n        for i, segment in enumerate(segments):\n            EXPECTED_SEGMENT = Image.open(TEST_SEGMENT_IMG(i+1))\n            self.assertTrue(segment.histogram() ==\n                            EXPECTED_SEGMENT.histogram(),\n                            \"Segment #%s is wrong.\" % (i+1))\n\n    def test_guess_character(self):\n        sample = decaptcha.monochrome(self.captcha.channel(220, 227))\n        regions = decaptcha.regions(sample)\n        segment = self.captcha.segment(sample, regions[0], crop=False)\n        exp_seg = Image.open(TEST_SEGMENT_IMG(1))\n        self.assertTrue(segment.histogram() == 
exp_seg.histogram(),\n                        \"Segment is wrong\")\n        predictions = self.captcha.guess_character(segment)\n        self.assertTrue(predictions[0] == EXPECTED_OUTPUT,\n                        \"Expected %s, got %s\" %\n                        (EXPECTED_OUTPUT, predictions[0]))\n\n    def test_decoder(self):\n        self.captcha.decode()\n"
  }
]