Full Code of phatpiglet/autocorrect for AI

master 3ad22620b253 cached

10 files

29.0 KB

8.0k tokens

21 symbols

1 requests

Download .txt

Repository: phatpiglet/autocorrect
Branch: master
Commit: 3ad22620b253
Files: 10
Total size: 29.0 KB

Directory structure:
gitextract_rl8hzz88/

├── LICENSE
├── README.rst
├── autocorrect/
│   ├── __init__.py
│   ├── nlp_parser.py
│   ├── utils.py
│   ├── word.py
│   ├── word_lists.py
│   └── words.bz2
├── setup.py
└── unit_tests/
    └── test.py

================================================
FILE CONTENTS
================================================

================================================
FILE: LICENSE
================================================
The MIT License (MIT)

Copyright (c) 2014 Jonas McCallum

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.



================================================
FILE: README.rst
================================================
===========
autocorrect
===========
Python 3 Spelling Corrector

Deprecated Fork
===============
This is a deprecated fork. If you wish to contribute to the project, please visit the active fork, https://github.com/fifimajster/autocorrect, maintained by the awesome legend coder Filip Sondej.

Installation
============
.. code-block:: bash

    pip install autocorrect

Examples
========
.. code-block:: python

    >>> from autocorrect import spell
    >>> spell('HTe')
    'The'


================================================
FILE: autocorrect/__init__.py
================================================
# Python 3 Spelling Corrector
#
# Copyright 2014 Jonas McCallum.
# Updated for Python 3, based on Peter Norvig's
# 2007 version: http://norvig.com/spell-correct.html
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
Spell function

Author: Jonas McCallum
https://github.com/foobarmus/autocorrect

"""
from autocorrect.nlp_parser import NLP_COUNTS
from autocorrect.word import Word, common, exact, known, get_case

def spell(word):
    """
    Return the most probable correction for `word`.

    Candidate sets are tried in a fixed order, from the word itself
    through single typos to double typos; the first non-empty set
    wins. Its member with the highest NLP count is returned, cased
    to match the input as well as can be guessed.

    """
    variants = Word(word)
    single = [word]
    # each stage is a thunk so the later, more expensive stages only
    # run when every earlier stage came back empty
    stages = (
        lambda: common(single),
        lambda: exact(single),
        lambda: known(single),
        lambda: known(variants.typos()),
        lambda: common(variants.double_typos()),
    )
    candidates = single
    for stage in stages:
        found = stage()
        if found:
            candidates = found
            break
    best = max(candidates, key=NLP_COUNTS.get)
    return get_case(word, best)


================================================
FILE: autocorrect/nlp_parser.py
================================================
# Python 3 Spelling Corrector
#
# Copyright 2014 Jonas McCallum.
# Updated for Python 3, based on Peter Norvig's
# 2007 version: http://norvig.com/spell-correct.html
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
NLP parser

Author: Jonas McCallum
https://github.com/foobarmus/autocorrect

"""
from autocorrect.utils import words_from_archive, zero_default_dict

def parse(lang_sample):
    """
    Count how often each word occurs in a language sample.

    `lang_sample` is the name of a text file in the word archive.
    Returns a `(vocabulary, counts)` pair: the set of distinct words
    and a zero-default mapping of word -> number of occurrences.

    """
    tokens = words_from_archive(lang_sample, include_dups=True)
    tally = zero_default_dict()
    for token in tokens:
        tally[token] = tally[token] + 1
    vocabulary = set(tokens)
    return vocabulary, tally

# Built once at import time from the 'big.txt' sample: the set of
# distinct words and the per-word occurrence counts.
NLP_WORDS, NLP_COUNTS = parse('big.txt')


================================================
FILE: autocorrect/utils.py
================================================
# Python 3 Spelling Corrector
#
# Copyright 2014 Jonas McCallum.
# Updated for Python 3, based on Peter Norvig's
# 2007 version: http://norvig.com/spell-correct.html
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
File reader, concat function and dict wrapper

Author: Jonas McCallum
https://github.com/foobarmus/autocorrect

"""
import re, os, tarfile
from contextlib import closing
from itertools import chain

# directory containing this module
PATH = os.path.abspath(os.path.dirname(__file__))
# file name of the bz2-compressed tar archive holding the word files
BZ2 = 'words.bz2'
# pattern for one word: a run of ASCII letters
RE = '[A-Za-z]+'

def words_from_archive(filename, include_dups=False, map_case=False):
    """
    Read `filename` from the word archive and return its words.

    By default the distinct words are returned as a set. With
    `include_dups` the full list of matches is returned instead;
    otherwise, with `map_case`, the result is a dict mapping each
    lowercased word to a spelling found in the file.

    """
    archive_path = os.path.join(PATH, BZ2)
    member = '{}/{}'.format('words', filename)
    with closing(tarfile.open(archive_path, 'r:bz2')) as archive:
        with closing(archive.extractfile(member)) as handle:
            raw = handle.read()
    text = raw.decode(encoding='utf-8')
    words = re.findall(RE, text)
    if include_dups:
        return words
    if map_case:
        return {w.lower(): w for w in words}
    return set(words)

def concat(*args):
    """
    Join the pieces into one string.

    Pieces may be strings or iterables of strings:
    reversed('th'), 'e' => 'hte'

    """
    try:
        joined = ''.join(args)
    except TypeError:
        # at least one piece is not a plain string (e.g. a reversed
        # iterator), so flatten every piece before joining
        joined = ''.join(chain.from_iterable(args))
    return joined

class Zero(dict):
    """
    dict with a zero default

    Looking up a missing key, by indexing or with `get`, yields 0
    without inserting the key, so `counts[word] += 1` works for
    words that have not been seen yet.

    """

    def __getitem__(self, key):
        """Return the value stored for `key`, or 0 when it is absent."""
        return self.get(key)

    def get(self, key, default=0):
        """
        Return the value stored for `key`, else `default`.

        `default` is 0 unless given, which keeps the zero-default
        behaviour while staying call-compatible with `dict.get`
        (a two-argument call used to raise TypeError).

        """
        try:
            return super(Zero, self).__getitem__(key)
        except KeyError:
            return default

# factory-style alias: calling it returns a new, empty Zero mapping
zero_default_dict = Zero


================================================
FILE: autocorrect/word.py
================================================
# Python 3 Spelling Corrector
#
# Copyright 2014 Jonas McCallum.
# Updated for Python 3, based on Peter Norvig's
# 2007 version: http://norvig.com/spell-correct.html
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
Word based methods and functions

Author: Jonas McCallum
https://github.com/foobarmus/autocorrect

"""
from autocorrect.utils import concat
from autocorrect.nlp_parser import NLP_WORDS
from autocorrect.word_lists import LOWERCASE, MIXED_CASE
from autocorrect.word_lists import LOWERED, CASE_MAPPED

# letters tried when generating replacement and insertion typos
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'
# union of the lowercase word list, the lowercased keys of the
# mixed-case list and the vocabulary of the NLP sample
KNOWN_WORDS = LOWERCASE | LOWERED | NLP_WORDS

class Word(object):
    """container for word-based methods"""

    def __init__(self, word):
        """
        Pre-compute every (head, tail) split of the lowercased
        word; the typo generators are built on these splits.

        'the' => (('', 'the'), ('t', 'he'),
                  ('th', 'e'), ('the', ''))

        """
        lowered = word.lower()
        self.slices = tuple((lowered[:cut], lowered[cut:])
                            for cut in range(len(lowered) + 1))
        self.word = word

    def _deletes(self):
        """one letter dropped: th"""
        dropped = set()
        # the last split has an empty tail, so nothing to drop there
        for head, tail in self.slices[:-1]:
            dropped.add(concat(head, tail[1:]))
        return dropped

    def _transposes(self):
        """two neighbouring letters swapped: teh"""
        swapped = set()
        # only splits whose tail has at least two letters
        for head, tail in self.slices[:-2]:
            swapped.add(concat(head, reversed(tail[:2]), tail[2:]))
        return swapped

    def _replaces(self):
        """one letter substituted: tge"""
        substituted = set()
        for head, tail in self.slices[:-1]:
            rest = tail[1:]
            for letter in ALPHABET:
                substituted.add(concat(head, letter, rest))
        return substituted

    def _inserts(self):
        """one extra letter added: thwe"""
        inserted = set()
        for head, tail in self.slices:
            for letter in ALPHABET:
                inserted.add(concat(head, letter, tail))
        return inserted

    def typos(self):
        """letter combinations one typo away from word"""
        return self._deletes().union(self._transposes(),
                                     self._replaces(),
                                     self._inserts())

    def double_typos(self):
        """letter combinations two typos away from word"""
        twice = set()
        for once in self.typos():
            twice.update(Word(once).typos())
        return twice


def common(words):
    """words that occur in the NLP sample: {'the', 'teh'} => {'the'}"""
    candidates = set(words)
    return candidates.intersection(NLP_WORDS)

def exact(words):
    """
    words matching a mixed-case entry exactly:
    {'Snog', 'snog', 'Snoddy'} => {'Snoddy'}

    """
    candidates = set(words)
    return candidates.intersection(MIXED_CASE)

def known(words):
    """
    lowercased words found in KNOWN_WORDS:
    {'Gazpacho', 'gazzpacho'} => {'gazpacho'}

    """
    lowered = set()
    for w in words:
        lowered.add(w.lower())
    return lowered.intersection(KNOWN_WORDS)

def known_as_lower(words):
    """
    lowercased words found in the lowercase list:
    {'Natasha', 'Bob'} => {'bob'}

    """
    lowered = set()
    for w in words:
        lowered.add(w.lower())
    return lowered.intersection(LOWERCASE)

def get_case(word, correction):
    """
    Guess the capitalisation the writer intended for `correction`,
    going by how `word` was typed.

    manchester => manchester
    chilton => Chilton
    AAvTech => AAvTech
    THe => The
    imho => IMHO

    """
    if word.istitle():
        return correction.title()
    elif word.isupper():
        return correction.upper()

    # nothing was corrected and the input carried its own casing
    unchanged = (correction == word)
    if unchanged and not word.islower():
        return word

    # two leading capitals on a longer word: treat as a slipped shift key
    leading_caps = len(word) > 2 and word[:2].isupper()
    if leading_caps:
        return correction.title()

    if known_as_lower([correction]):  # expensive
        return correction
    # not a lowercase word: use its mixed-case spelling if one exists
    return CASE_MAPPED.get(correction, correction)


================================================
FILE: autocorrect/word_lists.py
================================================
# Python 3 Spelling Corrector
#
# Copyright 2014 Jonas McCallum.
# Updated for Python 3, based on Peter Norvig's
# 2007 version: http://norvig.com/spell-correct.html
#
# Open source, MIT license
# http://www.opensource.org/licenses/mit-license.php
"""
Word lists for case sensitive/insensitive lookups

Author: Jonas McCallum
https://github.com/foobarmus/autocorrect

"""
from autocorrect.utils import words_from_archive

# en_US_GB_CA is a superset of US, GB and CA
# spellings (color, colour, etc). It contains
# roughly half a million words. For this
# example, imagine it's just seven words...
#
# we (lower)
# flew (lower)
# to (lower)
# Abu (mixed)
# Dhabi (mixed)
# via (lower)
# Colombo (mixed)

# set of the words in the lowercase list
LOWERCASE = words_from_archive('en_US_GB_CA_lower.txt')
# {'we', 'flew', 'to', 'via'}

# dict from lowercased word to its spelling in the mixed-case list
CASE_MAPPED = words_from_archive('en_US_GB_CA_mixed.txt',
                                 map_case=True)
# {'abu': 'Abu',
#  'dhabi': 'Dhabi',
#  'colombo': 'Colombo'}
#
# Note that en_US_GB_CA_mixed.txt also contains
# acronyms/mixed case variants of common words,
# so in reality, CASE_MAPPED also contains:
#
# {'to': 'TO',
#  'via': 'Via'}

# the mixed-case spellings themselves
MIXED_CASE = set(CASE_MAPPED.values())
# {'Abu', 'Dhabi', 'Colombo'}

# the lowercased forms of the mixed-case spellings
LOWERED = set(CASE_MAPPED.keys())
# {'abu', 'dhabi', 'colombo'}


================================================
FILE: setup.py
================================================
from distutils.core import setup

# Package metadata for the autocorrect distribution.
setup(name='autocorrect',
      version='0.3.0',
      packages=['autocorrect'],
      # ship the compressed word archive inside the package
      package_data={'autocorrect': ['words.bz2']},
      description='Python 3 Spelling Corrector',
      author='Jonas McCallum',
      author_email='jonasmccallum@gmail.com',
      url='https://github.com/phatpiglet/autocorrect/',
      license='http://www.opensource.org/licenses/mit-license.php',
      # a list, not a tuple: distutils expects a list of strings here
      # and warns when given any other sequence type
      classifiers=['Intended Audience :: Developers',
                   'License :: OSI Approved :: MIT License',
                   'Natural Language :: English',
                   'Programming Language :: Python',
                   'Programming Language :: Python :: 2.7',
                   'Programming Language :: Python :: 3'],
      keywords='autocorrect spelling corrector')


================================================
FILE: unit_tests/test.py
================================================
import os, sys, time
from copy import deepcopy

# Append the parent of this directory to sys.path so the imports of
# `autocorrect` below can resolve when running from a checkout.
PATH = os.path.abspath(os.path.dirname(__file__))
SOURCE_DIR = os.path.split(PATH)[0]
sys.path.append(SOURCE_DIR)
from autocorrect import spell
from autocorrect.word import known
from autocorrect.nlp_parser import NLP_COUNTS

# per-failure line: input, correction (its count), target (its count)
MSG = 'spell({}) => {} ({}); should be {} ({})'
# summary line: failures/total, percent correct, unknown targets, seconds
RESULT = 'bad: {}/{}, % correct: {}, unknown: {}, secs: {}'

def spelltest(tests, verbose=False):
    """
    Run spell() over a table of misspellings and summarise the outcome.

    tests   -- dict mapping each target word to a string of
               whitespace-separated misspellings of it
    verbose -- when true, print a MSG line for every wrong correction

    Returns the RESULT summary string: wrong corrections out of the
    number tried, percentage correct, how many of the failures had a
    target that known() does not recognise, and elapsed whole seconds.

    """
    # time.clock() was removed in Python 3.8; use perf_counter where it
    # exists and fall back to time.time on interpreters that lack it
    timer = getattr(time, 'perf_counter', time.time)
    n, bad, unknown, start = 0, 0, 0, timer()
    for target, incorrect_spellings in tests.items():
        for incorrect_spelling in incorrect_spellings.split():
            n += 1
            w = spell(incorrect_spelling)
            if w != target:
                bad += 1
                if not known([target]):
                    unknown += 1
                if verbose:
                    print(MSG.format(incorrect_spelling, w, NLP_COUNTS[w],
                                     target, NLP_COUNTS[target]))
    # an empty table has no failures; report 100% instead of dividing by 0
    pct_correct = int(100. - 100. * bad / n) if n else 100
    return RESULT.format(bad, n, pct_correct,
                         unknown, int(timer() - start))

tests1 = {'access': 'acess',
          'accessing': 'accesing',
          'accommodation': 'accomodation acommodation acomodation',
          'account': 'acount',
          'address': 'adress adres',
          'addressable': 'addresable',
          'arranged': 'aranged arrainged',
          'arrangeing': 'aranging',
          'arrangement': 'arragment',
          'articles': 'articals',
          'aunt': 'annt anut arnt',
          'auxiliary': 'auxillary',
          'available': 'avaible',
          'awful': 'awfall afful',
          'basically': 'basicaly',
          'beginning': 'begining',
          'benefit': 'benifit',
          'benefits': 'benifits',
          'between': 'beetween',
          'bicycle': 'bicycal bycicle bycycle',
          'biscuits': 'biscits biscutes biscuts bisquits buiscits buiscuts',
          'built': 'biult',
          'cake': 'cak',
          'career': 'carrer',
          'cemetery': 'cemetary semetary',
          'centrally': 'centraly',
          'certain': 'cirtain',
          'challenges': 'chalenges chalenges',
          'chapter': 'chaper chaphter chaptur',
          'choice': 'choise',
          'choosing': 'chosing',
          'clerical': 'clearical',
          'committee': 'comittee',
          'compare': 'compair',
          'completely': 'completly',
          'consider': 'concider',
          'considerable': 'conciderable',
          'contented': 'contenpted contende contended contentid',
          'curtains': 'cartains certans courtens cuaritains curtans curtians curtions',
          'decide': 'descide',
          'decided': 'descided',
          'definitely': 'definately difinately',
          'definition': 'defenition',
          'definitions': 'defenitions',
          'description': 'discription',
          'desiccate': 'desicate dessicate dessiccate',
          'diagrammatically': 'diagrammaticaally',
          'different': 'diffrent',
          'driven': 'dirven',
          'ecstasy': 'exstacy ecstacy',
          'embarrass': 'embaras embarass',
          'establishing': 'astablishing establising',
          'experience': 'experance experiance',
          'experiences': 'experances',
          'extended': 'extented',
          'extremely': 'extreamly',
          'fails': 'failes',
          'families': 'familes',
          'february': 'febuary',
          'further': 'futher',
          'gallery': 'galery gallary gallerry gallrey',
          'hierarchal': 'hierachial',
          'hierarchy': 'hierchy',
          'inconvenient': 'inconvienient inconvient inconvinient',
          'independent': 'independant independant',
          'initial': 'intial',
          'initials': 'inetials inistals initails initals intials',
          'juice': 'guic juce jucie juise juse',
          'latest': 'lates latets latiest latist',
          'laugh': 'lagh lauf laught lugh',
          'level': 'leval',
          'levels': 'levals',
          'liaison': 'liaision liason',
          'lieu': 'liew',
          'literature': 'litriture',
          'loans': 'lones',
          'locally': 'localy',
          'magnificent': 'magnificnet magificent magnifcent magnifecent magnifiscant magnifisent magnificant',
          'management': 'managment',
          'meant': 'ment',
          'minuscule': 'miniscule',
          'minutes': 'muinets',
          'monitoring': 'monitering',
          'necessary': 'neccesary necesary neccesary necassary necassery neccasary',
          'occurrence': 'occurence occurence',
          'often': 'ofen offen offten ofton',
          'opposite': 'opisite oppasite oppesite oppisit oppisite opposit oppossite oppossitte',
          'parallel': 'paralel paralell parrallel parralell parrallell',
          'particular': 'particulaur',
          'perhaps': 'perhapse',
          'personnel': 'personnell',
          'planned': 'planed',
          'poem': 'poame',
          'poems': 'poims pomes',
          'poetry': 'poartry poertry poetre poety powetry',
          'position': 'possition',
          'possible': 'possable',
          'pretend': 'pertend protend prtend pritend',
          'problem': 'problam proble promblem proplen',
          'pronunciation': 'pronounciation',
          'purple': 'perple perpul poarple',
          'questionnaire': 'questionaire',
          'really': 'realy relley relly',
          'receipt': 'receit receite reciet recipt',
          'receive': 'recieve',
          'refreshment': 'reafreshment refreshmant refresment refressmunt',
          'remember': 'rember remeber rememmer rermember',
          'remind': 'remine remined',
          'scarcely': 'scarcly scarecly scarely scarsely',
          'scissors': 'scisors sissors',
          'separate': 'seperate',
          'singular': 'singulaur',
          'someone': 'somone',
          'sources': 'sorces',
          'southern': 'southen',
          'special': 'speaical specail specal speical',
          'splendid': 'spledid splended splened splended',
          'standardizing': 'stanerdizing',
          'stomach': 'stomac stomache stomec stumache',
          'supersede': 'supercede superceed',
          'there': 'ther',
          'totally': 'totaly',
          'transferred': 'transfred',
          'transportability': 'transportibility',
          'triangular': 'triangulaur',
          'understand': 'undersand undistand',
          'unexpected': 'unexpcted unexpeted unexspected',
          'unfortunately': 'unfortunatly',
          'unique': 'uneque',
          'useful': 'usefull',
          'valuable': 'valubale valuble',
          'variable': 'varable',
          'variant': 'vairiant',
          'various': 'vairious',
          'visited': 'fisited viseted vistid vistied',
          'visitors': 'vistors',
          'voluntary': 'volantry',
          'voting': 'voteing',
          'wanted': 'wantid wonted',
          'whether': 'wether',
          'wrote': 'rote wote'}

tests2 = {'forbidden': 'forbiden',
          'decisions': 'deciscions descisions',
          'supposedly': 'supposidly',
          'embellishing': 'embelishing',
          'technique': 'tecnique',
          'permanently': 'perminantly',
          'confirmation': 'confermation',
          'appointment': 'appoitment',
          'progression': 'progresion',
          'accompanying': 'acompaning',
          'applicable': 'aplicable',
          'regained': 'regined',
          'guidelines': 'guidlines',
          'surrounding': 'serounding',
          'titles': 'tittles',
          'unavailable': 'unavailble',
          'advantageous': 'advantageos',
          'brief': 'brif',
          'appeal': 'apeal',
          'consisting': 'consisiting',
          'clerk': 'cleark clerck',
          'component': 'componant',
          'favourable': 'faverable',
          'separation': 'seperation',
          'search': 'serch',
          'receive': 'recieve',
          'employees': 'emploies',
          'prior': 'piror',
          'resulting': 'reulting',
          'suggestion': 'sugestion',
          'opinion': 'oppinion',
          'cancellation': 'cancelation',
          'criticism': 'citisum',
          'useful': 'usful',
          'humour': 'humor',
          'anomalies': 'anomolies',
          'would': 'whould',
          'doubt': 'doupt',
          'examination': 'eximination',
          'therefore': 'therefoe',
          'recommend': 'recomend',
          'separated': 'seperated',
          'successful': 'sucssuful succesful',
          'apparent': 'apparant',
          'occurred': 'occureed',
          'particular': 'paerticulaur',
          'pivoting': 'pivting',
          'announcing': 'anouncing',
          'challenge': 'chalange',
          'arrangements': 'araingements',
          'proportions': 'proprtions',
          'organized': 'oranised',
          'accept': 'acept',
          'dependence': 'dependance',
          'unequalled': 'unequaled',
          'numbers': 'numbuers',
          'sense': 'sence',
          'conversely': 'conversly',
          'provide': 'provid',
          'arrangement': 'arrangment',
          'responsibilities': 'responsiblities',
          'fourth': 'forth',
          'ordinary': 'ordenary',
          'description': 'desription descvription desacription',
          'inconceivable': 'inconcievable',
          'data': 'dsata',
          'register': 'rgister',
          'supervision': 'supervison',
          'encompassing': 'encompasing',
          'negligible': 'negligable',
          'allow': 'alow',
          'operations': 'operatins',
          'executed': 'executted',
          'interpretation': 'interpritation',
          'hierarchy': 'heiarky',
          'indeed': 'indead',
          'years': 'yesars',
          'through': 'throut',
          'committee': 'committe',
          'inquiries': 'equiries',
          'before': 'befor',
          'continued': 'contuned',
          'permanent': 'perminant',
          'choose': 'chose',
          'virtually': 'vertually',
          'correspondence': 'correspondance',
          'eventually': 'eventully',
          'lonely': 'lonley',
          'profession': 'preffeson',
          'they': 'thay',
          'now': 'noe',
          'desperately': 'despratly',
          'university': 'unversity',
          'adjournment': 'adjurnment',
          'possibilities': 'possablities',
          'stopped': 'stoped',
          'mean': 'meen',
          'weighted': 'wagted',
          'adequately': 'adequattly',
          'shown': 'hown',
          'matrix': 'matriiix',
          'profit': 'proffit',
          'encourage': 'encorage',
          'collate': 'colate',
          'disaggregate': 'disaggreagte disaggreaget',
          'receiving': 'recieving reciving',
          'proviso': 'provisoe',
          'umbrella': 'umberalla',
          'approached': 'aproached',
          'pleasant': 'plesent',
          'difficulty': 'dificulty',
          'appointments': 'apointments',
          'base': 'basse',
          'conditioning': 'conditining',
          'earliest': 'earlyest',
          'beginning': 'begining',
          'universally': 'universaly',
          'unresolved': 'unresloved',
          'length': 'lengh',
          'exponentially': 'exponentualy',
          'utilized': 'utalised',
          'set': 'et',
          'surveys': 'servays',
          'families': 'familys',
          'system': 'sysem',
          'approximately': 'aproximatly',
          'their': 'ther',
          'scheme': 'scheem',
          'speaking': 'speeking',
          'repetitive': 'repetative',
          'inefficient': 'ineffiect',
          'geneva': 'geniva',
          'exactly': 'exsactly',
          'immediate': 'imediate',
          'appreciation': 'apreciation',
          'luckily': 'luckeley',
          'eliminated': 'elimiated',
          'believe': 'belive',
          'appreciated': 'apreciated',
          'readjusted': 'reajusted',
          'were': 'wer where',
          'feeling': 'fealing',
          'and': 'anf',
          'false': 'faulse',
          'seen': 'seeen',
          'interrogating': 'interogationg',
          'academically': 'academicly',
          'relatively': 'relativly relitivly',
          'traditionally': 'traditionaly',
          'studying': 'studing',
          'majority': 'majorty',
          'build': 'biuld',
          'aggravating': 'agravating',
          'transactions': 'trasactions',
          'arguing': 'aurguing',
          'sheets': 'sheertes',
          'successive': 'sucsesive sucessive',
          'segment': 'segemnt',
          'especially': 'especaily',
          'later': 'latter',
          'senior': 'sienior',
          'dragged': 'draged',
          'atmosphere': 'atmospher',
          'drastically': 'drasticaly',
          'particularly': 'particulary',
          'visitor': 'vistor',
          'session': 'sesion',
          'continually': 'contually',
          'availability': 'avaiblity',
          'busy': 'buisy',
          'parameters': 'perametres',
          'surroundings': 'suroundings seroundings',
          'employed': 'emploied',
          'adequate': 'adiquate',
          'handle': 'handel',
          'means': 'meens',
          'familiar': 'familer',
          'between': 'beeteen',
          'overall': 'overal',
          'timing': 'timeing',
          'committees': 'comittees commitees',
          'queries': 'quies',
          'econometric': 'economtric',
          'erroneous': 'errounous',
          'decides': 'descides',
          'reference': 'refereence refference',
          'intelligence': 'inteligence',
          'edition': 'ediion ediition',
          'are': 'arte',
          'apologies': 'appologies',
          'thermawear': 'thermawere thermawhere',
          'techniques': 'tecniques',
          'voluntary': 'volantary',
          'subsequent': 'subsequant subsiquent',
          'currently': 'curruntly',
          'forecast': 'forcast',
          'weapons': 'wepons',
          'routine': 'rouint',
          'neither': 'niether',
          'approach': 'aproach',
          'available': 'availble',
          'recently': 'reciently',
          'ability': 'ablity',
          'nature': 'natior',
          'commercial': 'comersial',
          'agencies': 'agences',
          'however': 'howeverr',
          'suggested': 'sugested',
          'career': 'carear',
          'many': 'mony',
          'annual': 'anual',
          'according': 'acording',
          'receives': 'recives recieves',
          'interesting': 'intresting',
          'expense': 'expence',
          'relevant': 'relavent relevaant',
          'table': 'tasble',
          'throughout': 'throuout',
          'conference': 'conferance',
          'sensible': 'sensable',
          'described': 'discribed describd',
          'union': 'unioun',
          'interest': 'intrest',
          'flexible': 'flexable',
          'refered': 'reffered',
          'controlled': 'controled',
          'sufficient': 'suficient',
          'dissension': 'desention',
          'adaptable': 'adabtable',
          'representative': 'representitive',
          'irrelevant': 'irrelavent',
          'unnecessarily': 'unessasarily',
          'applied': 'upplied',
          'apologised': 'appologised',
          'these': 'thees thess',
          'choices': 'choises',
          'will': 'wil',
          'procedure': 'proceduer',
          'shortened': 'shortend',
          'manually': 'manualy',
          'disappointing': 'dissapoiting',
          'excessively': 'exessively',
          'comments': 'coments',
          'containing': 'containg',
          'develop': 'develope',
          'credit': 'creadit',
          'government': 'goverment',
          'acquaintances': 'aquantences',
          'orientated': 'orentated',
          'widely': 'widly',
          'advise': 'advice',
          'difficult': 'dificult',
          'investigated': 'investegated',
          'bonus': 'bonas',
          'conceived': 'concieved',
          'nationally': 'nationaly',
          'compared': 'comppared compased',
          'moving': 'moveing',
          'necessity': 'nessesity',
          'opportunity': 'oppertunity oppotunity opperttunity',
          'thoughts': 'thorts',
          'equalled': 'equaled',
          'variety': 'variatry',
          'analysis': 'analiss analsis analisis',
          'patterns': 'pattarns',
          'qualities': 'quaties',
          'easily': 'easyly',
          'organization': 'oranisation oragnisation',
          'the': 'thw hte thi',
          'corporate': 'corparate',
          'composed': 'compossed',
          'enormously': 'enomosly',
          'financially': 'financialy',
          'functionally': 'functionaly',
          'discipline': 'disiplin',
          'announcement': 'anouncement',
          'progresses': 'progressess',
          'except': 'excxept',
          'recommending': 'recomending',
          'mathematically': 'mathematicaly',
          'source': 'sorce',
          'combine': 'comibine',
          'input': 'inut',
          'careers': 'currers carrers',
          'resolved': 'resoved',
          'demands': 'diemands',
          'unequivocally': 'unequivocaly',
          'suffering': 'suufering',
          'immediately': 'imidatly imediatly',
          'accepted': 'acepted',
          'projects': 'projeccts',
          'necessary': 'necasery nessasary nessisary neccassary',
          'journalism': 'journaism',
          'unnecessary': 'unessessay',
          'night': 'nite',
          'output': 'oputput',
          'security': 'seurity',
          'essential': 'esential',
          'beneficial': 'benificial benficial',
          'explaining': 'explaning',
          'supplementary': 'suplementary',
          'questionnaire': 'questionare',
          'employment': 'empolyment',
          'proceeding': 'proceding',
          'decision': 'descisions descision',
          'per': 'pere',
          'discretion': 'discresion',
          'reaching': 'reching',
          'analysed': 'analised',
          'expansion': 'expanion',
          'although': 'athough',
          'subtract': 'subtrcat',
          'analysing': 'aalysing',
          'comparison': 'comparrison',
          'months': 'monthes',
          'hierarchal': 'hierachial',
          'misleading': 'missleading',
          'commit': 'comit',
          'auguments': 'aurgument',
          'within': 'withing',
          'obtaining': 'optaning',
          'accounts': 'acounts',
          'primarily': 'pimarily',
          'operator': 'opertor',
          'accumulated': 'acumulated',
          'extremely': 'extreemly',
          'there': 'thear',
          'summarys': 'sumarys',
          'analyse': 'analiss',
          'understandable': 'understadable',
          'safeguard': 'safegaurd',
          'consist': 'consisit',
          'declarations': 'declaratrions',
          'minutes': 'muinutes muiuets',
          'associated': 'assosiated',
          'accessibility': 'accessability',
          'examine': 'examin',
          'surveying': 'servaying',
          'politics': 'polatics',
          'annoying': 'anoying',
          'again': 'agiin',
          'assessing': 'accesing',
          'ideally': 'idealy',
          'scrutinized': 'scrutiniesed',
          'simular': 'similar',
          'personnel': 'personel',
          'whereas': 'wheras',
          'when': 'whn',
          'geographically': 'goegraphicaly',
          'gaining': 'ganing',
          'requested': 'rquested',
          'separate': 'seporate',
          'students': 'studens',
          'prepared': 'prepaired',
          'generated': 'generataed',
          'graphically': 'graphicaly',
          'suited': 'suted',
          'variable': 'varible vaiable',
          'building': 'biulding',
          'required': 'reequired',
          'necessitates': 'nessisitates',
          'together': 'togehter',
          'profits': 'proffits'}

# When run as a script, print the summary for the first test table only.
if __name__ == '__main__':
    print(spelltest(tests1))

Download .txt

gitextract_rl8hzz88/

├── LICENSE
├── README.rst
├── autocorrect/
│   ├── __init__.py
│   ├── nlp_parser.py
│   ├── utils.py
│   ├── word.py
│   ├── word_lists.py
│   └── words.bz2
├── setup.py
└── unit_tests/
    └── test.py

Download .txt

SYMBOL INDEX (21 symbols across 5 files)

FILE: autocorrect/__init__.py
  function spell (line 19) | def spell(word):

FILE: autocorrect/nlp_parser.py
  function parse (line 18) | def parse(lang_sample):

FILE: autocorrect/utils.py
  function words_from_archive (line 24) | def words_from_archive(filename, include_dups=False, map_case=False):
  function concat (line 38) | def concat(*args):
  class Zero (line 45) | class Zero(dict):
    method __getitem__ (line 48) | def __getitem__(self, key):
    method get (line 51) | def get(self, key):

FILE: autocorrect/word.py
  class Word (line 24) | class Word(object):
    method __init__ (line 27) | def __init__(self, word):
    method _deletes (line 42) | def _deletes(self):
    method _transposes (line 47) | def _transposes(self):
    method _replaces (line 52) | def _replaces(self):
    method _inserts (line 58) | def _inserts(self):
    method typos (line 64) | def typos(self):
    method double_typos (line 69) | def double_typos(self):
  function common (line 75) | def common(words):
  function exact (line 79) | def exact(words):
  function known (line 83) | def known(words):
  function known_as_lower (line 87) | def known_as_lower(words):
  function get_case (line 91) | def get_case(word, correction):

FILE: unit_tests/test.py
  function spelltest (line 14) | def spelltest(tests, verbose=False):

Download .json

Condensed preview — 10 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (31K chars).

[
  {
    "path": "LICENSE",
    "chars": 1082,
    "preview": "The MIT License (MIT)\n\nCopyright (c) 2014 Jonas McCallum\n\nPermission is hereby granted, free of charge, to any person ob"
  },
  {
    "path": "README.rst",
    "chars": 479,
    "preview": "===========\nautocorrect\n===========\nPython 3 Spelling Corrector\n\nDeprecated Fork\n===============\nThis is a deprecated fo"
  },
  {
    "path": "autocorrect/__init__.py",
    "chars": 807,
    "preview": "# Python 3 Spelling Corrector\n#\n# Copyright 2014 Jonas McCallum.\n# Updated for Python 3, based on Peter Norvig's\n# 2007 "
  },
  {
    "path": "autocorrect/nlp_parser.py",
    "chars": 701,
    "preview": "# Python 3 Spelling Corrector\n#\n# Copyright 2014 Jonas McCallum.\n# Updated for Python 3, based on Peter Norvig's\n# 2007 "
  },
  {
    "path": "autocorrect/utils.py",
    "chars": 1509,
    "preview": "# Python 3 Spelling Corrector\n#\n# Copyright 2014 Jonas McCallum.\n# Updated for Python 3, based on Peter Norvig's\n# 2007 "
  },
  {
    "path": "autocorrect/word.py",
    "chars": 3108,
    "preview": "# Python 3 Spelling Corrector\n#\n# Copyright 2014 Jonas McCallum.\n# Updated for Python 3, based on Peter Norvig's\n# 2007 "
  },
  {
    "path": "autocorrect/word_lists.py",
    "chars": 1272,
    "preview": "# Python 3 Spelling Corrector\n#\n# Copyright 2014 Jonas McCallum.\n# Updated for Python 3, based on Peter Norvig's\n# 2007 "
  },
  {
    "path": "setup.py",
    "chars": 803,
    "preview": "from distutils.core import setup\n\nsetup(name='autocorrect',\n      version='0.3.0',\n      packages=['autocorrect'],\n     "
  },
  {
    "path": "unit_tests/test.py",
    "chars": 19984,
    "preview": "import os, sys, time\nfrom copy import deepcopy\n\nPATH = os.path.abspath(os.path.dirname(__file__))\nSOURCE_DIR = os.path.s"
  }
]

// ... and 1 more files (download for full content)

About this extraction

This page contains the full source code of the phatpiglet/autocorrect GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 10 files (29.0 KB), approximately 8.0k tokens, and a symbol index with 21 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo