SYMBOL INDEX (62 symbols across 10 files) FILE: src/DictBuilder.py class WordDictBuilder (line 15) | class WordDictBuilder: method __init__ (line 16) | def __init__(self, ori_path='', filelist=[], tokenlist=[]): method run (line 25) | def run(self): method _updateDict (line 31) | def _updateDict(self, filepath): method _updateDictByTokenList (line 37) | def _updateDictByTokenList(self): method save (line 43) | def save(self, filepath): FILE: src/DictUtils.py class WordDict (line 8) | class WordDict(dict): method __init__ (line 12) | def __init__(self, dict_path=None): method load_dict (line 15) | def load_dict(self, dict_path): method add_one (line 26) | def add_one(self, word): method save_dict (line 33) | def save_dict(self, dict_path): method __del__ (line 39) | def __del__(self): FILE: src/DocUtils.py class DocDict (line 8) | class DocDict(dict): method __init__ (line 12) | def __init__(self, fpath=None): method load_from_db (line 16) | def load_from_db(self): method load_from_file (line 19) | def load_from_file(self, fpath): method update (line 28) | def update(self, docid, doc_str): method save_to_file (line 32) | def save_to_file(self, fpath): method __del__ (line 36) | def __del__(self): FILE: src/Utils.py function norm_vector_nonzero (line 12) | def norm_vector_nonzero(ori_vec): function cosine_distance_nonzero (line 22) | def cosine_distance_nonzero(feat_vec1, feat_vec2, norm=True): function euclidean_distance_nonzero (line 40) | def euclidean_distance_nonzero(feat_vec1, feat_vec2, norm=True): function norm_vector (line 61) | def norm_vector(ori_vec): function cosine_distance (line 71) | def cosine_distance(feat_vec1, feat_vec2, norm=True): function euclidean_distance (line 85) | def euclidean_distance(feat_vec1, feat_vec2, norm=True): FILE: src/features.py class FeatureBuilder (line 13) | class FeatureBuilder: method __init__ (line 14) | def __init__(self, word_dict): method compute (line 17) | def compute(self, token_list): method _add_word (line 24) | def _add_word(self, word): method update_words (line 28) | def update_words(self, word_list=[]): class FeatureBuilderUpdate (line 32) | class FeatureBuilderUpdate(FeatureBuilder): method _add_word (line 33) | def _add_word(self, word): function feature_single (line 37) | def feature_single(inputfile, outputfile): FILE: src/isSimilar.py class DocFeatLoader (line 15) | class DocFeatLoader: method __init__ (line 16) | def __init__(self, simhash_builder, feat_nonzero): FILE: src/launch_incre.py class FeatureContainer (line 16) | class FeatureContainer: method __init__ (line 17) | def __init__(self, word_dict_path): method compute_feature (line 31) | def compute_feature(self, token_list): FILE: src/simhash_imp.py function hamming_distance (line 15) | def hamming_distance(hash_a, hash_b, hashbits=128): class SimhashBuilder (line 22) | class SimhashBuilder: method __init__ (line 23) | def __init__(self, word_list=[], hashbits=128): method _string_hash (line 33) | def _string_hash(self, word): method sim_hash_nonzero (line 48) | def sim_hash_nonzero(self, feature_vec): method sim_hash (line 67) | def sim_hash(self, feature_vec): method _add_word (line 87) | def _add_word(self, word): method update_words (line 90) | def update_words(self, word_list=[]): class simhash (line 94) | class simhash(): method __init__ (line 95) | def __init__(self, tokens='', hashbits=128): method __str__ (line 99) | def __str__(self): method __long__ (line 102) | def __long__(self): method __float__ (line 105) | def __float__(self): method simhash (line 108) | def simhash(self, tokens): method _string_hash (line 130) | def _string_hash(self, v): method hamming_distance (line 145) | def hamming_distance(self, other_hash): method similarity (line 153) | def similarity(self, other_hash): FILE: src/tokens.py class JiebaTokenizer (line 14) | class JiebaTokenizer: method __init__ (line 15) | def __init__(self, stop_words_path, mode='s'): method tokens (line 23) | def tokens(self, intext): function token_single_file (line 32) | def token_single_file(input_fname, output_fname): FILE: test/test_token.py class JiebaTokenizerTestCase (line 15) | class JiebaTokenizerTestCase(unittest.TestCase): method setUp (line 17) | def setUp(self): method testTokens (line 20) | def testTokens(self):