SYMBOL INDEX (57 symbols across 1 files) FILE: nlp_zero.py function log (line 33) | def log(x): class Trie (line 40) | class Trie: method __init__ (line 46) | def __init__(self, path_or_trie=None): method __setitem__ (line 63) | def __setitem__(self, item, count): method __getitem__ (line 82) | def __getitem__(self, item): # 获取指定词的频率,不存在则返回0 method __delitem__ (line 91) | def __delitem__(self, item): # 删除某个词 method __iter__ (line 103) | def __iter__(self, _=None): # 以(词, 词频)的形式逐一返回所有记录 method __str__ (line 114) | def __str__(self): # 方便调试的显示 method __repr__ (line 118) | def __repr__(self): method search (line 121) | def search(self, sent): # 返回字符串中所有能找到的词语 method update (line 135) | def update(self, tire): # 用一个词典更新当前trie树 method get_proba (line 139) | def get_proba(self, w, logit=True): # 算词频 class DAG (line 147) | class DAG: method __init__ (line 153) | def __init__(self, nb_node, null_score=-100): method __setitem__ (line 158) | def __setitem__(self, start_end, score): # 构建图上的加权边 method optimal_path (line 164) | def optimal_path(self): method _all_paths (line 192) | def _all_paths(self, n): # all_paths的辅助函数,递归获取从n开始的所有路径 method all_paths (line 202) | def all_paths(self): # 返回所有连通路径(包括孤立节点) class Tokenizer (line 213) | class Tokenizer: method __init__ (line 218) | def __init__(self, word_trie=None): method simple_tokenize (line 226) | def simple_tokenize(self, sent): # 利用动态规划完成分词的基本函数 method combine_Aa123 (line 236) | def combine_Aa123(self, simple_result): # 将连续的字母或者数字合并 method tokenize (line 256) | def tokenize(self, sent, combine_Aa123=True): class Word_Finder (line 275) | class Word_Finder: method __init__ (line 282) | def __init__(self, min_proba=1e-7, min_pmi=1): method texts (line 289) | def texts(self, sents, filtering=True): method train (line 297) | def train(self, sents, filtering=True, remove_weak_pairs=True): method find (line 315) | def find(self, sents, filtering=True, remove_weak_pairs=True): method export_trie (line 388) | def export_trie(self): method export_tokenizer (line 391) | def export_tokenizer(self): class Template (line 395) | class Template: method __init__ (line 399) | def __init__(self, container=None): method add (line 407) | def add(self, value): method is_trivial (line 415) | def is_trivial(self): method __iter__ (line 421) | def __iter__(self): method __hash__ (line 425) | def __hash__(self): method __eq__ (line 428) | def __eq__(self, t): method __str__ (line 431) | def __str__(self): method __repr__ (line 440) | def __repr__(self): class Template_Finder (line 444) | class Template_Finder: method __init__ (line 448) | def __init__(self, tokenize, window=5, min_proba=1e-5, min_pmi=2): method texts (line 457) | def texts(self, sents, filtering=True): method train (line 471) | def train(self, sents, filtering=True): method find (line 489) | def find(self, sents, filtering=True, remove_weak_grams=True): class XTrie (line 556) | class XTrie(Trie): method search (line 561) | def search(self, sent): method _search (line 571) | def _search( class SentTree (line 632) | class SentTree: method __init__ (line 637) | def __init__(self, parsing, words, start, end): method plot (line 656) | def plot(self, prefix='+---> '): # 可视化函数 method __str__ (line 671) | def __str__(self): method __repr__ (line 677) | def __repr__(self): class Parser (line 681) | class Parser: method __init__ (line 688) | def __init__(self, templates, tokenize, null_score=-20): method max (line 693) | def max(self, x): # 自定义最大值函数 method parse (line 699) | def parse(self, sent): method _parse (line 705) | def _parse(self, span, matches):