SYMBOL INDEX (222 symbols across 30 files) FILE: 法律文书切分程序及说明文档/split_case.py function get_text (line 8) | def get_text(filename): function find_line_no (line 35) | def find_line_no(lines, s, flag): # 根据关键字s在lines中寻找对应的行 flag False 倒序寻找 function cut_case (line 51) | def cut_case(filename): # 按规则切分文本 function write2file_cut (line 153) | def write2file_cut(target_path,item): function main (line 201) | def main(source_path,target_path): FILE: 法律文档—法律条文推荐/MemNN_classifier.py function get_each_case_laws (line 22) | def get_each_case_laws(sid,new_all_pieces_count_dict): function get_stories (line 32) | def get_stories(s_index,e_index,contents,top_50_law_text,new_all_pieces_... function get_vocab (line 47) | def get_vocab(train_stories,test_stories,test_stories2): function vectorize_stories (line 52) | def vectorize_stories(data,word_idx,story_maxlen,query_maxlen): function Mem_Model (line 67) | def Mem_Model(story_maxlen,query_maxlen,vocab_size): function Mem_Model2 (line 106) | def Mem_Model2(story_maxlen,query_maxlen,vocab_size): function get_labels_by_probs (line 165) | def get_labels_by_probs(prob_list,k): function generate_arrays (line 172) | def generate_arrays(inputs_train,queries_train,answers_train): function merge_labels (line 178) | def merge_labels(real_labels,pred_labels): function write2file (line 184) | def write2file(filename,flag,epoch,result,hloss,Accuracy): function main (line 196) | def main(): FILE: 法律文档—法律条文推荐/baseline_random/eval_functions.py function createPRF_me (line 7) | def createPRF_me(real_label,pred_label): # 计算prf值 写入文件 rate ig 比率 function get_PRF_subAcc (line 51) | def get_PRF_subAcc(all_case_laebls): function get_per_hloss (line 77) | def get_per_hloss(list1,list2): # 计算单个样本的汉明损失 function get_hloss (line 87) | def get_hloss(all_case_laebls): # 计算测试集的hloss function get_Accuracy (line 96) | def get_Accuracy(all_case_laebls): function main (line 111) | def main(): FILE: 法律文档—法律条文推荐/baseline_random/top10labels.py function create_pred_label_top10 (line 9) | def create_pred_label_top10(answers_test): function create_pred_label_by_random (line 16) | def create_pred_label_by_random(answers_test): function create_all_case_labels (line 23) | def create_all_case_labels(answers_test,pred_labels): function main (line 29) | def main(): FILE: 法律文档—法律条文推荐/extract_caipanyiju.py function readFromFile (line 14) | def readFromFile(filename): # 读文件 function get_info (line 22) | def get_info(contents): # 提取每个案件的首部 事实信息 和 裁判依据 function write2file (line 35) | def write2file(fname,res_list): # 写入文件 function main (line 42) | def main(): FILE: 法律文档—法律条文推荐/get_all_laws.py function merge_laws (line 9) | def merge_laws(laws_list): # key:xx法xx条 value:包含该法律条文的案件号 function digit2chinese (line 39) | def digit2chinese(digit): # 将数字转换为汉字 最多考虑百位 function remove_punct (line 56) | def remove_punct(piece): # 法律条文处理 function merge_same_laws (line 124) | def merge_same_laws(all_pieces_count_dict): # 合并相同的法律条文 function get_each_case_laws (line 141) | def get_each_case_laws(sid,new_all_pieces_count_dict): function get_nums_by_del_some_pieces (line 150) | def get_nums_by_del_some_pieces(k,new_all_pieces_count_dict): function get_counts_of_laws (line 162) | def get_counts_of_laws(new_all_pieces_count_dict): # 统计xx法出现的情况 function write2file (line 177) | def write2file(idict,filename): # 将laws和pieces的字典写入文件 function main (line 184) | def main(): FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/Model/embedding.py class MySentences (line 5) | class MySentences(object): method __init__ (line 6) | def __init__(self,contents): method __iter__ (line 8) | def __iter__(self): function main (line 15) | def main(): FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/em_lstm.py class Document (line 12) | class Document: method __init__ (line 13) | def __init__(self,polarity,words): function get_each_case_laws (line 18) | def get_each_case_laws(sid,new_all_pieces_count_dict): function make_documents (line 28) | def make_documents(icontent,polarity): # 生成documents 最大熵分类器的套路 function make_document_per_case (line 39) | def make_document_per_case(icontent,polarity_list): function get_contents_documents (line 52) | def get_contents_documents(id_list,polarity,contents): # 根据idlist从conte... function get_contents_documents_for_test (line 61) | def get_contents_documents_for_test(test_id_list,pos_id_list,neg_id_list... function split_case_by_law_1 (line 73) | def split_case_by_law_1(new_all_pieces_count_dict,law): function split_case_by_law_2 (line 103) | def split_case_by_law_2(new_all_pieces_count_dict,law): function get_trains_tests_order_by_polarity (line 138) | def get_trains_tests_order_by_polarity(law,new_all_pieces_count_dict,con... function get_trains_tests_order_by_caseId (line 191) | def get_trains_tests_order_by_caseId(law,new_all_pieces_count_dict,conte... function trainAndTestByLstm (line 242) | def trainAndTestByLstm(trains,vals,tests,my_model,result_file,law_index,... function main (line 246) | def main(): FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/eval_functions.py function createPRF_me (line 7) | def createPRF_me(real_label,pred_label): # 计算prf值 写入文件 rate ig 比率 function get_PRF_subAcc (line 51) | def get_PRF_subAcc(all_case_laebls): function get_per_hloss (line 78) | def get_per_hloss(list1,list2): # 计算单个样本的汉明损失 function get_hloss (line 88) | def get_hloss(all_case_laebls): # 计算测试集的hloss function get_Accuracy (line 97) | def get_Accuracy(all_case_laebls): function main (line 112) | def main(): FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/get_multi_label_result.py function readFromFile (line 12) | def readFromFile(filesname): function get_all_case_labels (line 28) | def get_all_case_labels(source_path,k): # k用于限制预测标签的个数 只保留预测概率最大的k个 function cut_pred_label (line 58) | def cut_pred_label(all_case_laebls,k): function createPRF_me (line 74) | def createPRF_me(real_label,pred_label): # 计算prf值 写入文件 rate ig 比率 function main (line 117) | def main(): FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/lstm_process.py class Document (line 30) | class Document: method __init__ (line 31) | def __init__(self,polarity,words): function createVec (line 35) | def createVec(docs,my_model): # 生成词向量表示 function LSTM_model (line 67) | def LSTM_model(X_train,Y_train,X_val,Y_val,X_test,Y_test,test_label): function LSTM_model2 (line 134) | def LSTM_model2(X_train,Y_train,X_val,Y_val,X_test,Y_test,test_label): function get_documents (line 196) | def get_documents(trains,vals,tests,my_model): # 从em_lstm.py中接收数据供实验所用 ... function createResult (line 219) | def createResult(pred_label,real_label,law_index,lenT,lenV,lenP,lenN,log... function make_result_file (line 268) | def make_result_file(best_pred_label,best_pred_prob,real_label,result_fi... function lstm_run (line 280) | def lstm_run(trains,vals,tests,my_model,result_file,law_index,lenT,lenV,... FILE: 法律文档—法律条文推荐/lstm_classifier_imbalance/tongji_case.py function get_each_case_laws (line 10) | def get_each_case_laws(sid,new_all_pieces_count_dict): function get_pieces_count (line 18) | def get_pieces_count(contents,new_all_pieces_count_dict): function get_tests_predAndpred_pieces_count (line 25) | def get_tests_predAndpred_pieces_count(all_case_laebls): function tongji_count (line 35) | def tongji_count(count_dict,length): function main (line 41) | def main(): FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/documents.py class Document (line 14) | class Document: method __init__ (line 15) | def __init__(self,polarity,words): function get_each_case_laws (line 20) | def get_each_case_laws(sid,new_all_pieces_count_dict): function make_documents (line 32) | def make_documents(icontent,polarity): # 生成documents 最大熵分类器的套路 function make_document_per_case (line 43) | def make_document_per_case(icontent,polarity_list): function get_contents_documents (line 56) | def get_contents_documents(id_list,polarity,contents): # 根据idlist从conte... function get_contents_documents_for_test (line 65) | def get_contents_documents_for_test(test_id_list,pos_id_list,neg_id_list... function split_case_by_law_1 (line 101) | def split_case_by_law_1(new_all_pieces_count_dict,law): function split_case_by_law_2 (line 124) | def split_case_by_law_2(new_all_pieces_count_dict,law): function trainAndTest (line 146) | def trainAndTest(trains,tests,result_file,law_index,lenTP,lenTN,lenP,len... function get_trains_tests_order_by_polarity (line 155) | def get_trains_tests_order_by_polarity(law,new_all_pieces_count_dict,con... function get_trains_tests_order_by_caseId (line 204) | def get_trains_tests_order_by_caseId(law,new_all_pieces_count_dict,conte... function main (line 239) | def main(): FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/eval_functions.py function createPRF_me (line 7) | def createPRF_me(real_label,pred_label): # 计算prf值 写入文件 rate ig 比率 function get_PRF_subAcc (line 51) | def get_PRF_subAcc(all_case_labels): function get_per_hloss (line 81) | def get_per_hloss(list1,list2): # 计算单个样本的汉明损失 function get_hloss (line 91) | def get_hloss(all_case_labels): # 计算测试集的hloss function get_Accuracy (line 100) | def get_Accuracy(all_case_labels): function xiugai_pred (line 119) | def xiugai_pred(all_case_labels): function main (line 125) | def main(): FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/get_multi_label_result.py function readFromFile (line 12) | def readFromFile(filesname): function get_all_case_labels (line 28) | def get_all_case_labels(source_path,k): # k用于限制预测标签的个数 只保留预测概率最大的k个 function cut_pred_label (line 59) | def cut_pred_label(all_case_laebls,k,labelNum): function createPRF_me (line 76) | def createPRF_me(real_label,pred_label): # 计算prf值 写入文件 rate ig 比率 function main (line 119) | def main(): FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/maxent.py function get_lexcion (line 11) | def get_lexcion(documents): #生成字典 function getDFWords (line 19) | def getDFWords(documents): #词频 保留出现次数大于5的词汇 function createFormatText (line 38) | def createFormatText(documents,lexcion,path): #生成mallet工具所需的文本格式 function file2Bin (line 64) | def file2Bin(FilePath,BinPath): #tests.txt时要加上pipe function train (line 78) | def train(trainBinPath,modelPath): function classify (line 88) | def classify(modelPath,testBinPath,resultPath): function createResult2 (line 98) | def createResult2(resultPath): function getPredLabel (line 151) | def getPredLabel(FilePath): # 得到预测的类别和真实的类别 function createPRF (line 165) | def createPRF(FilePath,seed,log_path='maxent_prf.txt'): # 计算prf值 写入文件 function createPRF_me (line 208) | def createPRF_me(pred_label,real_label,law_index,lenTP,lenTN,lenP,lenN,l... function getPredProb (line 258) | def getPredProb(FilePath): function me_classify (line 280) | def me_classify(trains,tests,result_file='result.txt'): # 默认为result.txt FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/tongji_case.py function get_each_case_laws (line 10) | def get_each_case_laws(sid,new_all_pieces_count_dict): function get_pieces_count (line 18) | def get_pieces_count(contents,new_all_pieces_count_dict): function get_tests_predAndpred_pieces_count (line 25) | def get_tests_predAndpred_pieces_count(all_case_laebls): function tongji_count (line 35) | def tongji_count(count_dict,length): function main (line 41) | def main(): FILE: 法律文档—法律条文推荐/maxent_classifier_imbalance/word_cut.py function cut (line 9) | def cut(contents): # 分词 function main (line 17) | def main(): FILE: 法律文档—法律条文推荐/new_model_2/extract_law_text.py function readFromFile (line 7) | def readFromFile(filename): # 读文件 function cut (line 16) | def cut(contents): # 分词 function main (line 24) | def main(): FILE: 法律文档—法律条文推荐/new_model_2/tf-idf.py class Document (line 7) | class Document: method __init__ (line 8) | def __init__(self,words): function getDFWords (line 11) | def getDFWords(documents): #词频 保留出现次数大于3的词汇 function make_new_documents (line 31) | def make_new_documents(lexcion,documents,contents): function make_documents (line 43) | def make_documents(new_contents): function tfidf (line 52) | def tfidf(documents): # 计算tf-idf function select_feat_by_tfidf (line 65) | def select_feat_by_tfidf(documents,contents): function main (line 76) | def main(): FILE: 法律文档—法律条文推荐/new_model_2/tongji.py function get_count (line 4) | def get_count(count,rate): function main (line 11) | def main(): FILE: 法律文档—法律条文推荐/tongji_laws.py function get_all_yiju (line 10) | def get_all_yiju(filename): # 得到所有的裁判依据 function deal_yiju (line 19) | def deal_yiju(yiju): # 处理法律依据 提取法律条文 xx法xx条 function write2file (line 46) | def write2file(contents,yiju_list,filename): function main (line 58) | def main(): FILE: 法律文档相似度计算程序及文档/cal_similarity.py class Document (line 12) | class Document: method __init__ (line 13) | def __init__(self,words,filename): function get_text (line 17) | def get_text(f_path,filename): function extract (line 29) | def extract(lines,filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function tfidf (line 45) | def tfidf(documents): # 计算tf-idf function test_tfidf (line 64) | def test_tfidf(documents,idf_words): # 计算测试样本的tfidf function cosine (line 74) | def cosine(source,target): # 计算两文本cos距离 function similar (line 84) | def similar(source,target): # 计算两个文档的相似度 function find_max_sim (line 87) | def find_max_sim(test,targets,top_k): # 寻找和某个文件最相似的k个文件 function make_documents (line 96) | def make_documents(f_path,filenames,key_part): # 将文件生成所需格式 function process (line 109) | def process(test,target,top_k): # 在target中寻找与test最相似的top_k个文件 function write2file (line 119) | def write2file(test_file,sim_filenames,saved_file): function start (line 127) | def start(train_path,test_path,top_k,key_part,use_model): function main (line 164) | def main(): FILE: 法律文档相似度计算程序及文档/cal_similarity2.py class Document (line 15) | class Document: method __init__ (line 16) | def __init__(self,words,filename): function get_text (line 20) | def get_text(f_path,filename): function extract (line 33) | def extract(lines,filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function tfidf (line 51) | def tfidf(documents): # 计算tf-idf function cosine (line 66) | def cosine(source,target): # 计算两文本cos距离 function similar (line 76) | def similar(source,target): # 计算两个文档的相似度 function find_max_sim (line 79) | def find_max_sim(test,targets,top_k): # 寻找和某个文件最相似的k个文件 function make_documents (line 88) | def make_documents(f_path,filenames,key_part): # 将文件生成所需格式 function process (line 101) | def process(test,target,top_k): # 在target中寻找与test最相似的top_k个文件 function write2file (line 111) | def write2file(test_file,sim_filenames,saved_file): function start (line 119) | def start(train_path,test_path,top_k,key_part): function main (line 142) | def main(): FILE: 法律文档相似度计算程序及文档/jieba_seg.py function cut (line 10) | def cut(filename1,filename2): # 对每一个文件seg and pos function getFileList (line 18) | def getFileList(source_path,target_path): function main (line 27) | def main(): FILE: 法律文档相似度计算程序及文档/train_model.py class Document (line 10) | class Document: method __init__ (line 11) | def __init__(self,words,filename): function get_text (line 15) | def get_text(f_path,filename): function extract (line 27) | def extract(lines,filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function make_documents (line 43) | def make_documents(f_path,filenames,key_part): # 将文件生成所需格式 function tfidf (line 56) | def tfidf(documents): # 计算tf-idf function start (line 75) | def start(train_path,key_part): function main (line 86) | def main(): FILE: 法律文档聚类方法研究及文档/cal_similarity.py class Document (line 9) | class Document: method __init__ (line 10) | def __init__(self,words,filename): function get_text (line 14) | def get_text(f_path,filename): function extract (line 26) | def extract(lines,filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function tfidf (line 39) | def tfidf(documents): # 计算tf-idf function cosine (line 52) | def cosine(source,target): # 计算两文本cos距离 function similar (line 62) | def similar(source,target): # 计算两个文档的相似度 function find_max_sim (line 65) | def find_max_sim(test,targets,top_k): # 寻找和某个文件最相似的k个文件 function make_documents (line 74) | def make_documents(f_path,filenames,key_part): # 将文件生成所需格式 function process (line 90) | def process(test,target,top_k): # 在target中寻找与test最相似的top_k个文件 function write2file (line 100) | def write2file(test_file,sim_filenames,saved_file): function start (line 108) | def start(train_path,test_path,top_k,key_part): function main (line 129) | def main(): FILE: 法律文档聚类方法研究及文档/cluster_test.py class Document (line 6) | class Document: # 文档类 文件名 词 method __init__ (line 7) | def __init__(self,words,filename): function get_text (line 11) | def get_text(filename): function extract (line 23) | def extract(lines,filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function tfidf (line 36) | def tfidf(documents): # 计算tf-idf function cosine (line 49) | def cosine(source,target): function similar (line 59) | def similar(source,target): function init (line 62) | def init(documents): function getDocuments (line 74) | def getDocuments(item): function clustering (line 94) | def clustering(documents,k=5): # 层次聚类 function make_documents (line 131) | def make_documents(filenames): # 将文件生成所需格式 class Item (line 148) | class Item: method __init__ (line 149) | def __init__(self,children): function main (line 183) | def main(): FILE: 法律文档聚类方法研究及文档/kmeans_cluster.py class Document (line 21) | class Document: method __init__ (line 22) | def __init__(self,words,filename,text): function get_text (line 27) | def get_text(filename): function extract (line 40) | def extract(filename,key_part=['# 首部']): # 按关键部分提取文书内容 默认只使用首部 function tfidf (line 57) | def tfidf(documents): # 计算tf-idf function get_tfidf (line 70) | def get_tfidf(documents): # 使用gensim计算得到tfidf function create_format_mat (line 86) | def create_format_mat(documents,words): function clustering (line 105) | def clustering(docs,n_clusters): # 聚类 n_clusters 类别数 function write2file (line 113) | def write2file(item_parts): # 将结果写入文件 function cluster_process (line 127) | def cluster_process(filenames,key_part,s,n_clusters=2): function main (line 170) | def main(): FILE: 法律文档聚类方法研究及文档/test_tfidf.py class Document (line 8) | class Document: # 文档类 文件名 词 method __init__ (line 9) | def __init__(self,words,filename): function tfidf (line 13) | def tfidf(documents): # 计算tf-idf function get_tfidf (line 26) | def get_tfidf(documents): # 使用gensim计算得到tfidf