[
  {
    "path": ".classpath",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<classpath>\r\n\t<classpathentry including=\"**/*.java\" kind=\"src\" output=\"target/classes\" path=\"src/main/java\">\r\n\t\t<attributes>\r\n\t\t\t<attribute name=\"optional\" value=\"true\"/>\r\n\t\t\t<attribute name=\"maven.pomderived\" value=\"true\"/>\r\n\t\t</attributes>\r\n\t</classpathentry>\r\n\t<classpathentry kind=\"src\" path=\"src/main/resources\"/>\r\n\t<classpathentry kind=\"src\" output=\"target/test-classes\" path=\"src/test/java\">\r\n\t\t<attributes>\r\n\t\t\t<attribute name=\"optional\" value=\"true\"/>\r\n\t\t\t<attribute name=\"maven.pomderived\" value=\"true\"/>\r\n\t\t</attributes>\r\n\t</classpathentry>\r\n\t<classpathentry excluding=\"**\" kind=\"src\" output=\"target/test-classes\" path=\"src/test/resources\">\r\n\t\t<attributes>\r\n\t\t\t<attribute name=\"maven.pomderived\" value=\"true\"/>\r\n\t\t</attributes>\r\n\t</classpathentry>\r\n\t<classpathentry kind=\"con\" path=\"org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7\">\r\n\t\t<attributes>\r\n\t\t\t<attribute name=\"maven.pomderived\" value=\"true\"/>\r\n\t\t</attributes>\r\n\t</classpathentry>\r\n\t<classpathentry kind=\"con\" path=\"org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER\">\r\n\t\t<attributes>\r\n\t\t\t<attribute name=\"maven.pomderived\" value=\"true\"/>\r\n\t\t</attributes>\r\n\t</classpathentry>\r\n\t<classpathentry kind=\"output\" path=\"target/classes\"/>\r\n</classpath>\r\n"
  },
  {
    "path": ".gitignore",
    "content": "/bin/\n/target/\n*/target/*\ntarget/*\n"
  },
  {
    "path": ".project",
    "content": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<projectDescription>\r\n\t<name>sensitive-words-filter</name>\r\n\t<comment></comment>\r\n\t<projects>\r\n\t</projects>\r\n\t<buildSpec>\r\n\t\t<buildCommand>\r\n\t\t\t<name>org.eclipse.jdt.core.javabuilder</name>\r\n\t\t\t<arguments>\r\n\t\t\t</arguments>\r\n\t\t</buildCommand>\r\n\t\t<buildCommand>\r\n\t\t\t<name>org.eclipse.m2e.core.maven2Builder</name>\r\n\t\t\t<arguments>\r\n\t\t\t</arguments>\r\n\t\t</buildCommand>\r\n\t</buildSpec>\r\n\t<natures>\r\n\t\t<nature>org.eclipse.m2e.core.maven2Nature</nature>\r\n\t\t<nature>org.eclipse.jdt.core.javanature</nature>\r\n\t</natures>\r\n</projectDescription>\r\n"
  },
  {
    "path": ".settings/org.eclipse.core.resources.prefs",
    "content": "eclipse.preferences.version=1\r\nencoding//src/main/java=UTF-8\r\nencoding//src/test/java=UTF-8\r\nencoding//src/test/resources=UTF-8\r\nencoding/<project>=UTF-8\r\n"
  },
  {
    "path": ".settings/org.eclipse.jdt.core.prefs",
    "content": "eclipse.preferences.version=1\r\norg.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7\r\norg.eclipse.jdt.core.compiler.compliance=1.7\r\norg.eclipse.jdt.core.compiler.problem.forbiddenReference=warning\r\norg.eclipse.jdt.core.compiler.source=1.7\r\n"
  },
  {
    "path": ".settings/org.eclipse.m2e.core.prefs",
    "content": "activeProfiles=\r\neclipse.preferences.version=1\r\nresolveWorkspaceProjects=true\r\nversion=1\r\n"
  },
  {
    "path": "_config.yml",
    "content": "theme: jekyll-theme-cayman"
  },
  {
    "path": "pom.xml",
    "content": "<project xmlns=\"http://maven.apache.org/POM/4.0.0\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n\txsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd\">\n\n\t<groupId>com.cnblogs.hoojo</groupId>\n\t<artifactId>sensitive-words-filter</artifactId>\n\t<version>1.0-SNAPSHOT</version>\n\t<modelVersion>4.0.0</modelVersion>\n\n\t<properties>\n\t\t<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>\n\t\t<maven.compiler.encoding>UTF-8</maven.compiler.encoding>\n\t\t<encoding>UTF-8</encoding>\n\t\t<skipTests>true</skipTests>\n\t\t<java_version>1.7</java_version>\n\t\t<java_source_version>1.7</java_source_version>\n\t\t<java_target_version>1.7</java_target_version>\n\t\t<maven_compiler_plugin>2.3.2</maven_compiler_plugin>\n\t\t<plexus_compiler_javac>1.8.1</plexus_compiler_javac>\n\t\t<maven_shade_plugin>1.7.1</maven_shade_plugin>\n\t\t<maven_resources_plugin>2.6</maven_resources_plugin>\n\t\t<spring_version>3.2.9.RELEASE</spring_version>\n\t</properties>\n\n\t<dependencies>\n\n\t\t<dependency>\n\t\t\t<groupId>com.google.guava</groupId>\n\t\t\t<artifactId>guava</artifactId>\n\t\t\t<version>20.0</version>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>junit</groupId>\n\t\t\t<artifactId>junit</artifactId>\n\t\t\t<version>4.13.1</version>\n\t\t\t<scope>test</scope>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>org.mockito</groupId>\n\t\t\t<artifactId>mockito-core</artifactId>\n\t\t\t<version>2.7.19</version>\n\t\t\t<scope>test</scope>\n\t\t</dependency>\n\n\t\t<!-- web servlet包开始 -->\n\t\t<dependency>\n\t\t\t<groupId>javax.servlet</groupId>\n\t\t\t<artifactId>javax.servlet-api</artifactId>\n\t\t\t<version>3.1.0</version>\n\t\t</dependency>\n\t\t<!-- web servlet包结束 
-->\n\n\t\t<dependency>\n\t\t\t<groupId>commons-lang</groupId>\n\t\t\t<artifactId>commons-lang</artifactId>\n\t\t\t<version>2.6</version>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>org.springframework</groupId>\n\t\t\t<artifactId>spring-web</artifactId>\n\t\t\t<version>${spring_version}</version>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>org.slf4j</groupId>\n\t\t\t<artifactId>slf4j-api</artifactId>\n\t\t\t<version>1.6.1</version>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>org.slf4j</groupId>\n\t\t\t<artifactId>slf4j-log4j12</artifactId>\n\t\t\t<version>1.6.1</version>\n\t\t</dependency>\n\n\t\t<!-- https://mvnrepository.com/artifact/oro/oro -->\n\t\t<dependency>\n\t\t\t<groupId>oro</groupId>\n\t\t\t<artifactId>oro</artifactId>\n\t\t\t<version>2.0.8</version>\n\t\t</dependency>\n\n\t\t<dependency>\n\t\t\t<groupId>commons-fileupload</groupId>\n\t\t\t<artifactId>commons-fileupload</artifactId>\n\t\t\t<version>1.3.3</version>\n\t\t</dependency>\n\t</dependencies>\n\n\t<build>\n\t\t<finalName>sensitive-words-filter-1.0</finalName>\n\t\t<plugins>\n\t\t\t<plugin>\n\t\t\t\t<groupId>org.apache.maven.plugins</groupId>\n\t\t\t\t<artifactId>maven-compiler-plugin</artifactId>\n\t\t\t\t<version>2.3.2</version>\n\t\t\t\t<configuration>\n\t\t\t\t\t<source>1.7</source>\n\t\t\t\t\t<target>1.7</target>\n\t\t\t\t\t<encoding>UTF-8</encoding>\n\t\t\t\t</configuration>\n\t\t\t</plugin>\n\t\t</plugins>\n\t\t\n\t\t<resources>\n\t\t\t<resource>\n\t\t\t\t<directory>src/main/java</directory>\n\t\t\t\t<includes>\n\t\t\t\t\t<include>**/*.xml</include>\n\t\t\t\t\t<include>**/*.json</include>\n\t\t\t\t</includes>\n\t\t\t</resource>\n\t\t</resources>\n\t</build>\n</project>\n"
  },
  {
    "path": "readme.md",
    "content": "\n\n\n# 敏感词过滤、脱敏\n\n[TOC]\n\n## 1、简介\n`sensitive-words-filter` 是一个`JavaWeb`工程下的过滤器，可以过滤一些敏感的字符串，如：`色情`、`政治`、`暴力`、`人名`等特殊字符，防止发表一些不和谐的词条给系统带来运营成本。\n\n目前`sensitive-words-filter`提供了如下几种算法脱敏支持：\n\n* `DFA(\"dfa算法\", DfaFilter.class)` 综合性能比较高，表现突出，过滤效果好\n* `TIRE(\"tire树算法\", TireTreeFilter.class),` 大文本过滤效率稍低\n* `HASH_BUCKET(\"二级hash算法\", HashBucketFilter.class),` 综合性能中等，实现简单易懂\n* `DAT(\"双数组算法\", DatFilter.class),` 小文本过滤效果突出，实现超简单\n* `TTMP(\"ttmp算法\", TtmpFilter.class),` 综合性能突出，性能稍低占有内存大，但效率快，匹配有漏词情况\n* `SIMHASH(\"simhash算法\", SimHashFilter.class)` 可以了解，不推荐使用\n\n以上每一种算法都有自己的特点，以供选择。\n\n\n\n## 2、使用方法\n\n> 提供两种方法，一种直接运行敏感词执行器`AbstractFilterExecutor`的实现类，该类的实现类是单例模式，提供常用的**判词、高亮、替换、查词**等接口；另一种就是工具类的接口`SensitiveWordsFilterUtils`,相对于执行器提供了**刷新缓存、刷新全部缓存**的接口\n\n\n\n下面以 `DatFilterExecutor`算法实现类为例演示\n\n### 2.1、初始化并添加敏感词\n> 初始化是初始化缓存和数据存储根对象，init方法就是初始化根对象的，put方法是添加敏感词\n\n```java\nDfaFilterExecutor.getInstance().init();\nDfaFilterExecutor.getInstance().put(\"中国人\");\nDfaFilterExecutor.getInstance().put(\"中国男人\");\nDfaFilterExecutor.getInstance().put(\"中国人民\");\nDfaFilterExecutor.getInstance().put(\"人民\");\nDfaFilterExecutor.getInstance().put(\"中间\");\nDfaFilterExecutor.getInstance().put(\"女人\");\n\nDfaFilterExecutor.getInstance().put(\"一举\");\nDfaFilterExecutor.getInstance().put(\"一举成名\");\nDfaFilterExecutor.getInstance().put(\"一举成名走四方\");\nDfaFilterExecutor.getInstance().put(\"成名\");\nDfaFilterExecutor.getInstance().put(\"走四方\");\n```\n\n\n### 2.2、匹配敏感词\n\n> 匹配敏感词会把符合的敏感词都找到，该方法有两个参数\n\n+ **接口说明：**\n\n```java\n/**\n* 返回匹配到的敏感词语\n* @author hoojo\n* @createDate 2018年2月9日 下午4:00:06\n* @param partMatch 是否部分匹配\n* @param content 被匹配的语句\n* @return 返回匹配的敏感词语集合\n*/\npublic Set<String> getWords(boolean partMatch, String content) throws RuntimeException;\n```\n\n+ **运行示例：**\n\n```java\nString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\nSystem.out.println(DfaFilterExecutor.getInstance().getWords(true, 
content));\nSystem.out.println(DfaFilterExecutor.getInstance().getWords(false, content));\n```\n\n+ **运行结果：**\n\n```\n[中国人, 女人, 中间, 中国男人]\n[中国人, 女人, 中间, 中国男人, 中国人民, 人民]\n```\n> 从上面结果发现部分匹配和完整匹配的结果集不一样，部分匹配是匹配到敏感词后立即退出当前匹配；而完整匹配会把所有词都匹配出来，也就是把敏感词中的小词也匹配到。\n\n### 2.3、过滤敏感词\n\n> 过滤敏感词主要是将匹配到的敏感词过滤掉，以某种字符串进行替换敏感词字符\n\n\n+ **接口说明：**\n\n```java\n/**\n* 过滤敏感词，并把敏感词替换为指定字符\n* @author hoojo\n* @createDate 2018年2月9日 下午4:38:12\n* @param partMatch 是否部分匹配\n* @param content 被匹配的语句\n* @param replaceChar 替换字符\n* @return 过滤后的字符串\n* @throws RuntimeException\n*/\npublic String filter(boolean partMatch, String content, char replaceChar) throws RuntimeException;\n```\n\n+ **运行示例：**\n\n```java\nSystem.out.println(DfaFilterExecutor.getInstance().filter(false, content, '*'));\n```\n\n\n+ **运行结果：**\n```\n我们***都是好人，在他们**有男人和**。****很惨，***民长期被压迫。\n```\n\n### 2.4、敏感词高亮\n\n> 敏感词高亮就是将匹配到的敏感字符以HTML的tag进行替换，这样在前端显示的时候就会比较突出\n\n+ **接口介绍：**\n\n```java\n/**\n * html高亮敏感词\n * @author hoojo\n * @createDate 2018年2月9日 下午4:37:33\n * @param partMatch 是否部分匹配\n * @param content 被匹配的语句\n * @return 返回html高亮敏感词\n * @throws RuntimeException\n */\npublic String highlight(boolean partMatch, String content) throws RuntimeException;\n```\n\n+ **运行示例：**\n```java\nSystem.out.println(DfaFilterExecutor.getInstance().highlight(false, content));\n```\n\n+ **运行结果：**\n```html\n我们<font color='red'>中国人</font>都是好人，在他们<font color='red'>中间</font>有男人和<font color='red'>女人</font>。<font color='red'>中国男人</font>很惨，<font color='red'>中国人</font>民长期被压迫。\n```\n\n\n### 2.5、是否存在敏感词\n\n> 判断一段文本是否包含敏感词，若包含立即返回true，否则 false\n\n+ **接口说明：**\n\n```java\n/**\n* 是否包含敏感字符\n* @author hoojo\n* @createDate 2018年2月9日 下午2:57:52\n* @param partMatch 是否支持匹配词语的一部分\n* @param content 被匹配内容\n* @return 是否包含敏感字符\n*/\npublic boolean contains(boolean partMatch, String content) throws RuntimeException;\n```\n\n+ **运行示例：**\n\n\n```java\nSystem.out.println(DfaFilterExecutor.getInstance().contains(true, content));\n```\n\n+ **运行结果：**\n\n```\ntrue\n```\n\n\n## 
3、各算法实现测试说明\n\n> 针对各算法进行测试，分别测试 匹配文本 344字符、5519字符、11304959字符\n\n+ **测试结果**\n\n| 算法接口                                     |       过滤字符数       |  耗时(毫秒)   |    内存消耗（KB）     |\n| :--------------------------------------- | :---------------: | :-------: | :-------------: |\n| `DFA(\"dfa算法\", DfaFilter.class)`          | 344/5519/11304959 |  5/7/241  | 3276/3276/42470 |\n| `TIRE(\"tire树算法\", TireTreeFilter.class) ` | 344/5519/11304959 | 1/9/12413 | 1638/1638/47934 |\n| `HASH_BUCKET(\"hash桶算法\")`                 | 344/5519/11304959 |  0/4/659  | 1638/1638/79269 |\n| `DAT(\"双数组算法\", DatFilter.class)`          | 344/5519/11304959 |  1/4/720  | 819/819/424066  |\n| `TTMP(\"ttmp算法\", TtmpFilter.class)`       | 344/5519/11304959 |  0/2/226  | 819/819/567125  |\n\n在文本约5000字及以下时，各算法差距不大；但在字符量大的情况下，差距明显。\n\n## 4、总结\n+ `TTMP`算法占用内存最多，速度最快，但存在漏词的问题。\n+ `DFA`算法表现良好，各方面都不错，比较实用，特别在大量文本情况下很稳定。\n+ `TIRE`算法在大量文本情况下，效率稍低。可以优化下查找速度。\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/SensitiveWordsFilterUtils.java",
    "content": "package com.cnblogs.hoojo.sensitivewords;\r\n\r\nimport java.util.Set;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.factory.FilterType;\r\nimport com.cnblogs.hoojo.sensitivewords.factory.SensitiveWordsFactory;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractSensitiveWordsFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.log.ApplicationLogging;\r\nimport com.google.common.base.Optional;\r\n\r\n/**\r\n * 分词执行器工具类\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:25:38\r\n * @file SensitiveWordsFilterUtils.java\r\n * @package com.cnblogs.hoojo.sensitivewords\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class SensitiveWordsFilterUtils extends ApplicationLogging {\r\n\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final SensitiveWordsFilterUtils INSTANCE = new SensitiveWordsFilterUtils();\r\n\t}\r\n\r\n\tpublic static final SensitiveWordsFilterUtils getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\r\n\tprivate SensitiveWordsFilterUtils() {\r\n\t}\r\n\r\n\t/**\r\n\t * 是否包含敏感字符\r\n\t * @author hoojo\r\n\t * @createDate 2018年2月9日 下午2:57:52\r\n\t * @param partMatch 是否支持匹配词语的一部分\r\n\t * @param content 被匹配内容\r\n\t * @return 是否包含敏感字符\r\n\t */\r\n\tpublic boolean contains(FilterType type, boolean partMatch, String content) throws Exception {\r\n\t\ttype = checkFilterType(type);\r\n\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\tthrow new RuntimeException(\"必填参数content 为空\");\r\n\t\t}\r\n\t\tdebug(\"执行敏感词接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\t\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\t\r\n\t\treturn wordsFilter.contains(partMatch, content);\r\n\t}\r\n\t\r\n\t/**\r\n\t * 返回匹配到的敏感词语\r\n\t * @author hoojo\r\n\t * @createDate 2018年2月9日 
下午4:00:06\r\n\t * @param partMatch 是否部分匹配\r\n\t * @param content 被匹配的语句\r\n\t * @return 返回匹配的敏感词语集合\r\n\t */\r\n\tpublic Set<String> getWords(FilterType type, boolean partMatch, String content) throws Exception {\r\n\t\ttype = checkFilterType(type);\r\n\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\tthrow new RuntimeException(\"必填参数content 为空\");\r\n\t\t}\r\n\r\n\t\tdebug(\"执行敏感词接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\tSet<String> words = wordsFilter.getWords(partMatch, content);\r\n\r\n\t\tdebug(\"包含敏感词：{}\", words);\r\n\t\treturn words;\r\n\t}\r\n\t\r\n\t/**\r\n\t * html高亮敏感词\r\n\t * @author hoojo\r\n\t * @createDate 2018年2月9日 下午4:37:33\r\n\t * @param partMatch 是否部分匹配\r\n\t * @param content 被匹配的语句\r\n\t * @return 返回html高亮敏感词\r\n\t * @throws RuntimeException\r\n\t */\r\n\tpublic String highlight(FilterType type, boolean partMatch, String content) throws Exception {\r\n\t\ttype = checkFilterType(type);\r\n\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\tthrow new RuntimeException(\"必填参数content 为空\");\r\n\t\t}\r\n\r\n\t\tdebug(\"执行“高亮”敏感词接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\tcontent = wordsFilter.highlight(partMatch, content);\r\n\r\n\t\tdebug(\"高亮敏感词：{}\", content);\r\n\t\treturn content;\r\n\t}\r\n\t\r\n\t/**\r\n\t * 过滤敏感词，并把敏感词替换为指定字符\r\n\t * @author hoojo\r\n\t * @createDate 2018年2月9日 下午4:38:12\r\n\t * @param partMatch 是否部分匹配\r\n\t * @param content 被匹配的语句\r\n\t * @param replaceChar 替换字符\r\n\t * @return 过滤后的字符串\r\n\t * @throws RuntimeException\r\n\t */\r\n\tpublic String filter(FilterType type, boolean partMatch, String target, Character replaceChar) throws Exception {\r\n\r\n\t\treplaceChar = Optional.fromNullable(replaceChar).or('*');\r\n\t\ttype = 
checkFilterType(type);\r\n\r\n\t\tif (StringUtils.isBlank(target)) {\r\n\t\t\tthrow new RuntimeException(\"必填参数content 为空\");\r\n\t\t}\r\n\r\n\t\tdebug(\"执行“过滤”敏感词接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\tString result = wordsFilter.filter(partMatch, target, replaceChar);\r\n\r\n\t\tdebug(\"脱敏结果：{}\", result);\r\n\t\treturn result;\r\n\t}\r\n\r\n\tpublic void refresh(FilterType type) throws Exception {\r\n\r\n\t\ttype = checkFilterType(type);\r\n\r\n\t\tdebug(\"执行“刷新”敏感词库缓存接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\twordsFilter.refresh();\r\n\t}\r\n\t\r\n\tpublic void refreshAll(FilterType type) throws Exception {\r\n\t\t\r\n\t\ttype = checkFilterType(type);\r\n\r\n\t\tdebug(\"执行“刷新”敏感词库和所有缓存接口：{}，算法：{}\", type.getClazz().getSimpleName(), type.getDesc());\r\n\t\tAbstractSensitiveWordsFilter wordsFilter = SensitiveWordsFactory.create(type);\r\n\r\n\t\twordsFilter.initAll();\r\n\t\twordsFilter.refreshAll();\r\n\t}\r\n\t\r\n\tpublic boolean contains(boolean firstPart, String content) throws Exception {\r\n\t\t\r\n\t\treturn contains(null, firstPart, content);\r\n\t}\r\n\t\r\n\tpublic Set<String> getWords(boolean firstPart, String content) throws Exception {\r\n\t\t\r\n\t\treturn getWords(null, firstPart, content);\r\n\t}\r\n\t\r\n\tpublic String highlight(boolean firstPart, String content) throws Exception {\r\n\t\t\r\n\t\treturn highlight(null, firstPart, content);\r\n\t}\r\n\t\r\n\tpublic String filter(boolean firstPart, String target, Character replaceChar) throws Exception {\r\n\t\t\r\n\t\treturn filter(null, firstPart, target, replaceChar);\r\n\t}\r\n\t\r\n\tpublic void refresh() throws Exception {\r\n\t\t\r\n\t\tthis.refresh(null);\r\n\t}\r\n\t\r\n\tpublic void 
refreshAll() throws Exception {\r\n\t\t\r\n\t\tthis.refreshAll(null);\r\n\t}\r\n\t\r\n\tprivate FilterType checkFilterType(FilterType type) {\r\n\t\tif (type == null) {\r\n\t\t\ttype = FilterType.DFA;\r\n\t\t}\r\n\t\treturn type;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/SensitiveWordsService.java",
    "content": "package com.cnblogs.hoojo.sensitivewords;\r\n\r\nimport static com.google.common.base.Preconditions.checkArgument;\r\nimport static com.google.common.base.Preconditions.checkNotNull;\r\nimport static com.google.common.base.Preconditions.checkState;\r\n\r\nimport java.util.ArrayList;\r\nimport java.util.Date;\r\nimport java.util.Iterator;\r\nimport java.util.List;\r\n\r\nimport org.springframework.beans.factory.annotation.Autowired;\r\nimport org.springframework.stereotype.Service;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.dao.SensitiveWordsDao;\r\nimport com.cnblogs.hoojo.sensitivewords.business.enums.SensitiveWordsType;\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.cnblogs.hoojo.sensitivewords.cache.RedisWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.event.CacheChangedEvent;\r\nimport com.cnblogs.hoojo.sensitivewords.event.CacheChangedEvent.Action;\r\nimport com.cnblogs.hoojo.sensitivewords.event.WordsCacheContext;\r\nimport com.cnblogs.hoojo.sensitivewords.log.ApplicationLogging;\r\nimport com.google.common.base.Strings;\r\nimport com.google.common.collect.Lists;\r\n\r\n\r\n\r\n/**\r\n * 敏感词库服务接口业务实现\r\n * @author hoojo\r\n * @createDate 2018-02-02 14:54:58\r\n * @file SensitiveWords.java\r\n * @package com.cnblogs.hoojo.sensitivewords\r\n * @project fengkong\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\n@Service\r\npublic class SensitiveWordsService<T extends SensitiveWords> extends ApplicationLogging {\r\n\r\n\t@Autowired\r\n\tprivate SensitiveWordsDao<T> dao;\r\n\t\r\n\tpublic SensitiveWordsService() {\r\n\t\t\r\n\t\tRedisWordsCache.getInstance().setDataSource(this);\r\n\t}\r\n\t\r\n\tpublic boolean add(T entity) throws Exception {\r\n\t\tlogger.debug(\"添加[敏感词库]数据参数：{}\", entity);\r\n\t\t\r\n\t\tvalidate(entity);\r\n\r\n\t\tboolean flag = false;\r\n\t\ttry {\r\n\t\t\tbind(entity);\r\n\t\t\tcheckUnique(entity);\r\n\t\t\t\r\n\t\t\tflag 
= dao.add(entity) > 0;\r\n\t\t\t\r\n\t\t\tif (flag) {\r\n\t\t\t\tWordsCacheContext.getInstance().dispatchChanged(new CacheChangedEvent(entity, Action.PUT));\r\n\t\t\t}\r\n        } catch (Exception e) {\r\n\t        logger.error(e.getMessage(), e);\r\n\t        throw e;\r\n        }\r\n\t\t\r\n\t\tlogger.debug(\"添加[敏感词库]数据是否成功：{}\", flag);\r\n\t\treturn flag;\r\n\t}\r\n\t\r\n\tprivate void checkUnique(T entity) throws Exception {\r\n\t\t@SuppressWarnings(\"unchecked\")\r\n\t\tT card = dao.get((T) new SensitiveWords(entity.getWord()));\r\n\t\t\r\n\t\tcheckState(card == null, \"该敏感词：%s 已经存在，不能重复添加\", entity.getWord());\r\n\t}\r\n\t\r\n\tpublic void validate(T entity) throws Exception {\r\n\t\tcheckNotNull(entity, \"敏感词库对象不能为空\");\r\n\t\t\r\n\t\tcheckArgument(!Strings.isNullOrEmpty(entity.getWord()), \"敏感词不能为空\");\r\n\t\tcheckNotNull(entity.getType(), \"敏感词类型，1：色情，2：政治，3：暴恐，4：民生，5：反动，6：贪腐，7：其他不能为空\");\r\n\t\tcheckArgument(!Strings.isNullOrEmpty(entity.getCreator()), \"创建人不能为空\");\r\n\t\tcheckArgument(!Strings.isNullOrEmpty(entity.getUpdater()), \"更新人不能为空\");\r\n\t}\r\n\t\r\n\tpublic void bind(T entity) throws Exception {\r\n\t\tif (entity.getSensitiveWordsId() == null) {\r\n\t\t\tlong sensitiveWordsId = System.currentTimeMillis();\r\n\t\t\tentity.setSensitiveWordsId(sensitiveWordsId);\r\n\t\t}\r\n\r\n\t\tDate now = new Date();\r\n\t\tentity.setCreateTime(now);\r\n\t\tentity.setUpdateTime(now);\r\n\t}\r\n\t\r\n\tpublic boolean batch(List<T> entities, SensitiveWordsType type) throws Exception {\r\n\t\tlogger.debug(\"批量添加[敏感词库]数据参数：{}\", entities.size());\r\n\t\t\r\n\t\tboolean flag = false;\r\n\t\ttry {\r\n\t\t\tif (entities != null && !entities.isEmpty()) {\r\n\t\t\t\t\r\n\t\t\t\tList<T> result = Lists.newArrayList();\r\n\t\t\t\t\r\n\t\t\t\tIterator<T> iter = entities.iterator();\r\n\t\t\t\twhile (iter.hasNext()) {\r\n\t\t\t\t\tT entity = 
iter.next();\r\n\t\t\t\t\tentity.setType(type);\r\n\t\t\t\t\t\r\n\t\t\t\t\tvalidate(entity);\r\n\t\t\t\t\tbind(entity);\r\n\t\t\t\t\tcheckUnique(entity);\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tint count = dao.batch(result);\r\n\t\t\t\tlogger.debug(\"批量入库敏感词库数据：{}\", count);\r\n\t\t\t\t\r\n\t\t\t\tif (count != result.size()) {\r\n\t\t\t\t\tthrow new RuntimeException(\"批量入库敏感词库数据不完整\");\r\n\t\t\t\t} \r\n\t\t\t\t\r\n\t\t\t\tWordsCacheContext.getInstance().dispatchChanged(new CacheChangedEvent(result, Action.PUT_LIST));\r\n\t\t\t} \r\n\t\t\t\r\n\t\t\tflag = true;\r\n        } catch (Exception e) {\r\n\t        logger.error(e.getMessage(), e);\r\n\t        throw e;\r\n        }\r\n\t\t\r\n\t\tlogger.debug(\"批量添加[敏感词库]数据是否成功：{}\", flag);\r\n\t\treturn flag;\r\n\t}\r\n\t\r\n    public boolean edit(T entity) throws Exception {\r\n\t\tlogger.debug(\"修改[敏感词库]数据参数：{}\", entity);\r\n\t\t\r\n\t\tif (entity != null) {\r\n\t\t\tentity.setUpdateTime(new Date());\r\n\t\t}\r\n\t\t\r\n\t\tboolean flag = false;\r\n\t\ttry {\r\n\t\t\tflag = dao.edit(entity) > 0;\r\n\t\t\t\r\n\t\t\tif (flag) {\r\n\t\t\t\tWordsCacheContext.getInstance().dispatchChanged(new CacheChangedEvent(entity, Action.UPDATE));\r\n\t\t\t}\r\n        } catch (Exception e) {\r\n\t        logger.error(e.getMessage(), e);\r\n\t        throw e;\r\n        }\r\n\t\t\r\n\t\tlogger.debug(\"修改[敏感词库]数据是否成功：{}\", flag);\r\n\t\treturn flag;\r\n\t}\r\n\t\r\n    public boolean remove(T entity) throws Exception {\r\n\t\tlogger.debug(\"删除[敏感词库]数据参数：{}\", entity);\r\n\t\t\r\n\t\tboolean flag = false;\r\n\t\ttry {\r\n\t\t\tflag = dao.remove(entity) > 0;\r\n\t\t\t\r\n\t\t\tif (flag) {\r\n\t\t\t\tWordsCacheContext.getInstance().dispatchChanged(new CacheChangedEvent(entity, Action.REMOVE));\r\n\t\t\t}\r\n        } catch (Exception e) {\r\n\t        logger.error(e.getMessage(), e);\r\n\t        throw e;\r\n        }\r\n\t\t\r\n\t\tlogger.debug(\"删除[敏感词库]数据是否成功：{}\", flag);\r\n\t\treturn flag;\r\n\t}\r\n\t\r\n    public List<T> list(T entity) 
throws Exception {\r\n\t\tlogger.debug(\"动态查询[敏感词库]数据参数 Entity：{}\", entity);\r\n\t\t\r\n\t\tList<T> result = new ArrayList<T>();\r\n\t\ttry {\r\n\t\t\tresult = dao.query(entity);\r\n        } catch (Exception e) {\r\n\t        logger.error(e.getMessage(), e);\r\n\t        throw e;\r\n        }\r\n\t\t\r\n\t\tlogger.debug(\"动态查询[敏感词库]数据结果集：{}\", result);\r\n\t\treturn result;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/business/dao/SensitiveWordsDao.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.business.dao;\r\n\r\nimport java.util.List;\r\n\r\nimport org.springframework.stereotype.Repository;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\n\r\n/**\r\n * 敏感词库 MyBatis数据库操作\r\n * @author hoojo\r\n * @createDate 2018-02-02 14:54:58\r\n * @file SensitiveWordsDao.java\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\n@Repository\r\npublic interface SensitiveWordsDao<T extends SensitiveWords> {\r\n\r\n\tpublic int add(T entity) throws Exception;\r\n\t\r\n\tpublic int batch(List<T> entity) throws Exception;\r\n\t\r\n\tpublic int edit(T entity) throws Exception;\r\n\t\r\n\tpublic int remove(T entity) throws Exception;\r\n\t\r\n\tpublic Long count(T entity) throws Exception;\r\n\r\n\tpublic List<T> query(T entity) throws Exception;\r\n\t\r\n\tpublic T get(T entity) throws Exception;\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/business/enums/SensitiveWordsType.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.business.enums;\r\n\r\n/**\r\n * 过滤敏感词类型\r\n * @author hoojo\r\n * @createDate 2018年9月24日 下午9:16:04\r\n * @file SensitiveWordsType.java\r\n * @package com.cnblogs.hoojo.sensitivewords.business.enums\r\n * @project sensitive-words-filter\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic enum SensitiveWordsType {\r\n\t/** 色情 */\r\n\tPORNO(1, \"色情\"), \r\n\t/** 政治 */\r\n\tPOLITICS(2, \"政治\"), \r\n\t/** 暴恐 */\r\n\tTERROR(3, \"暴恐\"), \r\n\t/** 民生 */\r\n\tLIVELIHOOD(4, \"民生\"), \r\n\t/** 反动 */\r\n\tREACTION(5, \"反动\"), \r\n\t/** 贪腐 */\r\n\tCORRUPTION(6, \"贪腐\"), \r\n\t/** 其他 */\r\n\tOTHERS(7, \"其他\"); \r\n\r\n\tprivate int code;\r\n\tprivate String name;\r\n\t\r\n\tSensitiveWordsType(int code, String name) {\r\n\t\tthis.code = code;\r\n\t\tthis.name = name;\r\n\t}\r\n\r\n\tpublic Integer getCode() {\r\n\t\treturn code;\r\n\t}\r\n\r\n\tpublic String getName() {\r\n\t\treturn name;\r\n\t}\r\n\t\r\n\tpublic String getEnumName() {\r\n\t\treturn this.name();\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/business/model/SensitiveWords.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.business.model;\r\n\r\nimport java.io.Serializable;\r\nimport java.util.Date;\r\n\r\nimport org.apache.commons.lang.builder.ToStringBuilder;\r\nimport org.apache.commons.lang.builder.ToStringStyle;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.enums.SensitiveWordsType;\r\n\r\n\r\n/**\r\n * 敏感词库\r\n * @author hoojo\r\n * @createDate 2018-02-02 14:54:58\r\n * @file SensitiveWords.java\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class SensitiveWords implements Serializable{\r\n\t\r\n\tprivate static final long serialVersionUID = 1L;\r\n\t\r\n\t/** 主键id */\r\n\tprivate Long sensitiveWordsId;\r\n\t/** 敏感词 */\r\n\tprivate String word;\r\n\t/** 敏感词类型，1：色情，2：政治，3：暴恐，4：民生，5：反动，6：贪腐，7：其他 */\r\n\tprivate SensitiveWordsType type;\r\n\t/** 创建人 */\r\n\tprivate String creator;\r\n\t/** 创建时间 */\r\n\tprivate Date createTime;\r\n\t/** 更新人 */\r\n\tprivate String updater;\r\n\t/** 更新时间 */\r\n\tprivate Date updateTime;\r\n\t\r\n\tpublic SensitiveWords() {\r\n\t}\r\n\t\r\n\tpublic SensitiveWords(String word, String creator, String updater) {\r\n\t\tsuper();\r\n\t\tthis.word = word;\r\n\t\tthis.creator = creator;\r\n\t\tthis.updater = updater;\r\n\t}\r\n\r\n\tpublic SensitiveWords(String word) {\r\n\t\tsuper();\r\n\t\tthis.word = word;\r\n\t}\r\n\r\n\t/** 主键id */\r\n\tpublic void setSensitiveWordsId(Long sensitiveWordsId) {\r\n\t\tthis.sensitiveWordsId = sensitiveWordsId;\r\n\t}\r\n\t\r\n\t/** 主键id*/\r\n\tpublic Long getSensitiveWordsId() {\r\n\t\treturn this.sensitiveWordsId;\r\n\t}\r\n\t/** 敏感词 */\r\n\tpublic void setWord(String word) {\r\n\t\tthis.word = word;\r\n\t}\r\n\t\r\n\t/** 敏感词*/\r\n\tpublic String getWord() {\r\n\t\treturn this.word;\r\n\t}\r\n\t/** 敏感词类型，1：色情，2：政治，3：暴恐，4：民生，5：反动，6：贪腐，7：其他 */\r\n\tpublic void setType(SensitiveWordsType type) {\r\n\t\tthis.type = type;\r\n\t}\r\n\t\r\n\t/** 敏感词类型，1：色情，2：政治，3：暴恐，4：民生，5：反动，6：贪腐，7：其他*/\r\n\tpublic 
SensitiveWordsType getType() {\r\n\t\treturn this.type;\r\n\t}\r\n\t/** 创建人 */\r\n\tpublic void setCreator(String creator) {\r\n\t\tthis.creator = creator;\r\n\t}\r\n\t\r\n\t/** 创建人*/\r\n\tpublic String getCreator() {\r\n\t\treturn this.creator;\r\n\t}\r\n\t/** 创建时间 */\r\n\tpublic void setCreateTime(Date createTime) {\r\n\t\tthis.createTime = createTime;\r\n\t}\r\n\t\r\n\t/** 创建时间*/\r\n\tpublic Date getCreateTime() {\r\n\t\treturn this.createTime;\r\n\t}\r\n\t/** 更新人 */\r\n\tpublic void setUpdater(String updater) {\r\n\t\tthis.updater = updater;\r\n\t}\r\n\t\r\n\t/** 更新人*/\r\n\tpublic String getUpdater() {\r\n\t\treturn this.updater;\r\n\t}\r\n\t/** 更新时间 */\r\n\tpublic void setUpdateTime(Date updateTime) {\r\n\t\tthis.updateTime = updateTime;\r\n\t}\r\n\t\r\n\t/** 更新时间*/\r\n\tpublic Date getUpdateTime() {\r\n\t\treturn this.updateTime;\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic String toString() {\r\n\t\treturn ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE);\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/cache/AbstractWordCache.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.cache;\r\n\r\nimport java.util.List;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.cnblogs.hoojo.sensitivewords.event.CacheChangedEvent;\r\nimport com.cnblogs.hoojo.sensitivewords.event.WordsCacheContext.CacheChangedListener;\r\nimport com.cnblogs.hoojo.sensitivewords.log.ApplicationLogging;\r\n\r\n/**\r\n * abstract word cache interface method\r\n * @author hoojo\r\n * @createDate 2018年2月6日 下午2:33:54\r\n * @file AbstractWordCache.java\r\n * @package com.cnblogs.hoojo.sensitivewords.cache\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class AbstractWordCache extends ApplicationLogging implements WordsCache, CacheChangedListener {\r\n\r\n\tprivate String listenerName;\r\n\r\n\tpublic AbstractWordCache(String listenerName) {\r\n\t\tthis.listenerName = listenerName;\r\n\t}\r\n\t\r\n\tpublic String getListenerName() {\r\n\t\treturn listenerName;\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic void setDataSource(Object dataSource) {\r\n\t\tdebug(\"{}: bindDataSource: {}\", listenerName, dataSource);\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic boolean init() throws Exception {\r\n\t\tdebug(\"{}: init word cache\", listenerName);\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(SensitiveWords words) throws Exception {\r\n\t\tdebug(\"{}: put word: {}\", listenerName, words);\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(List<SensitiveWords> words) throws Exception {\r\n\t\tdebug(\"{}: put word list: {}\", listenerName, words);\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic List<SensitiveWords> get() throws Exception {\r\n\t\tdebug(\"{}: get word list\", listenerName);\r\n\t\t\r\n\t\treturn null;\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic boolean update(SensitiveWords word) throws Exception 
{\r\n\t\tdebug(\"{}: update word: {}\", listenerName, word);\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean remove(SensitiveWords words) throws Exception {\r\n\t\tdebug(\"{}: remove word: {}\", listenerName, words);\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean refresh() throws Exception {\r\n\t\tdebug(\"{}: refresh word cache\", listenerName);\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\t@SuppressWarnings(\"unchecked\")\r\n\t@Override\r\n\tpublic void handleChangedEvent(CacheChangedEvent event) throws Exception {\r\n\t\t\r\n\t\tthis.init();\r\n\t\t\r\n\t\tswitch (event.getAction()) {\r\n\t\t\t\r\n\t\t\tcase PUT:\r\n\t\t\t\tthis.put((SensitiveWords) event.getSource());\r\n\t\t\t\tbreak;\r\n\t\t\t\t\r\n\t\t\tcase PUT_LIST:\r\n\t\t\t\tthis.put((List<SensitiveWords>) event.getSource());\r\n\t\t\t\tbreak;\r\n\t\t\t\t\r\n\t\t\tcase REMOVE:\r\n\t\t\t\tthis.remove((SensitiveWords) event.getSource());\r\n\t\t\t\tbreak;\r\n\t\t\t\t\r\n\t\t\tcase UPDATE:\r\n\t\t\t\tthis.update((SensitiveWords) event.getSource());\r\n\t\t\t\tbreak;\r\n\t\t\t\t\r\n\t\t\tcase REFRESH:\r\n\t\t\t\tthis.refresh();\r\n\t\t\t\tbreak;\r\n\r\n\t\t\tdefault:\r\n\t\t\t\tthrow new UnsupportedOperationException();\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/cache/JvmWordsCache.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.cache;\r\n\r\nimport java.util.List;\r\nimport java.util.Objects;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.google.common.base.Predicate;\r\nimport com.google.common.collect.Iterators;\r\nimport com.google.common.collect.Lists;\r\n\r\n/**\r\n * In-JVM sensitive word cache.\r\n * Words are held in the static {@link #cache} list and are reloaded from the\r\n * upstream {@link WordsCache} bound through {@link #setDataSource(Object)}.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:56:38\r\n * @file JvmWordsCache.java\r\n * @package com.cnblogs.hoojo.sensitivewords.cache\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class JvmWordsCache extends AbstractWordCache {\r\n\r\n\t/** Upstream cache this JVM cache is refreshed from. */\r\n\tprivate WordsCache wordsCache;\r\n\t/** Backing list; stays null until the first init/put/refresh. */\r\n\tpublic static List<SensitiveWords> cache = null;\r\n\r\n\tprivate static class SingleFactory {\r\n\t\t\r\n\t\tprivate static final JvmWordsCache INSTANCE = new JvmWordsCache();\r\n\t}\r\n\r\n\tpublic static final JvmWordsCache getInstance() {\r\n\t\t\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\tprivate JvmWordsCache() {\r\n\t\tsuper(\"JVM 脱敏词库缓存\");\r\n\t}\r\n\r\n\t/**\r\n\t * Returns the backing list, creating it on first use so that put/refresh\r\n\t * no longer fail with a NullPointerException when called before init().\r\n\t */\r\n\tprivate static List<SensitiveWords> ensureCache() {\r\n\t\tif (cache == null) {\r\n\t\t\tcache = Lists.newArrayList();\r\n\t\t}\r\n\t\treturn cache;\r\n\t}\r\n\r\n\t/**\r\n\t * Binds the upstream cache.\r\n\t * \r\n\t * @param dataSource must be a {@link WordsCache}\r\n\t * @throws IllegalArgumentException for any other type\r\n\t */\r\n\t@Override\r\n\tpublic void setDataSource(Object dataSource) {\r\n\t\tsuper.setDataSource(dataSource);\r\n\r\n\t\tif (dataSource instanceof WordsCache) {\r\n\t\t\tthis.wordsCache = (WordsCache) dataSource;\r\n\t\t} else {\r\n\t\t\tthrow new IllegalArgumentException(\"未知数据源类型\" + getListenerName());\r\n\t\t}\r\n\t}\r\n\r\n\t/**\r\n\t * Loads the cache from the upstream source when it is still null or empty;\r\n\t * otherwise does nothing.\r\n\t */\r\n\t@Override\r\n\tpublic boolean init() throws Exception {\r\n\t\tsuper.init();\r\n\r\n\t\tif (cache == null || cache.isEmpty()) {\r\n\t\t\tdebug(\"{}: jvm cache 首次初始化\", getListenerName());\r\n\r\n\t\t\treturn refresh();\r\n\t\t}\r\n\r\n\t\tdebug(\"{}: jvm cache 已被初始化，无需重复执行\", getListenerName());\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(SensitiveWords words) throws Exception {\r\n\t\tsuper.put(words);\r\n\r\n\t\tensureCache().add(words);\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(List<SensitiveWords> words) throws Exception {\r\n\t\tsuper.put(words);\r\n\r\n\t\tif (words != null) {\r\n\t\t\tensureCache().addAll(words);\r\n\t\t}\r\n\t\treturn true;\r\n\t}\r\n\r\n\t/** Returns the backing list itself (null if nothing has initialised it yet). */\r\n\t@Override\r\n\tpublic List<SensitiveWords> get() throws Exception {\r\n\t\tsuper.get();\r\n\r\n\t\treturn cache;\r\n\t}\r\n\r\n\t/**\r\n\t * Replaces a cached word: removes the matching entries, then adds the new one.\r\n\t * \r\n\t * @return false when no matching entry was found (nothing is added then)\r\n\t */\r\n\t@Override\r\n\tpublic boolean update(SensitiveWords word) throws Exception {\r\n\t\tsuper.update(word);\r\n\r\n\t\tif (remove(word)) {\r\n\t\t\treturn put(word);\r\n\t\t}\r\n\r\n\t\treturn false;\r\n\t}\r\n\r\n\t/**\r\n\t * Removes every cached entry that has the same id or the same word text.\r\n\t * \r\n\t * @return true if at least one entry was removed\r\n\t */\r\n\t@Override\r\n\tpublic boolean remove(final SensitiveWords word) throws Exception {\r\n\t\tsuper.remove(word);\r\n\r\n\t\tif (word == null || cache == null) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\r\n\t\treturn Iterators.removeIf(cache.iterator(), new Predicate<SensitiveWords>() {\r\n\t\t\t@Override\r\n\t\t\tpublic boolean apply(SensitiveWords item) {\r\n\t\t\t\tif (item == null) {\r\n\t\t\t\t\treturn false;\r\n\t\t\t\t}\r\n\t\t\t\t// Value comparison: '==' on ids is only correct for primitives.\r\n\t\t\t\t// NOTE(review): the id type is declared in SensitiveWords, not visible here - confirm.\r\n\t\t\t\tif (Objects.equals(word.getSensitiveWordsId(), item.getSensitiveWordsId())) {\r\n\t\t\t\t\treturn true;\r\n\t\t\t\t}\r\n\t\t\t\treturn StringUtils.equals(word.getWord(), item.getWord());\r\n\t\t\t}\r\n\t\t});\r\n\t}\r\n\r\n\t/**\r\n\t * Reloads the cache from the upstream source.\r\n\t * \r\n\t * @throws IllegalStateException when no data source has been bound yet\r\n\t */\r\n\t@Override\r\n\tpublic boolean refresh() throws Exception {\r\n\t\tsuper.refresh();\r\n\r\n\t\tdebug(\"{}: 从新刷新初始化JVM缓存\", getListenerName());\r\n\t\tif (wordsCache == null) {\r\n\t\t\tthrow new IllegalStateException(getListenerName() + \": data source has not been set\");\r\n\t\t}\r\n\r\n\t\t// Read upstream first so that a failing read does not leave the cache wiped.\r\n\t\tList<SensitiveWords> words = wordsCache.get();\r\n\r\n\t\tList<SensitiveWords> target = ensureCache();\r\n\t\ttarget.clear();\r\n\t\tif (words != null) {\r\n\t\t\ttarget.addAll(words);\r\n\t\t}\r\n\t\tdebug(\"{}: JVM缓存敏感词数量：{}\", getListenerName(), target.size());\r\n\r\n\t\treturn true;\r\n\t}\r\n}"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/cache/RedisWordsCache.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.cache;\r\n\r\nimport java.util.List;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.SensitiveWordsService;\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.google.common.collect.Lists;\r\n\r\n/**\r\n * Redis backed (distributed) word cache.\r\n * The Redis calls are currently commented out, so every operation only logs\r\n * through the super class and returns a fixed value.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:55:39\r\n * @file RedisWordsCache.java\r\n * @package com.cnblogs.hoojo.sensitivewords.cache\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class RedisWordsCache extends AbstractWordCache {\r\n\r\n\t/** Service bound through setDataSource; only referenced by the commented-out refresh code. */\r\n\tprivate SensitiveWordsService<SensitiveWords> service;\r\n\r\n\tprivate static class SingleFactory {\r\n\r\n\t\tprivate static final RedisWordsCache INSTANCE = new RedisWordsCache();\r\n\t}\r\n\r\n\t/** Creates the singleton and binds it as the data source of the JVM cache. */\r\n\tprivate RedisWordsCache() {\r\n\t\tsuper(\"redis 脱敏词库缓存\");\r\n\r\n\t\tJvmWordsCache.getInstance().setDataSource(this);\r\n\t}\r\n\r\n\tpublic static final RedisWordsCache getInstance() {\r\n\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\r\n\t/**\r\n\t * Binds the word service.\r\n\t * \r\n\t * @param dataSource must be a SensitiveWordsService\r\n\t * @throws IllegalArgumentException for any other type\r\n\t */\r\n\t@SuppressWarnings(\"unchecked\")\r\n\t@Override\r\n\tpublic void setDataSource(Object dataSource) {\r\n\t\tsuper.setDataSource(dataSource);\r\n\r\n\t\tif (!(dataSource instanceof SensitiveWordsService)) {\r\n\t\t\tthrow new IllegalArgumentException(\"未知数据源类型\" + getListenerName());\r\n\t\t}\r\n\t\tthis.service = (SensitiveWordsService<SensitiveWords>) dataSource;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean init() throws Exception {\r\n\t\tsuper.init();\r\n\t\t\r\n\t\t/*\r\n\t\tif (RedisUtil.getCountQueue(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX) == 0) {\r\n\t\t\tdebug(\"{}: redis cache 首次初始化\", getListenerName());\r\n\t\t\t\r\n\t\t\treturn refresh();\r\n\t\t} else {\r\n\t\t\tdebug(\"{}: redis缓存已被初始化，无需再执行\", getListenerName());\r\n\t\t}\r\n\t\t*/\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(SensitiveWords words) throws Exception {\r\n\t\tsuper.put(words);\r\n\t\t\r\n\t\t//RedisUtil.push(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, toJSON(words));\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean put(List<SensitiveWords> words) throws Exception {\r\n\t\tsuper.put(words);\r\n\r\n\t\t// Serialised rows are prepared here; the push itself is still disabled.\r\n\t\tfinal List<String> rows = Lists.newArrayList();\r\n\t\tfor (SensitiveWords each : words) {\r\n\t\t\trows.add(toJSON(each));\r\n\t\t}\r\n\t\t\r\n\t\t//RedisUtil.push(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, rows);\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic List<SensitiveWords> get() throws Exception {\r\n\t\tsuper.get();\r\n\t\t/*\r\n\t\tJSONObject json = RedisUtil.getQueue(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, 0, -1);\r\n\t\tif (json == null) {\r\n\t\t\treturn Lists.newArrayList();\r\n\t\t}\r\n\t\t\r\n\t\tJSONArray rows = json.getJSONArray(\"rows\");\r\n\t\tList<SensitiveWords> words = Lists.newArrayList();\r\n\t\tfor (int i = 0; i < rows.size(); i++) {\r\n\t\t\twords.add(JSONObject.parseObject(rows.get(i).toString(), SensitiveWords.class));\r\n\t\t}\r\n\t\treturn words;\r\n\t\t*/\r\n\t\treturn null;\r\n\t}\r\n\r\n\t/** Removes the word and, only if something was removed, adds it again. */\r\n\t@Override\r\n\tpublic boolean update(SensitiveWords word) throws Exception {\r\n\t\tsuper.update(word);\r\n\r\n\t\treturn remove(word) && put(word);\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean remove(final SensitiveWords word) throws Exception {\r\n\t\tsuper.remove(word);\r\n\t\t\r\n\t\tif (word == null) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t/*\r\n\t\tJSONObject json = RedisUtil.getQueue(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, 0, -1);\r\n\t\tif (json == null) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tList<String> newRows = Lists.newArrayList();\r\n\r\n\t\tJSONArray rows = json.getJSONArray(\"rows\");\r\n\t\tfor (int i = 0; i < rows.size(); i++) {\r\n\t\t\tSensitiveWords cacheWord = JSONObject.parseObject(rows.get(i).toString(), SensitiveWords.class);\r\n\t\t\t\r\n\t\t\tif (cacheWord.getSensitiveWordsId() == word.getSensitiveWordsId()) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\tif (StringUtils.equals(cacheWord.getWord(), word.getWord())) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tnewRows.add(rows.get(i).toString());\r\n\t\t}\r\n\t\t\r\n\t\tRedisUtil.removeRedisCache(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX);\r\n\t\treturn RedisUtil.push(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, rows);\r\n\t\t*/\r\n\t\treturn false;\r\n\t}\r\n\r\n\t@Override\r\n\tpublic boolean refresh() throws Exception {\r\n\t\tsuper.refresh();\r\n\t\t\r\n\t\tdebug(\"{}: 从新刷新初始化redis缓存\", getListenerName());\r\n\t\t/*\r\n\t\ttry {\r\n\t\t\tRedisUtil.removeRedisCache(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX);\r\n\t\t\t\r\n\t\t\tSensitiveWords entity = new SensitiveWords();\r\n\t\t\tentity.setEnableFlag(EnableState.ENABLE);\r\n\t\t\t\r\n\t\t\tList<SensitiveWords> words = service.list(entity);\r\n\t\t\t\r\n\t\t\tList<String> jsonWords = Lists.newArrayList();\r\n\t\t\tfor (SensitiveWords word : words) {\r\n\t\t\t\tjsonWords.add(toJSON(word));\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tRedisUtil.push(RedisKeyUtil.FENGKONG_SW_FILTER_PREFIX, jsonWords);\r\n\t\t\t\r\n\t\t\tdebug(\"{}: redis 缓存敏感词数量：{}\", getListenerName(), words.size());\r\n\t\t} catch(Exception e) {\r\n\t\t\tthrow e;\r\n\t\t}\r\n\t\t*/\r\n\t\treturn true;\r\n\t}\r\n\r\n\t/** Serialises a word; currently always returns null because the implementation is commented out. */\r\n\tprivate String toJSON(SensitiveWords word) {\r\n\t\t/*\r\n\t\tMap<String, Object> map = BeanMapUtils.transBean2Map(word);\r\n\t\tmap.put(\"type\", word.getType().name());\r\n\t\t\r\n\t\treturn JSON.toJSONString(map);\r\n\t\t*/\r\n\t\treturn null;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/cache/WordsCache.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.cache;\r\n\r\nimport java.util.List;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\n\r\n/**\r\n * Contract of a sensitive word cache.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:57:07\r\n * @file WordsCache.java\r\n * @package com.cnblogs.hoojo.sensitivewords.cache\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic interface WordsCache {\r\n\r\n\t/** Binds the data source backing this cache. */\r\n\tvoid setDataSource(Object dataSource);\r\n\r\n\t/** Prepares the cache for use. */\r\n\tboolean init() throws Exception;\r\n\r\n\t/** Adds a single word. */\r\n\tboolean put(SensitiveWords words) throws Exception;\r\n\r\n\t/** Adds a list of words. */\r\n\tboolean put(List<SensitiveWords> words) throws Exception;\r\n\r\n\t/** Returns the cached words. */\r\n\tList<SensitiveWords> get() throws Exception;\r\n\r\n\t/** Removes a word. */\r\n\tboolean remove(SensitiveWords words) throws Exception;\r\n\r\n\t/** Reloads the cache content. */\r\n\tboolean refresh() throws Exception;\r\n\r\n\t/** Updates a cached word. */\r\n\tboolean update(SensitiveWords word) throws Exception;\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/event/CacheChangedEvent.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.event;\r\n\r\nimport java.util.EventObject;\r\n\r\n/**\r\n * Event describing one modification of the word cache.\r\n * The event source carries the affected data; the action tells listeners\r\n * which operation to apply.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月5日 下午5:50:51\r\n * @file CacheChangedEvent.java\r\n * @package com.cnblogs.hoojo.sensitivewords.event\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class CacheChangedEvent extends EventObject {\r\n\r\n\tprivate static final long serialVersionUID = 6217432220420884817L;\r\n\r\n\t/** Kinds of cache modification. */\r\n\tpublic enum Action {\r\n\t\tPUT, PUT_LIST, REMOVE, REFRESH, UPDATE;\r\n\t}\r\n\r\n\tprivate final Action action;\r\n\r\n\t/**\r\n\t * @param source data the action applies to\r\n\t * @param action the modification that happened\r\n\t */\r\n\tpublic CacheChangedEvent(Object source, Action action) {\r\n\t\tsuper(source);\r\n\t\tthis.action = action;\r\n\t}\r\n\r\n\t/** Prints the action and the source to standard output. */\r\n\tpublic void doEvent() {\r\n\t\tSystem.out.println(\"触发事件：\" + action + \"，\" + getSource());\r\n\t}\r\n\r\n\tpublic Action getAction() {\r\n\t\treturn this.action;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/event/WordsCacheContext.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.event;\r\n\r\nimport java.util.Enumeration;\r\nimport java.util.Vector;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.cache.JvmWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.cache.RedisWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.log.ApplicationLogging;\r\n\r\n/**\r\n * Singleton registry of cache change listeners.\r\n * The Redis and JVM word caches are registered on construction; further\r\n * listeners are added through {@link #register(CacheChangedListener)} and\r\n * every event passed to {@link #dispatchChanged(CacheChangedEvent)} is\r\n * delivered to all of them in registration order.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月5日 下午5:22:47\r\n * @file WordsCacheContext.java\r\n * @package com.cnblogs.hoojo.sensitivewords.event\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class WordsCacheContext extends ApplicationLogging {\r\n\r\n\tprivate final Vector<CacheChangedListener> listeners = new Vector<CacheChangedListener>();\r\n\r\n\tprivate static class SingleFactory {\r\n\t\t\r\n\t\tprivate static final WordsCacheContext INSTANCE = new WordsCacheContext();\r\n\t}\r\n\r\n\tpublic static final WordsCacheContext getInstance() {\r\n\t\t\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\tprivate WordsCacheContext() {\r\n\t\t\r\n\t\ttry {\r\n\t\t\tregister(RedisWordsCache.getInstance());\r\n\t\t\tregister(JvmWordsCache.getInstance());\r\n\t\t} catch (Exception e) {\r\n\t\t\terror(e);\r\n\t\t\tthrow e;\r\n\t\t}\r\n\t}\r\n\t\r\n\t/**\r\n\t * Adds a listener unless it is already registered.\r\n\t * A null listener is ignored: storing it would make every later\r\n\t * dispatch fail with a NullPointerException.\r\n\t * \r\n\t * @param listener listener to add, may be null\r\n\t */\r\n\tpublic void register(CacheChangedListener listener) {\r\n\t\t\r\n\t\tif (listener == null) {\r\n\t\t\tdebug(\"ignore registration of null listener\");\r\n\t\t\treturn;\r\n\t\t}\r\n\t\t\r\n\t\t// contains + add must be one atomic step, otherwise two threads can add the same listener twice\r\n\t\tsynchronized (this.listeners) {\r\n\t\t\tif (!this.listeners.contains(listener)) {\r\n\t\t\t\tthis.listeners.add(listener);\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n\r\n\t/**\r\n\t * Delivers the event to every registered listener.\r\n\t * The first exception thrown by a listener is propagated and stops the\r\n\t * delivery to the remaining listeners.\r\n\t * \r\n\t * @param event event to deliver\r\n\t * @throws Exception whatever a listener throws\r\n\t */\r\n\tpublic void dispatchChanged(CacheChangedEvent event) throws Exception {\r\n\t\t\r\n\t\tevent.doEvent();\r\n\t\t\r\n\t\tEnumeration<CacheChangedListener> enums = listeners.elements();\r\n\t\twhile (enums.hasMoreElements()) {\r\n\t\t\tCacheChangedListener listener = enums.nextElement();\r\n\r\n\t\t\tinfo(\"触发事件：{}，执行监听业务：{}，数据：{}\", event.getAction(), listener.getListenerName(), event.getSource());\r\n\t\t\tlistener.handleChangedEvent(event);\r\n\t\t}\r\n\t}\r\n\r\n\t/** Receiver of cache change events. */\r\n\tpublic interface CacheChangedListener extends java.util.EventListener {\r\n\r\n\t\t/** Applies the change described by the event. */\r\n\t\tpublic void handleChangedEvent(CacheChangedEvent event) throws Exception;\r\n\t\t\r\n\t\t/** Name of the listener, used in log output. */\r\n\t\tpublic String getListenerName();\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/factory/FilterType.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.factory;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractSensitiveWordsFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.bucket.HashBucketFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dat.DatFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dfa.DfaFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.simhash.SimHashFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.tire.TireTreeFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.ttmp.TtmpFilter;\r\n\r\n/**\r\n * Available sensitive word filter algorithms.\r\n * Each constant pairs a description with the filter class implementing it.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:28:11\r\n * @file FilterType.java\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic enum FilterType {\r\n\r\n\tDFA(\"dfa算法\", DfaFilter.class),\r\n\tTIRE(\"tire树算法\", TireTreeFilter.class),\r\n\tHASH_BUCKET(\"二级hash算法\", HashBucketFilter.class),\r\n\tDAT(\"双数组算法\", DatFilter.class),\r\n\tTTMP(\"ttmp算法\", TtmpFilter.class),\r\n\tSIMHASH(\"simhash算法\", SimHashFilter.class);\r\n\r\n\t/** Description of the algorithm. */\r\n\tprivate final String desc;\r\n\t/** Filter implementation for this algorithm. */\r\n\tprivate final Class<? extends AbstractSensitiveWordsFilter> clazz;\r\n\r\n\tFilterType(String desc, Class<? extends AbstractSensitiveWordsFilter> clazz) {\r\n\t\tthis.clazz = clazz;\r\n\t\tthis.desc = desc;\r\n\t}\r\n\r\n\tpublic String getDesc() {\r\n\t\treturn this.desc;\r\n\t}\r\n\r\n\tpublic Class<? extends AbstractSensitiveWordsFilter> getClazz() {\r\n\t\treturn this.clazz;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/factory/SensitiveWordsFactory.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.factory;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractSensitiveWordsFilter;\r\n\r\n/**\r\n * Factory creating sensitive word filter implementations.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:05:29\r\n * @file SensitiveWordsFactory.java\r\n * @package com.cnblogs.hoojo.sensitivewords.factory\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class SensitiveWordsFactory {\r\n\r\n\t/**\r\n\t * Creates a new filter of the requested type through its no-argument constructor.\r\n\t * \r\n\t * @param filterType algorithm to instantiate, must not be null\r\n\t * @return a new filter instance\r\n\t * @throws IllegalArgumentException when filterType is null\r\n\t * @throws Exception when the filter class cannot be instantiated\r\n\t */\r\n\tpublic static final AbstractSensitiveWordsFilter create(FilterType filterType) throws Exception {\r\n\t\t\r\n\t\tif (filterType == null) {\r\n\t\t\tthrow new IllegalArgumentException(\"filterType must not be null\");\r\n\t\t}\r\n\t\t\r\n\t\t// Instantiate from the Class object the enum already holds: looking the class up\r\n\t\t// again by name goes through this factory's class loader, which may not be the\r\n\t\t// loader of the filter class, and needed an unchecked cast.\r\n\t\treturn filterType.getClazz().newInstance();\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/AbstractFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter;\r\n\r\nimport java.util.Set;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.event.WordsCacheContext;\r\n\r\n/**\r\n * Base class of the public filter facades.\r\n * Every filter operation is delegated to the executor passed to the\r\n * constructor; the executor is also registered as a cache change listener.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月22日 上午9:30:25\r\n * @file AbstractFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class AbstractFilter extends AbstractSensitiveWordsFilter {\r\n\r\n\tprivate final AbstractFilterExecutor<?> executor;\r\n\t\r\n\t/**\r\n\t * @param executor algorithm implementation all calls are delegated to, must not be null\r\n\t * @throws IllegalArgumentException when executor is null\r\n\t */\r\n\tpublic AbstractFilter(AbstractFilterExecutor<?> executor) {\r\n\t\t\r\n\t\tif (executor == null) {\r\n\t\t\tthrow new IllegalArgumentException(\"executor must not be null\");\r\n\t\t}\r\n\t\tthis.executor = executor;\r\n\t\t\r\n\t\t// Register the executor that was passed in. Registering the field before it was\r\n\t\t// assigned put a null listener into the context and left the executor unregistered.\r\n\t\tWordsCacheContext.getInstance().register(executor);\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic boolean contains(boolean partMatch, String content) throws RuntimeException {\r\n\t\t\r\n\t\treturn executor.contains(partMatch, content);\r\n\t}\r\n\r\n\t@Override\r\n\tpublic Set<String> getWords(boolean partMatch, String content) throws RuntimeException {\r\n\t\t\r\n\t\treturn executor.getWords(partMatch, content);\r\n\t}\r\n\r\n\t@Override\r\n\tpublic String highlight(boolean partMatch, String content) throws RuntimeException {\r\n\t\t\r\n\t\treturn executor.highlight(partMatch, content);\r\n\t}\r\n\r\n\t@Override\r\n\tpublic String filter(boolean partMatch, String content, char replaceChar) throws RuntimeException {\r\n\t\t\r\n\t\treturn executor.filter(partMatch, content, replaceChar);\r\n\t}\r\n\r\n\t@Override\r\n\tpublic void init() throws RuntimeException {\r\n\t\t\r\n\t\texecutor.init();\r\n\t}\r\n\r\n\t@Override\r\n\tpublic void refresh() throws RuntimeException {\r\n\r\n\t\texecutor.refresh();\r\n\t}\r\n\r\n\t@Override\r\n\tpublic void destroy() throws RuntimeException {\r\n\r\n\t\texecutor.destroy();\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/AbstractFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter;\r\n\r\nimport java.io.BufferedReader;\r\nimport java.io.FileNotFoundException;\r\nimport java.io.IOException;\r\nimport java.io.InputStream;\r\nimport java.io.InputStreamReader;\r\nimport java.nio.charset.StandardCharsets;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.cnblogs.hoojo.sensitivewords.event.CacheChangedEvent;\r\nimport com.cnblogs.hoojo.sensitivewords.event.WordsCacheContext.CacheChangedListener;\r\n\r\n/**\r\n * Base class of the filter algorithm executors.\r\n * It owns the algorithm specific word model ({@link #cacheNodes}) and provides\r\n * its initialisation, refresh and destruction, plus the reaction to cache\r\n * change events.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月21日 下午2:54:07\r\n * @file AbstractFilterExecutor.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class AbstractFilterExecutor<T> extends AbstractSensitiveWordsFilterSupport implements CacheChangedListener {\r\n\r\n\t/** Word model of the algorithm; null until init/refresh, null again after destroy. */\r\n\tprotected T cacheNodes;\r\n\tprivate final String listenerName;\r\n\r\n\tpublic AbstractFilterExecutor(String listenerName) {\r\n\t\tthis.listenerName = listenerName;\r\n\t}\r\n\t\r\n\t@Override\r\n\tpublic String getListenerName() {\r\n\t\treturn listenerName;\r\n\t}\r\n\t\r\n\t/**\r\n\t * Creates a new, empty word model.\r\n\t * \r\n\t * @return root of the model\r\n\t */\r\n\tprotected abstract T getCacheNodes();\r\n\t\r\n\t/**\r\n\t * Adds one word to the model.\r\n\t * \r\n\t * @param word word text\r\n\t * @return whether the word was added\r\n\t * @throws RuntimeException\r\n\t */\r\n\tprotected abstract boolean put(String word) throws RuntimeException;\r\n\t\r\n\t/**\r\n\t * Adds the text of a word entity to the model.\r\n\t * \r\n\t * @param word entity to add; null is ignored\r\n\t * @return whether the word was added\r\n\t */\r\n\tpublic boolean put(SensitiveWords word) throws RuntimeException {\r\n\t\tif (word == null) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\ttrace(\"{}: put数据 {}\", getListenerName(), word.getWord());\r\n\t\t\r\n\t\treturn put(word.getWord());\r\n\t}\r\n\t\r\n\t/**\r\n\t * Resets the model and fills it from a classpath word file, one word per line.\r\n\t * Loading is best effort: a failure is logged and not propagated.\r\n\t * \r\n\t * @param wordsFileName classpath name of the UTF-8 word file\r\n\t */\r\n\tpublic void init(String wordsFileName) throws RuntimeException {\r\n\t\tcacheNodes = this.getCacheNodes();\r\n\t\t\r\n\t\tBufferedReader reader = null;\r\n\t\ttry {\r\n\t\t\treader = readDic(wordsFileName);\r\n\t\t\t\r\n\t\t\tfor (String line = reader.readLine(); line != null; line = reader.readLine()) {\r\n\t\t\t\tput(line);\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tinfo(\"{}: 从本地词库加载数据：\", getListenerName());\r\n\t\t} catch (Exception e) {\r\n\t\t\terror(\"{}: 从本地词库加载数据异常：\", getListenerName(), e);\r\n\t\t} finally {\r\n\t\t\tif (reader != null) {\r\n\t\t\t\ttry {\r\n\t\t\t\t\treader.close();\r\n\t\t\t\t} catch (IOException e) {\r\n\t\t\t\t\tlogger.error(e.getMessage(), e);\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n\t\r\n\t/** Builds the model through refresh() unless it already exists. */\r\n\t@Override\r\n\tpublic void init() throws RuntimeException {\r\n\t\ttry {\r\n\t\t\tif (cacheNodes == null) {\r\n\t\t\t\tdebug(\"{}: 初始化数据\", getListenerName());\r\n\t\t\t\trefresh();\r\n\t\t\t} else {\r\n\t\t\t\tdebug(\"{}: 已初始化数据，无需重复执行\", getListenerName());\r\n\t\t\t}\r\n\t\t} catch (Exception e) {\r\n\t\t\tthrow new RuntimeException(e);\r\n\t\t}\t\t\r\n\t}\r\n\r\n\t/**\r\n\t * Replaces the model with a new, empty one.\r\n\t * NOTE(review): reloading the words from the JVM cache is commented out, so a\r\n\t * refresh currently leaves the model empty - confirm this is intended.\r\n\t */\r\n\t@Override\r\n\tpublic void refresh() throws RuntimeException {\r\n\t\tdebug(\"{}: 刷新数据\", getListenerName());\r\n\t\t\r\n\t\ttry {\r\n\t\t\tcacheNodes = this.getCacheNodes();\r\n\t\t\t/*\r\n\t\t\tList<SensitiveWords> list = JvmWordsCache.getInstance().get();\r\n\t\t\tfor (SensitiveWords word : list) {\r\n\t\t\t\tput(word);\r\n\t\t\t}\r\n\t\t\t*/\r\n\t\t} catch (Exception e) {\r\n\t\t\tthrow new RuntimeException(e);\r\n\t\t}\r\n\t}\r\n\t\r\n\t/** Drops the model. */\r\n\t@Override\r\n\tpublic void destroy() throws RuntimeException {\r\n\t\tdebug(\"{}: 销毁数据\", getListenerName());\r\n\t\t\r\n\t\tcacheNodes = null;\r\n\t}\r\n\t\r\n\t/**\r\n\t * Opens a word file from the classpath as a UTF-8 reader. The caller closes the reader.\r\n\t * \r\n\t * @param wordsFileName classpath name of the word file\r\n\t * @return reader positioned at the start of the file\r\n\t * @throws FileNotFoundException when the file is not on the classpath\r\n\t * @throws Exception\r\n\t */\r\n\tprotected BufferedReader readDic(String wordsFileName) throws Exception {\r\n\t\t\r\n\t\tInputStream stream = ClassLoader.getSystemResourceAsStream(wordsFileName);\r\n\t\tif (stream == null) {\r\n\t\t\t// The system class loader does not see the application resources in\r\n\t\t\t// every runtime, so fall back to the context class loader.\r\n\t\t\tClassLoader contextLoader = Thread.currentThread().getContextClassLoader();\r\n\t\t\tif (contextLoader != null) {\r\n\t\t\t\tstream = contextLoader.getResourceAsStream(wordsFileName);\r\n\t\t\t}\r\n\t\t}\r\n\t\tif (stream == null) {\r\n\t\t\t// A null stream would otherwise surface as a NullPointerException without any file name\r\n\t\t\tthrow new FileNotFoundException(\"words file not found on classpath: \" + wordsFileName);\r\n\t\t}\r\n\t\t\r\n\t\treturn new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));\r\n\t}\r\n\t\r\n\t/**\r\n\t * Applies a cache change to the model: PUT adds the single word carried by the\r\n\t * event, every other action rebuilds the model through refresh().\r\n\t * \r\n\t * @throws UnsupportedOperationException for an action without a case below\r\n\t */\r\n\t@Override\r\n\tpublic void handleChangedEvent(CacheChangedEvent event) throws Exception {\r\n\t\t\r\n\t\tthis.init();\r\n\t\tswitch (event.getAction()) {\r\n\t\t\t\r\n\t\t\tcase PUT:\r\n\t\t\t\tthis.put((SensitiveWords) event.getSource());\r\n\t\t\t\tbreak;\r\n\t\t\t\t\r\n\t\t\tcase PUT_LIST:\r\n\t\t\tcase REMOVE:\r\n\t\t\tcase UPDATE:\r\n\t\t\tcase REFRESH:\r\n\t\t\t\tthis.refresh();\r\n\t\t\t\tbreak;\r\n\r\n\t\t\tdefault:\r\n\t\t\t\tthrow new UnsupportedOperationException();\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/AbstractSensitiveWordsFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.cache.JvmWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.cache.RedisWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.log.ApplicationLogging;\r\n\r\n/**\r\n * Base class of all sensitive word filters: adds the life cycle operations\r\n * (init / refresh / destroy) and helpers that also drive the shared word caches.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:09:02\r\n * @file AbstractSensitiveWordsFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class AbstractSensitiveWordsFilter extends ApplicationLogging implements SensitiveWordsFilter {\r\n\r\n\t/** Set once the shared Redis and JVM caches have been initialised; shared by all filters. */\r\n\tprivate volatile static boolean HAS_INIT_WORDS_CACHE = false;\r\n\t\r\n\t/**\r\n\t * Initialises the shared word caches (once per JVM) and then this filter.\r\n\t * The flag was never set before, so the guard had no effect. It is static, so\r\n\t * this filter's own init() runs on every call regardless of the flag:\r\n\t * otherwise only the first filter type would ever be initialised.\r\n\t * \r\n\t * @throws Exception when a cache or the filter fails to initialise; the flag\r\n\t *         stays unset in that case so the next call retries\r\n\t */\r\n\tpublic void initAll() throws Exception {\r\n\t\t\r\n\t\tsynchronized (AbstractSensitiveWordsFilter.class) {\r\n\t\t\tif (!HAS_INIT_WORDS_CACHE) {\r\n\t\t\t\tdebug(\"初始化所有缓存\");\r\n\t\t\t\tRedisWordsCache.getInstance().init();\r\n\t\t\t\tJvmWordsCache.getInstance().init();\r\n\t\t\t\t\r\n\t\t\t\tHAS_INIT_WORDS_CACHE = true;\r\n\t\t\t} else {\r\n\t\t\t\tdebug(\"缓存已被初始化，无需重复执行！\");\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\tthis.init();\r\n\t}\r\n\t\r\n\t/**\r\n\t * Refreshes the Redis cache, the JVM cache and then this filter.\r\n\t */\r\n\tpublic void refreshAll() throws Exception {\r\n\t\t\r\n\t\tdebug(\"刷新所有缓存\");\r\n\t\tRedisWordsCache.getInstance().refresh();\r\n\t\tJvmWordsCache.getInstance().refresh();\r\n\t\t\r\n\t\tthis.refresh();\r\n\t}\r\n\t\r\n\t/** Prepares the filter for use. */\r\n\tpublic abstract void init() throws RuntimeException;\r\n\t\r\n\t/** Rebuilds the filter data. */\r\n\tpublic abstract void refresh() throws RuntimeException;\r\n\t\r\n\t/** Releases the filter data. */\r\n\tpublic abstract void destroy() throws RuntimeException;\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/AbstractSensitiveWordsFilterSupport.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter;\r\n\r\nimport java.util.ArrayList;\r\nimport java.util.Collections;\r\nimport java.util.Comparator;\r\nimport java.util.Iterator;\r\nimport java.util.List;\r\nimport java.util.Set;\r\nimport java.util.regex.Matcher;\r\nimport java.util.regex.Pattern;\r\n\r\nimport com.google.common.base.Strings;\r\nimport com.google.common.collect.Sets;\r\n\r\n/**\r\n * Implements the filter API on top of one matching primitive.\r\n * Subclasses provide {@link #processor(boolean, String, Callback)}; contains,\r\n * getWords, highlight and filter are derived from it here.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月7日 下午6:35:02\r\n * @file AbstractSensitiveWordsFilterSupport.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class AbstractSensitiveWordsFilterSupport extends AbstractSensitiveWordsFilter {\r\n\r\n\tprivate static final String HTML_HIGHLIGHT = \"<font color='red'>%s</font>\";\r\n\r\n\t/**\r\n\t * Callback invoked for every sensitive word that was matched.\r\n\t */\r\n\tprotected interface Callback {\r\n\t\t\r\n\t\t/**\r\n\t\t * Called with a matched sensitive word.\r\n\t\t * \r\n\t\t * @param word the matched word\r\n\t\t * @return true to stop matching immediately, false to continue\r\n\t\t */\r\n\t\tboolean call(String word);\r\n\t}\r\n\t\r\n\t/**\r\n\t * Scans the content for sensitive words and reports each match to the callback.\r\n\t * \r\n\t * @param partMatch whether matching part of a word is supported\r\n\t * @param content text to scan\r\n\t * @param callback receiver of the matched words\r\n\t * @return whether the scan was stopped by the callback\r\n\t */\r\n\tprotected abstract boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException;\r\n\t\r\n\t@Override\r\n\tpublic boolean contains(boolean partMatch, String content) throws RuntimeException {\r\n\t\t\r\n\t\treturn processor(partMatch, content, new Callback() {\r\n\t\t\t@Override\r\n\t\t\tpublic boolean call(String word) {\r\n\t\t\t\treturn true; // stop at the first sensitive word\r\n\t\t\t}\r\n\t\t});\r\n\t}\r\n\r\n\t@Override\r\n\tpublic Set<String> getWords(boolean partMatch, String content) throws RuntimeException {\r\n\t\tfinal Set<String> words = Sets.newHashSet();\r\n\t\t\r\n\t\tprocessor(partMatch, content, new Callback() {\r\n\t\t\t@Override\r\n\t\t\tpublic boolean call(String word) {\r\n\t\t\t\twords.add(word);\r\n\t\t\t\treturn false; // keep matching the following words\r\n\t\t\t}\r\n\t\t});\r\n\t\t\r\n\t\treturn words;\r\n\t}\r\n\t\r\n\t/**\r\n\t * Wraps every occurrence of a matched word in the red font markup.\r\n\t * The content itself is not HTML escaped.\r\n\t */\r\n\t@Override\r\n\tpublic String highlight(boolean partMatch, String content) throws RuntimeException {\r\n\t\tMatcher matcher = literalMatcher(this.getWords(partMatch, content), content);\r\n\t\tif (matcher == null) {\r\n\t\t\treturn content;\r\n\t\t}\r\n\t\t\r\n\t\tStringBuffer result = new StringBuffer();\r\n\t\twhile (matcher.find()) {\r\n\t\t\tString marked = String.format(HTML_HIGHLIGHT, matcher.group());\r\n\t\t\t// quoteReplacement: '$' and '\\\\' in the word must not be read as group references\r\n\t\t\tmatcher.appendReplacement(result, Matcher.quoteReplacement(marked));\r\n\t\t}\r\n\t\tmatcher.appendTail(result);\r\n\t\t\r\n\t\treturn result.toString();\r\n\t}\r\n\r\n\t/**\r\n\t * Replaces every occurrence of a matched word by replaceChar repeated to the word length.\r\n\t */\r\n\t@Override\r\n\tpublic String filter(boolean partMatch, String content, char replaceChar) throws RuntimeException {\r\n\t\tMatcher matcher = literalMatcher(this.getWords(partMatch, content), content);\r\n\t\tif (matcher == null) {\r\n\t\t\treturn content;\r\n\t\t}\r\n\t\t\r\n\t\tString mask = String.valueOf(replaceChar);\r\n\t\tStringBuffer result = new StringBuffer();\r\n\t\twhile (matcher.find()) {\r\n\t\t\tString masked = Strings.repeat(mask, matcher.group().length());\r\n\t\t\tmatcher.appendReplacement(result, Matcher.quoteReplacement(masked));\r\n\t\t}\r\n\t\tmatcher.appendTail(result);\r\n\t\t\r\n\t\treturn result.toString();\r\n\t}\r\n\t\r\n\t/**\r\n\t * Builds a matcher that finds the given words literally in one pass over the content.\r\n\t * Words are quoted, so regex meta characters in a word match themselves, and\r\n\t * longer words are tried first so a word is not shadowed by one of its prefixes.\r\n\t * A single pass also keeps already inserted replacement text from being matched again.\r\n\t * \r\n\t * @return the matcher, or null when there is nothing to search for\r\n\t */\r\n\tprivate static Matcher literalMatcher(Set<String> words, String content) {\r\n\t\tif (content == null || words == null || words.isEmpty()) {\r\n\t\t\treturn null;\r\n\t\t}\r\n\t\t\r\n\t\tList<String> ordered = new ArrayList<String>(words.size());\r\n\t\tIterator<String> iter = words.iterator();\r\n\t\twhile (iter.hasNext()) {\r\n\t\t\tString word = iter.next();\r\n\t\t\tif (word != null && !word.isEmpty()) {\r\n\t\t\t\tordered.add(word);\r\n\t\t\t}\r\n\t\t}\r\n\t\tif (ordered.isEmpty()) {\r\n\t\t\treturn null;\r\n\t\t}\r\n\t\t\r\n\t\tCollections.sort(ordered, new Comparator<String>() {\r\n\t\t\t@Override\r\n\t\t\tpublic int compare(String left, String right) {\r\n\t\t\t\treturn right.length() - left.length();\r\n\t\t\t}\r\n\t\t});\r\n\t\t\r\n\t\tStringBuilder alternation = new StringBuilder();\r\n\t\tfor (String word : ordered) {\r\n\t\t\tif (alternation.length() > 0) {\r\n\t\t\t\talternation.append('|');\r\n\t\t\t}\r\n\t\t\talternation.append(Pattern.quote(word));\r\n\t\t}\r\n\t\t\r\n\t\treturn Pattern.compile(alternation.toString()).matcher(content);\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/SensitiveWordsFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter;\r\n\r\nimport java.util.Set;\r\n\r\n/**\r\n * Public API of a sensitive word filter.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:03:46\r\n * @file SensitiveWordsFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic interface SensitiveWordsFilter {\r\n\r\n\t/**\r\n\t * Tells whether the content contains a sensitive word.\r\n\t * \r\n\t * @param partMatch whether matching part of a word is supported\r\n\t * @param content text to check\r\n\t * @return whether a sensitive word was found\r\n\t */\r\n\tboolean contains(boolean partMatch, String content) throws RuntimeException;\r\n\r\n\t/**\r\n\t * Collects the sensitive words found in the content.\r\n\t * \r\n\t * @param partMatch whether to match partially\r\n\t * @param content text to check\r\n\t * @return the set of matched sensitive words\r\n\t */\r\n\tSet<String> getWords(boolean partMatch, String content) throws RuntimeException;\r\n\r\n\t/**\r\n\t * Highlights the sensitive words of the content with HTML markup.\r\n\t * \r\n\t * @param partMatch whether to match partially\r\n\t * @param content text to check\r\n\t * @return the content with the sensitive words highlighted\r\n\t * @throws RuntimeException\r\n\t */\r\n\tString highlight(boolean partMatch, String content) throws RuntimeException;\r\n\r\n\t/**\r\n\t * Replaces the sensitive words of the content by the given character.\r\n\t * \r\n\t * @param partMatch whether to match partially\r\n\t * @param content text to check\r\n\t * @param replaceChar replacement character\r\n\t * @return the filtered content\r\n\t * @throws RuntimeException\r\n\t */\r\n\tString filter(boolean partMatch, String content, char replaceChar) throws RuntimeException;\r\n\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/bucket/HashBucketFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.bucket;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.bucket.executor.HashBucketFilterExecutor;\r\n\r\n/**\r\n * Sensitive word filter backed by the hash bucket algorithm.\r\n * All work is delegated to the shared {@link HashBucketFilterExecutor} instance.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月22日 上午9:25:16\r\n * @file HashBucketFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.bucket\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class HashBucketFilter extends AbstractFilter {\r\n\r\n\t/** Binds this filter to the singleton hash bucket executor. */\r\n\tpublic HashBucketFilter() {\r\n\t\tsuper(HashBucketFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/bucket/executor/HashBucketFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.bucket.executor;\r\n\r\nimport java.util.HashMap;\r\nimport java.util.HashSet;\r\nimport java.util.Map;\r\nimport java.util.Set;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\nimport com.google.common.collect.Maps;\r\n\r\n/**\r\n * Hash bucket (two level hash) filter algorithm.\r\n * Words are stored as first character -> word length -> set of words, so a\r\n * position of the content is checked by one lookup of its character and one\r\n * set lookup per stored word length.\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月21日 下午4:59:33\r\n * @file HashBucketFilterExecutor.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.bucket.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class HashBucketFilterExecutor extends AbstractFilterExecutor<Map<Character, Map<Integer, Set<String>>>> {\r\n\r\n\tprivate HashBucketFilterExecutor() {\r\n\t\tsuper(\"二级hash(hash bucket)脱敏算法实现\");\r\n\t}\r\n\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final HashBucketFilterExecutor INSTANCE = new HashBucketFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final HashBucketFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected Map<Character, Map<Integer, Set<String>>> getCacheNodes() {\r\n\t\treturn new HashMap<Character, Map<Integer, Set<String>>>();\r\n\t}\r\n\r\n\t/**\r\n\t * Adds a word to the bucket of its first character and length.\r\n\t * \r\n\t * @param word word to add; it is trimmed first\r\n\t * @return false for a blank word or a word shorter than two characters, true otherwise\r\n\t */\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(word)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tword = StringUtils.trim(word);\r\n\t\tif (word.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tchar firstChar = word.charAt(0);\r\n\t\t\r\n\t\tMap<Integer, Set<String>> buckets = cacheNodes.get(firstChar);\r\n\t\tif (buckets == null) {\r\n\t\t\tbuckets = Maps.newHashMap();\r\n\t\t\tcacheNodes.put(firstChar, buckets);\r\n\t\t}\r\n\t\t\r\n\t\tSet<String> words = buckets.get(word.length());\r\n\t\tif (words == null) {\r\n\t\t\twords = new HashSet<String>();\r\n\t\t\tbuckets.put(word.length(), words);\r\n\t\t}\r\n\t\twords.add(word);\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t/**\r\n\t * Scans the trimmed content position by position and reports every stored word\r\n\t * that starts at the current position.\r\n\t * \r\n\t * @param partMatch when true, only the first word found at a position is reported\r\n\t *        and the scan continues right after that word; when false, every stored\r\n\t *        word starting at every position is reported\r\n\t * @param content text to scan; blank or shorter than two characters never matches\r\n\t * @param callback receiver of the matched words\r\n\t * @return true when the callback stopped the scan, false otherwise\r\n\t */\r\n\t@Override\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\tif (content.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tfor (int i = 0; i < content.length(); i++) {\r\n\t\t\t// level one: is there any word starting with this character?\r\n\t\t\tMap<Integer, Set<String>> buckets = cacheNodes.get(content.charAt(i));\r\n\t\t\tif (buckets == null) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// level two: one bucket per word length\r\n\t\t\tfor (Map.Entry<Integer, Set<String>> bucket : buckets.entrySet()) {\r\n\t\t\t\tint size = bucket.getKey();\r\n\t\t\t\tif (i + size > content.length()) {\r\n\t\t\t\t\tcontinue;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tString word = content.substring(i, i + size);\r\n\t\t\t\tif (!bucket.getValue().contains(word)) {\r\n\t\t\t\t\tcontinue;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (callback.call(word)) {\r\n\t\t\t\t\treturn true;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (partMatch) {\r\n\t\t\t\t\t// Continue right after the matched word. The loop increment adds the last\r\n\t\t\t\t\t// step, so advance by length - 1; advancing by the full length skipped one\r\n\t\t\t\t\t// character and missed a word starting directly behind the match.\r\n\t\t\t\t\t// Leave the bucket loop: these buckets belong to the old position.\r\n\t\t\t\t\ti += size - 1;\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) {\r\n\t\tHashBucketFilterExecutor.getInstance().init();\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"人民\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"中间\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"女人\");\r\n\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"一举\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"一举成名\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"一举成名走四方\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"成名\");\r\n\t\tHashBucketFilterExecutor.getInstance().put(\"走四方\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t\tcontent = \"一举成名走四方大大的好\";\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(HashBucketFilterExecutor.getInstance().highlight(false, content));\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dat/DatFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dat;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor.DatFilterExecutor;\r\n\r\n/**\r\n * DAT 算法实现敏感词脱敏过滤\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:23:20\r\n * @file DFASWFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.support.dfa\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class DatFilter extends AbstractFilter {\r\n\r\n\tpublic DatFilter() {\r\n\t\tsuper(DatFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dat/exectuor/DatCacheNode.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor;\r\n\r\nimport java.util.Set;\r\n\r\nimport com.google.common.collect.Sets;\r\n\r\n/**\r\n * 双数组脏词缓存节点\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月21日 下午3:29:03\r\n * @file DatCacheNode.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class DatCacheNode {\r\n\r\n\t//脏字库\r\n    private Set<Character> chars = Sets.newHashSet();\r\n    \r\n    //敏感词库\r\n    private Set<String> words = Sets.newHashSet();\r\n\r\n\tpublic Set<Character> getChars() {\r\n\t\treturn chars;\r\n\t}\r\n\r\n\tpublic void setChars(Set<Character> chars) {\r\n\t\tthis.chars = chars;\r\n\t}\r\n\r\n\tpublic Set<String> getWords() {\r\n\t\treturn words;\r\n\t}\r\n\r\n\tpublic void setWords(Set<String> words) {\r\n\t\tthis.words = words;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dat/exectuor/DatFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\n\r\n/**\r\n * 双数组算法过滤敏感词\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月21日 下午3:28:21\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class DatFilterExecutor extends AbstractFilterExecutor<DatCacheNode> {\r\n\r\n\tprivate DatFilterExecutor() {\r\n\t\tsuper(\"dat 双数组算法脱敏实现\");\r\n\t}\r\n\t\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final DatFilterExecutor INSTANCE = new DatFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final DatFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\r\n\t@Override\r\n\tprotected DatCacheNode getCacheNodes() {\r\n\t\treturn new DatCacheNode();\r\n\t}\r\n\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(word)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tword = StringUtils.trim(word);\r\n\t\tif (word.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcacheNodes.getWords().add(word);\r\n\t\t\r\n        for (Character character : word.toCharArray()) {\r\n        \tcacheNodes.getChars().add(character);\r\n        }\r\n        \r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\tif (content.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tfor (int i = 0; i < content.length(); i++) {\r\n            Character wordChar = content.charAt(i);\r\n            // 判断是否属于脏字符\r\n        
    if (!cacheNodes.getChars().contains(wordChar)) {\r\n                continue;\r\n            }\r\n            \r\n            int j = i + 1;\r\n            while (j < content.length()) {\r\n            \t\r\n            \t// 判断下一个字符是否属于脏字符\r\n            \twordChar = content.charAt(j);\r\n                if (!cacheNodes.getChars().contains(wordChar)) {\r\n                    break;\r\n                }\r\n                \r\n                String word = content.substring(i, j + 1);\r\n                // 判断是否是脏词\r\n                if (cacheNodes.getWords().contains(word)) {\r\n                    \r\n                \tif (callback.call(word)) {\r\n                \t\treturn true;\r\n                \t}\r\n\r\n                \tif (partMatch) {\r\n                \t\ti += word.length();\r\n                \t} \r\n                }\r\n                \r\n                j++;\r\n            }\r\n        }\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\r\n\tpublic static void main(String[] args) {\r\n\t\tDatFilterExecutor.getInstance().init();\r\n\t\tDatFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tDatFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tDatFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tDatFilterExecutor.getInstance().put(\"人民\");\r\n\t\tDatFilterExecutor.getInstance().put(\"中间\");\r\n\t\tDatFilterExecutor.getInstance().put(\"女人\");\r\n\r\n\t\tDatFilterExecutor.getInstance().put(\"一举\");\r\n\t\tDatFilterExecutor.getInstance().put(\"一举成名\");\r\n\t\tDatFilterExecutor.getInstance().put(\"一举成名走四方\");\r\n\t\tDatFilterExecutor.getInstance().put(\"成名\");\r\n\t\tDatFilterExecutor.getInstance().put(\"走四方\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().filter(false, content, 
'*'));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t\tcontent = \"一举成名走四方的是什么\";\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(DatFilterExecutor.getInstance().highlight(false, content));\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dat/exectuor/DoubleArrayTrie2.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor;\r\n\r\n/**\r\n * DoubleArrayTrie: Java implementation of Darts (Double-ARray Trie System)\r\n * \r\n * <p>\r\n * Copyright(C) 2001-2007 Taku Kudo &lt;taku@chasen.org&gt;<br />\r\n * Copyright(C) 2009 MURAWAKI Yugo &lt;murawaki@nlp.kuee.kyoto-u.ac.jp&gt;\r\n * Copyright(C) 2012 KOMIYA Atsushi &lt;komiya.atsushi@gmail.com&gt;\r\n * </p>\r\n * \r\n * <p>\r\n * The contents of this file may be used under the terms of either of the GNU\r\n * Lesser General Public License Version 2.1 or later (the \"LGPL\"), or the BSD\r\n * License (the \"BSD\").\r\n * </p>\r\n */\r\nimport java.io.BufferedInputStream;\r\nimport java.io.BufferedOutputStream;\r\nimport java.io.DataInputStream;\r\nimport java.io.DataOutputStream;\r\nimport java.io.File;\r\nimport java.io.FileInputStream;\r\nimport java.io.FileOutputStream;\r\nimport java.io.IOException;\r\nimport java.util.ArrayList;\r\nimport java.util.Collections;\r\nimport java.util.List;\r\n\r\nimport com.google.common.collect.Lists;\r\n\r\npublic class DoubleArrayTrie2 {\r\n\r\n\tprivate final static int BUF_SIZE = 16384;\r\n\tprivate final static int UNIT_SIZE = 8; // size of int + int\r\n\r\n\tprivate static class Node {\r\n\t\tint code;\r\n\t\tint depth;\r\n\t\tint left;\r\n\t\tint right;\r\n\t};\r\n\r\n\tprivate int check[];\r\n\tprivate int base[];\r\n\r\n\tprivate boolean used[];\r\n\tprivate int size;\r\n\tprivate int allocSize;\r\n\tprivate List<String> key;\r\n\tprivate int keySize;\r\n\tprivate int length[];\r\n\tprivate int value[];\r\n\tprivate int progress;\r\n\tprivate int nextCheckPos;\r\n\t// boolean no_delete_;\r\n\tint error_;\r\n\r\n\t// int (*progressfunc_) (size_t, size_t);\r\n\r\n\t// inline _resize expanded\r\n\tprivate int resize(int newSize) {\r\n\t\tint[] base2 = new int[newSize];\r\n\t\tint[] check2 = new int[newSize];\r\n\t\tboolean used2[] = new boolean[newSize];\r\n\r\n\t\tif (allocSize > 0) {\r\n\t\t\tSystem.arraycopy(base, 0, 
base2, 0, allocSize);\r\n\t\t\tSystem.arraycopy(check, 0, check2, 0, allocSize);\r\n\t\t\tSystem.arraycopy(used2, 0, used2, 0, allocSize);\r\n\t\t}\r\n\r\n\t\tbase = base2;\r\n\t\tcheck = check2;\r\n\t\tused = used2;\r\n\r\n\t\treturn allocSize = newSize;\r\n\t}\r\n\r\n\tprivate int fetch(Node parent, List<Node> siblings) {\r\n\t\tif (error_ < 0)\r\n\t\t\treturn 0;\r\n\r\n\t\tint prev = 0;\r\n\r\n\t\tfor (int i = parent.left; i < parent.right; i++) {\r\n\t\t\tif ((length != null ? length[i] : key.get(i).length()) < parent.depth)\r\n\t\t\t\tcontinue;\r\n\r\n\t\t\tString tmp = key.get(i);\r\n\r\n\t\t\tint cur = 0;\r\n\t\t\tif ((length != null ? length[i] : tmp.length()) != parent.depth)\r\n\t\t\t\tcur = (int) tmp.charAt(parent.depth) + 1;\r\n\r\n\t\t\tif (prev > cur) {\r\n\t\t\t\terror_ = -3;\r\n\t\t\t\treturn 0;\r\n\t\t\t}\r\n\r\n\t\t\tif (cur != prev || siblings.size() == 0) {\r\n\t\t\t\tNode tmp_node = new Node();\r\n\t\t\t\ttmp_node.depth = parent.depth + 1;\r\n\t\t\t\ttmp_node.code = cur;\r\n\t\t\t\ttmp_node.left = i;\r\n\t\t\t\tif (siblings.size() != 0)\r\n\t\t\t\t\tsiblings.get(siblings.size() - 1).right = i;\r\n\r\n\t\t\t\tsiblings.add(tmp_node);\r\n\t\t\t}\r\n\r\n\t\t\tprev = cur;\r\n\t\t}\r\n\r\n\t\tif (siblings.size() != 0)\r\n\t\t\tsiblings.get(siblings.size() - 1).right = parent.right;\r\n\r\n\t\treturn siblings.size();\r\n\t}\r\n\r\n\tprivate int insert(List<Node> siblings) {\r\n\t\tif (error_ < 0)\r\n\t\t\treturn 0;\r\n\r\n\t\tint begin = 0;\r\n\t\tint pos = ((siblings.get(0).code + 1 > nextCheckPos) ? 
siblings.get(0).code + 1 : nextCheckPos) - 1;\r\n\t\tint nonzero_num = 0;\r\n\t\tint first = 0;\r\n\r\n\t\tif (allocSize <= pos)\r\n\t\t\tresize(pos + 1);\r\n\r\n\t\touter: while (true) {\r\n\t\t\tpos++;\r\n\r\n\t\t\tif (allocSize <= pos)\r\n\t\t\t\tresize(pos + 1);\r\n\r\n\t\t\tif (check[pos] != 0) {\r\n\t\t\t\tnonzero_num++;\r\n\t\t\t\tcontinue;\r\n\t\t\t} else if (first == 0) {\r\n\t\t\t\tnextCheckPos = pos;\r\n\t\t\t\tfirst = 1;\r\n\t\t\t}\r\n\r\n\t\t\tbegin = pos - siblings.get(0).code;\r\n\t\t\tif (allocSize <= (begin + siblings.get(siblings.size() - 1).code)) {\r\n\t\t\t\t// progress can be zero\r\n\t\t\t\tdouble l = (1.05 > 1.0 * keySize / (progress + 1)) ? 1.05 : 1.0 * keySize / (progress + 1);\r\n\t\t\t\tresize((int) (allocSize * l));\r\n\t\t\t}\r\n\r\n\t\t\tif (used[begin])\r\n\t\t\t\tcontinue;\r\n\r\n\t\t\tfor (int i = 1; i < siblings.size(); i++)\r\n\t\t\t\tif (check[begin + siblings.get(i).code] != 0)\r\n\t\t\t\t\tcontinue outer;\r\n\r\n\t\t\tbreak;\r\n\t\t}\r\n\r\n\t\t// -- Simple heuristics --\r\n\t\t// if the percentage of non-empty contents in check between the\r\n\t\t// index\r\n\t\t// 'next_check_pos' and 'check' is greater than some constant value\r\n\t\t// (e.g. 0.9),\r\n\t\t// new 'next_check_pos' index is written by 'check'.\r\n\t\tif (1.0 * nonzero_num / (pos - nextCheckPos + 1) >= 0.95)\r\n\t\t\tnextCheckPos = pos;\r\n\r\n\t\tused[begin] = true;\r\n\t\tsize = (size > begin + siblings.get(siblings.size() - 1).code + 1) ? size\r\n\t\t        : begin + siblings.get(siblings.size() - 1).code + 1;\r\n\r\n\t\tfor (int i = 0; i < siblings.size(); i++)\r\n\t\t\tcheck[begin + siblings.get(i).code] = begin;\r\n\r\n\t\tfor (int i = 0; i < siblings.size(); i++) {\r\n\t\t\tList<Node> new_siblings = new ArrayList<Node>();\r\n\r\n\t\t\tif (fetch(siblings.get(i), new_siblings) == 0) {\r\n\t\t\t\tbase[begin + siblings.get(i).code] = (value != null) ? 
(-value[siblings.get(i).left] - 1)\r\n\t\t\t\t        : (-siblings.get(i).left - 1);\r\n\r\n\t\t\t\tif (value != null && (-value[siblings.get(i).left] - 1) >= 0) {\r\n\t\t\t\t\terror_ = -2;\r\n\t\t\t\t\treturn 0;\r\n\t\t\t\t}\r\n\r\n\t\t\t\tprogress++;\r\n\t\t\t\t// if (progress_func_) (*progress_func_) (progress,\r\n\t\t\t\t// keySize);\r\n\t\t\t} else {\r\n\t\t\t\tint h = insert(new_siblings);\r\n\t\t\t\tbase[begin + siblings.get(i).code] = h;\r\n\t\t\t}\r\n\t\t}\r\n\t\treturn begin;\r\n\t}\r\n\r\n\tpublic DoubleArrayTrie2() {\r\n\t\tcheck = null;\r\n\t\tbase = null;\r\n\t\tused = null;\r\n\t\tsize = 0;\r\n\t\tallocSize = 0;\r\n\t\t// no_delete_ = false;\r\n\t\terror_ = 0;\r\n\t}\r\n\r\n\t// no deconstructor\r\n\r\n\t// set_result omitted\r\n\t// the search methods returns (the list of) the value(s) instead\r\n\t// of (the list of) the pair(s) of value(s) and length(s)\r\n\r\n\t// set_array omitted\r\n\t// array omitted\r\n\r\n\tvoid clear() {\r\n\t\t// if (! no_delete_)\r\n\t\tcheck = null;\r\n\t\tbase = null;\r\n\t\tused = null;\r\n\t\tallocSize = 0;\r\n\t\tsize = 0;\r\n\t\t// no_delete_ = false;\r\n\t}\r\n\r\n\tpublic int getUnitSize() {\r\n\t\treturn UNIT_SIZE;\r\n\t}\r\n\r\n\tpublic int getSize() {\r\n\t\treturn size;\r\n\t}\r\n\r\n\tpublic int getTotalSize() {\r\n\t\treturn size * UNIT_SIZE;\r\n\t}\r\n\r\n\tpublic int getNonzeroSize() {\r\n\t\tint result = 0;\r\n\t\tfor (int i = 0; i < size; i++)\r\n\t\t\tif (check[i] != 0)\r\n\t\t\t\tresult++;\r\n\t\treturn result;\r\n\t}\r\n\r\n\tpublic int build(List<String> key) {\r\n\t\treturn build(key, null, null, key.size());\r\n\t}\r\n\r\n\tpublic int build(List<String> _key, int _length[], int _value[], int _keySize) {\r\n\t\tif (_keySize > _key.size() || _key == null)\r\n\t\t\treturn 0;\r\n\r\n\t\t// progress_func_ = progress_func;\r\n\t\tkey = _key;\r\n\t\tlength = _length;\r\n\t\tkeySize = _keySize;\r\n\t\tvalue = _value;\r\n\t\tprogress = 0;\r\n\r\n\t\tresize(65536 * 32);\r\n\r\n\t\tbase[0] = 
1;\r\n\t\tnextCheckPos = 0;\r\n\r\n\t\tNode root_node = new Node();\r\n\t\troot_node.left = 0;\r\n\t\troot_node.right = keySize;\r\n\t\troot_node.depth = 0;\r\n\r\n\t\tList<Node> siblings = new ArrayList<Node>();\r\n\t\tfetch(root_node, siblings);\r\n\t\tinsert(siblings);\r\n\r\n\t\t// size += (1 << 8 * 2) + 1; // ???\r\n\t\t// if (size >= allocSize) resize (size);\r\n\r\n\t\tused = null;\r\n\t\tkey = null;\r\n\r\n\t\treturn error_;\r\n\t}\r\n\t\r\n\tpublic int put(String _key, int _length[], int _value[]) {\r\n\t\tif ( _key == null)\r\n\t\t\treturn 0;\r\n\r\n\t\t// progress_func_ = progress_func;\r\n\t\tif (key == null) {\r\n\t\t\tkey = Lists.newArrayList();\r\n\t\t}\r\n\t\tkey.add(_key);\r\n\t\tkeySize = key.size();\r\n\t\t\r\n\t\tlength = _length;\r\n\t\tvalue = _value;\r\n\t\tprogress = 0;\r\n\r\n\t\tresize(65536 * 32);\r\n\r\n\t\tbase[0] = 1;\r\n\t\tnextCheckPos = 0;\r\n\r\n\t\tNode root_node = new Node();\r\n\t\troot_node.left = 0;\r\n\t\troot_node.right = keySize;\r\n\t\troot_node.depth = 0;\r\n\r\n\t\tList<Node> siblings = new ArrayList<Node>();\r\n\t\tfetch(root_node, siblings);\r\n\t\tinsert(siblings);\r\n\r\n\t\t// size += (1 << 8 * 2) + 1; // ???\r\n\t\t// if (size >= allocSize) resize (size);\r\n\r\n\t\tused = null;\r\n\t\tkey = null;\r\n\r\n\t\treturn error_;\r\n\t}\r\n\r\n\tpublic void open(String fileName) throws IOException {\r\n\t\tFile file = new File(fileName);\r\n\t\tsize = (int) file.length() / UNIT_SIZE;\r\n\t\tcheck = new int[size];\r\n\t\tbase = new int[size];\r\n\r\n\t\tDataInputStream is = null;\r\n\t\ttry {\r\n\t\t\tis = new DataInputStream(new BufferedInputStream(new FileInputStream(file), BUF_SIZE));\r\n\t\t\tfor (int i = 0; i < size; i++) {\r\n\t\t\t\tbase[i] = is.readInt();\r\n\t\t\t\tcheck[i] = is.readInt();\r\n\t\t\t}\r\n\t\t} finally {\r\n\t\t\tif (is != null)\r\n\t\t\t\tis.close();\r\n\t\t}\r\n\t}\r\n\r\n\tpublic void save(String fileName) throws IOException {\r\n\t\tDataOutputStream out = null;\r\n\t\ttry {\r\n\t\t\tout = new 
DataOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)));\r\n\t\t\tfor (int i = 0; i < size; i++) {\r\n\t\t\t\tout.writeInt(base[i]);\r\n\t\t\t\tout.writeInt(check[i]);\r\n\t\t\t}\r\n\t\t\tout.close();\r\n\t\t} finally {\r\n\t\t\tif (out != null)\r\n\t\t\t\tout.close();\r\n\t\t}\r\n\t}\r\n\r\n\tpublic int exactMatchSearch(String key) {\r\n\t\treturn exactMatchSearch(key, 0, 0, 0);\r\n\t}\r\n\r\n\tpublic int exactMatchSearch(String key, int pos, int len, int nodePos) {\r\n\t\tif (len <= 0)\r\n\t\t\tlen = key.length();\r\n\t\tif (nodePos <= 0)\r\n\t\t\tnodePos = 0;\r\n\r\n\t\tint result = -1;\r\n\r\n\t\tchar[] keyChars = key.toCharArray();\r\n\r\n\t\tint b = base[nodePos];\r\n\t\tint p;\r\n\r\n\t\tfor (int i = pos; i < len; i++) {\r\n\t\t\tp = b + (int) (keyChars[i]) + 1;\r\n\t\t\tif (b == check[p])\r\n\t\t\t\tb = base[p];\r\n\t\t\telse\r\n\t\t\t\treturn result;\r\n\t\t}\r\n\r\n\t\tp = b;\r\n\t\tint n = base[p];\r\n\t\tif (b == check[p] && n < 0) {\r\n\t\t\tresult = -n - 1;\r\n\t\t}\r\n\t\treturn result;\r\n\t}\r\n\r\n\tpublic List<Integer> commonPrefixSearch(String key) {\r\n\t\treturn commonPrefixSearch(key, 0, 0, 0);\r\n\t}\r\n\r\n\tpublic List<Integer> commonPrefixSearch(String key, int pos, int len, int nodePos) {\r\n\t\tif (len <= 0)\r\n\t\t\tlen = key.length();\r\n\t\tif (nodePos <= 0)\r\n\t\t\tnodePos = 0;\r\n\r\n\t\tList<Integer> result = new ArrayList<Integer>();\r\n\r\n\t\tchar[] keyChars = key.toCharArray();\r\n\r\n\t\tint b = base[nodePos];\r\n\t\tint n;\r\n\t\tint p;\r\n\r\n\t\tfor (int i = pos; i < len; i++) {\r\n\t\t\tp = b;\r\n\t\t\tn = base[p];\r\n\r\n\t\t\tif (b == check[p] && n < 0) {\r\n\t\t\t\tresult.add(-n - 1);\r\n\t\t\t}\r\n\r\n\t\t\tp = b + (int) (keyChars[i]) + 1;\r\n\t\t\tif (b == check[p])\r\n\t\t\t\tb = base[p];\r\n\t\t\telse\r\n\t\t\t\treturn result;\r\n\t\t}\r\n\r\n\t\tp = b;\r\n\t\tn = base[p];\r\n\r\n\t\tif (b == check[p] && n < 0) {\r\n\t\t\tresult.add(-n - 1);\r\n\t\t}\r\n\r\n\t\treturn result;\r\n\t}\r\n\r\n\t// 
debug\r\n\tpublic void dump() {\r\n\t\tfor (int i = 0; i < size; i++) {\r\n\t\t\tSystem.err.println(\"i: \" + i + \" [\" + base[i] + \", \" + check[i] + \"]\");\r\n\t\t}\r\n\t}\r\n\r\n\tpublic static void main(String[] args) {\r\n\t\tDoubleArrayTrie2 dat = new DoubleArrayTrie2();\r\n\r\n\t\tList<String> list = Lists.newArrayList();\r\n\t\t/*list.add(\"一举\");\r\n\t\tlist.add(\"一举成名\");\r\n\t\tlist.add(\"一举成名走四方\");*/\r\n\t\tlist.add(\"成名\");\r\n\t\tlist.add(\"走四方\");\r\n\r\n\t\tCollections.sort(list);\r\n\r\n\t\tdat.build(list);\r\n\t\t\r\n\t\tdat.put(\"一举\", null, null);\r\n\t\tdat.put(\"一举成名\", null, null);\r\n\r\n\t\tString content = \"一举成名走四方的是什么\";\r\n\t\tList<Integer> rect = dat.commonPrefixSearch(content);\r\n\t\tSystem.out.println();\r\n\t\tfor (int index : rect) {\r\n\t\t\tSystem.out.println(\"前缀  \" + list.get(index) + \" matched\");\r\n\t\t}\r\n\r\n\t\t// 检索key是否完全命中了词典中的某个词\r\n\t\tint index = dat.exactMatchSearch(\"成\");\r\n\t\tif (index >= 0) {\r\n\t\t\tSystem.out.println(content + \" match \" + list.get(index));\r\n\t\t} else {\r\n\t\t\tSystem.out.println(content + \" not match any term\");\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/DfaFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dfa;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dfa.executor.DfaFilterExecutor;\r\n\r\n/**\r\n * DFA 算法实现敏感词脱敏过滤\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:23:20\r\n * @file DFASWFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.support.dfa\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class DfaFilter extends AbstractFilter {\r\n\r\n\tpublic DfaFilter() {\r\n\t\tsuper(DfaFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/executor/DfaFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dfa.executor;\r\n\r\nimport java.util.HashMap;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\nimport com.google.common.collect.Maps;\r\n\r\n/**\r\n * DFA 脱敏算法实现支持类\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月9日 上午10:34:42\r\n * @file DfaFilterExecutor.java\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class DfaFilterExecutor extends AbstractFilterExecutor<HashMap<Character, DfaNode>> {\r\n\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final DfaFilterExecutor INSTANCE = new DfaFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final DfaFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\tprivate DfaFilterExecutor() {\r\n\t\tsuper(\"DFA 脱敏算法实现支持类\");\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(word)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tword = StringUtils.trim(word);\r\n\t\tif (word.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tCharacter fisrtChar = word.charAt(0);\r\n\t\tDfaNode node = cacheNodes.get(fisrtChar);\r\n\t\tif (node == null) {\r\n\t\t\tnode = new DfaNode(fisrtChar);\r\n\t\t\tcacheNodes.put(fisrtChar, node);\r\n\t\t}\r\n\t\t\r\n\t\tfor (int i = 1; i < word.length(); i++) {\r\n\t\t\tCharacter nextChar = word.charAt(i); \r\n\t\t\t\r\n\t\t\tDfaNode nextNode = null;\r\n\t\t\tif (!node.isLeaf()) {\r\n\t\t\t\tnextNode = node.getChilds().get(nextChar);\r\n\t\t\t} \r\n\t\t\tif (nextNode == null) {\r\n\t\t\t\tnextNode = new DfaNode(nextChar);\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tnode.addChild(nextNode);\r\n\t\t\tnode = nextNode;\r\n\t\t\t\r\n\t\t\tif (i == word.length() - 1) {\r\n\t\t\t\tnode.setWord(true);\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn 
true;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\tif (content.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tfor (int index = 0; index < content.length(); index++) {\r\n\t\t\tchar fisrtChar = content.charAt(index);\r\n\t\t\t\r\n\t\t\tDfaNode node = cacheNodes.get(fisrtChar);\r\n\t\t\tif (node == null || node.isLeaf()) {\r\n\t\t\t\tcontinue;\r\n\t\t\t} \r\n\t\t\t\r\n\t\t\tint charCount = 1;\r\n\t\t\tfor (int i = index + 1; i < content.length(); i++) {\r\n\t\t\t\tchar wordChar = content.charAt(i);\r\n\t\t\t\t\r\n\t\t\t\tnode = node.getChilds().get(wordChar);\r\n\t\t\t\tif (node != null) {\r\n\t\t\t\t\tcharCount++;\r\n\t\t\t\t} else {\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (partMatch && node.isWord()) {\r\n\t\t\t\t\tif (callback.call(StringUtils.substring(content, index, index + charCount))) {\r\n\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t}\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t} else if (node.isWord()) {\r\n\t\t\t\t\tif (callback.call(StringUtils.substring(content, index, index + charCount))) {\r\n\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (node.isLeaf()) {\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tif (partMatch) {\r\n\t\t\t\tindex += charCount;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected HashMap<Character, DfaNode> getCacheNodes() {\r\n\t\treturn Maps.newHashMap();\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) 
{\r\n\t\tDfaFilterExecutor.getInstance().init();\r\n\t\tDfaFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"人民\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"中间\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"女人\");\r\n\r\n\t\tDfaFilterExecutor.getInstance().put(\"一举\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"一举成名\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"一举成名走四方\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"成名\");\r\n\t\tDfaFilterExecutor.getInstance().put(\"走四方\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().contains(true, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().highlight(true, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t\tcontent = \"一举成名走四方的是什么\";\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().highlight(true, content));\r\n\t\tSystem.out.println(DfaFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/executor/DfaNode.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.dfa.executor;\r\n\r\nimport java.util.Map;\r\nimport java.util.Set;\r\n\r\nimport com.google.common.collect.Maps;\r\n\r\n/**\r\n * dfa多叉树模型\r\n * @author hoojo\r\n * @createDate 2018年2月8日 下午8:23:27\r\n * @file DfaNode.java\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class DfaNode {\r\n\r\n\tprivate char _char;\r\n\tprivate DfaNode parent;\r\n\tprivate boolean word;\r\n\tprivate Map<Character, DfaNode> childs;\r\n\r\n\tpublic DfaNode() {\r\n\t}\r\n\r\n\tpublic DfaNode(char _char) {\r\n\t\tthis._char = _char;\r\n\t}\r\n\r\n\tpublic boolean isWord() {\r\n\t\treturn word;\r\n\t}\r\n\r\n\tpublic void setWord(boolean word) {\r\n\t\tthis.word = word;\r\n\t}\r\n\t\r\n\tpublic boolean isLeaf() {\r\n\t\treturn (childs == null || childs.isEmpty());\r\n\t}\r\n\r\n\tpublic char getChar() {\r\n\t\treturn _char;\r\n\t}\r\n\r\n\tpublic void setChar(char _char) {\r\n\t\tthis._char = _char;\r\n\t}\r\n\r\n\tpublic void addChild(DfaNode child) {\r\n\t\tif (this.childs == null) {\r\n\t\t\tchilds = Maps.newHashMap();\r\n\t\t}\r\n\t\t\r\n\t\tthis.childs.put(child.getChar(), child);\r\n\t\t//child.setParent(this);\r\n\t}\r\n\r\n\tpublic void removeChild(DfaNode child) {\r\n\t\tif (this.childs != null) {\r\n\t\t\tthis.childs.remove(child.getChar());\r\n\t\t}\r\n\t}\r\n\r\n\tpublic DfaNode getParent() {\r\n\t\treturn parent;\r\n\t}\r\n\r\n\tpublic void setParent(DfaNode parent) {\r\n\t\tthis.parent = parent;\r\n\t}\r\n\r\n\tpublic Map<Character, DfaNode> getChilds() {\r\n\t\t/*if (this.childs == null) {\r\n\t\t\tthis.childs = Maps.newHashMap();\r\n\t\t}*/\r\n\t\treturn this.childs;\r\n\t}\r\n\r\n\tpublic void setChilds(Map<Character, DfaNode> childs) {\r\n\t\tthis.childs = childs;\r\n\t}\r\n\r\n\tpublic void print(DfaNode node) {\r\n\t\tSystem.out.println(node.getChar());\r\n\t\tif (node.getChilds() != null) 
{\r\n\t\t\tSet<Character> keys = node.getChilds().keySet();\r\n\t\t\tfor (Character _char: keys) {\r\n\t\t\t\tprint(node.getChilds().get(_char));\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) {\r\n\t\tDfaNode node = new DfaNode('中');\r\n\t\t\r\n\t\tDfaNode g = new DfaNode('国');\r\n\t\tg.addChild(new DfaNode('人'));\r\n\t\t\r\n\t\tDfaNode n = new DfaNode('男');\r\n\t\tn.addChild(new DfaNode('人'));\r\n\t\tg.addChild(n);\r\n\t\t\r\n\t\tnode.addChild(g);\r\n\t\tnode.addChild(new DfaNode('间'));\r\n\t\t\r\n\t\tnode.print(node);\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/SimHashFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.simhash;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.simhash.executor.SimHashFilterExecutor;\r\n\r\n/**\r\n * simhash 算法\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月23日 下午5:55:49\r\n * @file SimHashFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class SimHashFilter extends AbstractFilter {\r\n\r\n\tpublic SimHashFilter() {\r\n\t\tsuper(SimHashFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/FNVHashUtils.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;\r\n\r\nimport java.math.BigInteger;\r\n\r\n/**\r\n * http://blog.csdn.net/hustfoxy/article/details/23687239\r\n * https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月22日 下午6:48:41\r\n * @file FNVHashUtils.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class FNVHashUtils {\r\n\r\n\tpublic static int HASH_BITS = 64;\r\n\t/** 初始的哈希值 */\r\n\tpublic static final BigInteger FNV_64_OFFSET_BASIS = new BigInteger(\"14695981039346656037\");\r\n\t/** FNV用于散列的质数 */\r\n\tpublic static final BigInteger FNV_64_PRIME = new BigInteger(\"1099511628211\");\r\n\t/** 64位掩码（2^64 - 1），用于把结果截断为64位无符号值 */\r\n\tpublic static final BigInteger FNV_64_OCTET_OF_DATA = BigInteger.ONE.shiftLeft(HASH_BITS).subtract(BigInteger.ONE);\r\n\r\n\t/**\r\n\t * fnv-1 hash算法（先乘质数再异或，按char逐个处理），将字符串转换为64位hash值\r\n\t */\r\n\tpublic static BigInteger hash64(String text) {\r\n\t\tBigInteger hash = FNV_64_OFFSET_BASIS;\r\n\t\t\r\n\t\tint len = text.length();\r\n\t\tfor (int i = 0; i < len; i++) {\r\n\t\t\thash = hash.multiply(FNV_64_PRIME);\r\n\t\t\thash = hash.xor(BigInteger.valueOf(text.charAt(i)));\r\n\t\t}\r\n\t\thash = hash.and(FNV_64_OCTET_OF_DATA);\r\n\t\t\r\n\t\treturn hash;\r\n\t}\r\n\r\n\t/**\r\n\t * fnv-1a hash算法（先异或再乘质数，按char逐个处理），将字符串转换为64位hash值\r\n\t */\r\n\tpublic static BigInteger hash64_(String text) {\r\n\t\tBigInteger hash = FNV_64_OFFSET_BASIS;\r\n\t\t\r\n\t\tint len = text.length();\r\n\t\tfor (int i = 0; i < len; i++) {\r\n\t\t\thash = hash.xor(BigInteger.valueOf(text.charAt(i)));\r\n\t\t\thash = hash.multiply(FNV_64_PRIME);\r\n\t\t}\r\n\t\thash = hash.and(FNV_64_OCTET_OF_DATA);\r\n\t\t\r\n\t\treturn hash;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/Murmur3.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;\r\n\r\n/**\r\n *   Copyright 2014 Prasanth Jayachandran\r\n *\r\n * Licensed under the Apache License, Version 2.0 (the \"License\");\r\n * you may not use this file except in compliance with the License.\r\n * You may obtain a copy of the License at\r\n *\r\n *     http://www.apache.org/licenses/LICENSE-2.0\r\n *\r\n * Unless required by applicable law or agreed to in writing, software\r\n * distributed under the License is distributed on an \"AS IS\" BASIS,\r\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r\n * See the License for the specific language governing permissions and\r\n * limitations under the License.\r\n */\r\n\r\n/**\r\n * Murmur3 32 and 128 bit variants.\r\n * 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94\r\n * 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255\r\n */\r\npublic class Murmur3 {\r\n  // Constants for 32 bit variant\r\n  private static final int C1_32 = 0xcc9e2d51;\r\n  private static final int C2_32 = 0x1b873593;\r\n  private static final int R1_32 = 15;\r\n  private static final int R2_32 = 13;\r\n  private static final int M_32 = 5;\r\n  private static final int N_32 = 0xe6546b64;\r\n\r\n  // Constants for 128 bit variant\r\n  private static final long C1 = 0x87c37b91114253d5L;\r\n  private static final long C2 = 0x4cf5ad432745937fL;\r\n  private static final int R1 = 31;\r\n  private static final int R2 = 27;\r\n  private static final int R3 = 33;\r\n  private static final int M = 5;\r\n  private static final int N1 = 0x52dce729;\r\n  private static final int N2 = 0x38495ab5;\r\n\r\n  private static final int DEFAULT_SEED = 0;\r\n\r\n  /**\r\n   * Murmur3 32-bit variant.\r\n   *\r\n   * @param data - input byte array\r\n   * @return - hashcode\r\n   */\r\n  public static int hash32(byte[] data) {\r\n    return hash32(data, 
data.length, DEFAULT_SEED);\r\n  }\r\n\r\n  /**\r\n   * Murmur3 32-bit variant.\r\n   *\r\n   * @param data   - input byte array\r\n   * @param length - length of array\r\n   * @param seed   - seed. (default 0)\r\n   * @return - hashcode\r\n   */\r\n  public static int hash32(byte[] data, int length, int seed) {\r\n    int hash = seed;\r\n    final int nblocks = length >> 2;\r\n\r\n    // body\r\n    for (int i = 0; i < nblocks; i++) {\r\n      int i_4 = i << 2;\r\n      int k = (data[i_4] & 0xff)\r\n          | ((data[i_4 + 1] & 0xff) << 8)\r\n          | ((data[i_4 + 2] & 0xff) << 16)\r\n          | ((data[i_4 + 3] & 0xff) << 24);\r\n\r\n      // mix functions\r\n      k *= C1_32;\r\n      k = Integer.rotateLeft(k, R1_32);\r\n      k *= C2_32;\r\n      hash ^= k;\r\n      hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;\r\n    }\r\n\r\n    // tail\r\n    int idx = nblocks << 2;\r\n    int k1 = 0;\r\n    switch (length - idx) {\r\n      case 3:\r\n        k1 ^= data[idx + 2] << 16;\r\n      case 2:\r\n        k1 ^= data[idx + 1] << 8;\r\n      case 1:\r\n        k1 ^= data[idx];\r\n\r\n        // mix functions\r\n        k1 *= C1_32;\r\n        k1 = Integer.rotateLeft(k1, R1_32);\r\n        k1 *= C2_32;\r\n        hash ^= k1;\r\n    }\r\n\r\n    // finalization\r\n    hash ^= length;\r\n    hash ^= (hash >>> 16);\r\n    hash *= 0x85ebca6b;\r\n    hash ^= (hash >>> 13);\r\n    hash *= 0xc2b2ae35;\r\n    hash ^= (hash >>> 16);\r\n\r\n    return hash;\r\n  }\r\n\r\n  /**\r\n   * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant.\r\n   *\r\n   * @param data - input byte array\r\n   * @return - hashcode\r\n   */\r\n  public static long hash64(byte[] data) {\r\n    return hash64(data, data.length, DEFAULT_SEED);\r\n  }\r\n\r\n  /**\r\n   * Murmur3 64-bit variant. 
This is essentially MSB 8 bytes of Murmur3 128-bit variant.\r\n   *\r\n   * @param data   - input byte array\r\n   * @param length - length of array\r\n   * @param seed   - seed. (default is 0)\r\n   * @return - hashcode\r\n   */\r\n  public static long hash64(byte[] data, int length, int seed) {\r\n    long hash = seed;\r\n    final int nblocks = length >> 3;\r\n\r\n    // body\r\n    for (int i = 0; i < nblocks; i++) {\r\n      final int i8 = i << 3;\r\n      long k = ((long) data[i8] & 0xff)\r\n          | (((long) data[i8 + 1] & 0xff) << 8)\r\n          | (((long) data[i8 + 2] & 0xff) << 16)\r\n          | (((long) data[i8 + 3] & 0xff) << 24)\r\n          | (((long) data[i8 + 4] & 0xff) << 32)\r\n          | (((long) data[i8 + 5] & 0xff) << 40)\r\n          | (((long) data[i8 + 6] & 0xff) << 48)\r\n          | (((long) data[i8 + 7] & 0xff) << 56);\r\n\r\n      // mix functions\r\n      k *= C1;\r\n      k = Long.rotateLeft(k, R1);\r\n      k *= C2;\r\n      hash ^= k;\r\n      hash = Long.rotateLeft(hash, R2) * M + N1;\r\n    }\r\n\r\n    // tail\r\n    long k1 = 0;\r\n    int tailStart = nblocks << 3;\r\n    switch (length - tailStart) {\r\n      case 7:\r\n        k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;\r\n      case 6:\r\n        k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;\r\n      case 5:\r\n        k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;\r\n      case 4:\r\n        k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;\r\n      case 3:\r\n        k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;\r\n      case 2:\r\n        k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;\r\n      case 1:\r\n        k1 ^= ((long) data[tailStart] & 0xff);\r\n        k1 *= C1;\r\n        k1 = Long.rotateLeft(k1, R1);\r\n        k1 *= C2;\r\n        hash ^= k1;\r\n    }\r\n\r\n    // finalization\r\n    hash ^= length;\r\n    hash = fmix64(hash);\r\n\r\n    return hash;\r\n  }\r\n\r\n  /**\r\n   * Murmur3 128-bit variant.\r\n   *\r\n   * @param data - input byte 
array\r\n   * @return - hashcode (2 longs)\r\n   */\r\n  public static long[] hash128(byte[] data) {\r\n    return hash128(data, data.length, DEFAULT_SEED);\r\n  }\r\n\r\n  /**\r\n   * Murmur3 128-bit variant.\r\n   *\r\n   * @param data   - input byte array\r\n   * @param length - length of array\r\n   * @param seed   - seed. (default is 0)\r\n   * @return - hashcode (2 longs)\r\n   */\r\n  public static long[] hash128(byte[] data, int length, int seed) {\r\n    long h1 = seed;\r\n    long h2 = seed;\r\n    final int nblocks = length >> 4;\r\n\r\n    // body\r\n    for (int i = 0; i < nblocks; i++) {\r\n      final int i16 = i << 4;\r\n      long k1 = ((long) data[i16] & 0xff)\r\n          | (((long) data[i16 + 1] & 0xff) << 8)\r\n          | (((long) data[i16 + 2] & 0xff) << 16)\r\n          | (((long) data[i16 + 3] & 0xff) << 24)\r\n          | (((long) data[i16 + 4] & 0xff) << 32)\r\n          | (((long) data[i16 + 5] & 0xff) << 40)\r\n          | (((long) data[i16 + 6] & 0xff) << 48)\r\n          | (((long) data[i16 + 7] & 0xff) << 56);\r\n\r\n      long k2 = ((long) data[i16 + 8] & 0xff)\r\n          | (((long) data[i16 + 9] & 0xff) << 8)\r\n          | (((long) data[i16 + 10] & 0xff) << 16)\r\n          | (((long) data[i16 + 11] & 0xff) << 24)\r\n          | (((long) data[i16 + 12] & 0xff) << 32)\r\n          | (((long) data[i16 + 13] & 0xff) << 40)\r\n          | (((long) data[i16 + 14] & 0xff) << 48)\r\n          | (((long) data[i16 + 15] & 0xff) << 56);\r\n\r\n      // mix functions for k1\r\n      k1 *= C1;\r\n      k1 = Long.rotateLeft(k1, R1);\r\n      k1 *= C2;\r\n      h1 ^= k1;\r\n      h1 = Long.rotateLeft(h1, R2);\r\n      h1 += h2;\r\n      h1 = h1 * M + N1;\r\n\r\n      // mix functions for k2\r\n      k2 *= C2;\r\n      k2 = Long.rotateLeft(k2, R3);\r\n      k2 *= C1;\r\n      h2 ^= k2;\r\n      h2 = Long.rotateLeft(h2, R1);\r\n      h2 += h1;\r\n      h2 = h2 * M + N2;\r\n    }\r\n\r\n    // tail\r\n    long k1 = 0;\r\n    long k2 = 0;\r\n    
int tailStart = nblocks << 4;\r\n    switch (length - tailStart) {\r\n      case 15:\r\n        k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;\r\n      case 14:\r\n        k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;\r\n      case 13:\r\n        k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;\r\n      case 12:\r\n        k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;\r\n      case 11:\r\n        k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;\r\n      case 10:\r\n        k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;\r\n      case 9:\r\n        k2 ^= (long) (data[tailStart + 8] & 0xff);\r\n        k2 *= C2;\r\n        k2 = Long.rotateLeft(k2, R3);\r\n        k2 *= C1;\r\n        h2 ^= k2;\r\n\r\n      case 8:\r\n        k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;\r\n      case 7:\r\n        k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;\r\n      case 6:\r\n        k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;\r\n      case 5:\r\n        k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;\r\n      case 4:\r\n        k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;\r\n      case 3:\r\n        k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;\r\n      case 2:\r\n        k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;\r\n      case 1:\r\n        k1 ^= (long) (data[tailStart] & 0xff);\r\n        k1 *= C1;\r\n        k1 = Long.rotateLeft(k1, R1);\r\n        k1 *= C2;\r\n        h1 ^= k1;\r\n    }\r\n\r\n    // finalization\r\n    h1 ^= length;\r\n    h2 ^= length;\r\n\r\n    h1 += h2;\r\n    h2 += h1;\r\n\r\n    h1 = fmix64(h1);\r\n    h2 = fmix64(h2);\r\n\r\n    h1 += h2;\r\n    h2 += h1;\r\n\r\n    return new long[]{h1, h2};\r\n  }\r\n\r\n  private static long fmix64(long h) {\r\n    h ^= (h >>> 33);\r\n    h *= 0xff51afd7ed558ccdL;\r\n    h ^= (h >>> 33);\r\n    h *= 0xc4ceb9fe1a85ec53L;\r\n    h ^= (h >>> 33);\r\n    return h;\r\n  }\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/SimHashFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;\r\n\r\nimport java.util.Map;\r\nimport java.util.Set;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\nimport com.google.common.collect.Maps;\r\n\r\n/**\r\n * google simhash 算法实现脱敏过滤\r\n * \r\n * 由于simhash是对大文本进行比较，并且比较的是在支持分词的基础上对分词对象进行比较，进而确定相似度。\r\n * 故 在脱敏方面支持不是很友好，在大文本情况下，效率低下。\r\n * 要改善这种情况，需要分词库支持。\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月22日 上午11:07:47\r\n * @file SimHashFilterExecutor.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class SimHashFilterExecutor extends AbstractFilterExecutor<Map<Character, Map<String, Set<String>>>> {\r\n\r\n\tprivate SimHashFilterExecutor() {\r\n\t\tsuper(\"simhash 算法脱敏实现\");\r\n\t}\r\n\t\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final SimHashFilterExecutor INSTANCE = new SimHashFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final SimHashFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\r\n\t@Override\r\n\tprotected Map<Character, Map<String, Set<String>>> getCacheNodes() {\r\n\t\treturn Maps.newHashMap();\r\n\t}\r\n\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(word)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tword = StringUtils.trim(word);\r\n\t\tif (word.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tCharacter firstChar = word.charAt(0);\r\n\t\t\r\n\t\tMap<String, Set<String>> hashs = cacheNodes.get(firstChar);\r\n\t\tif (hashs == null) {\r\n\t\t\thashs = Maps.newHashMap();\r\n\t\t\tcacheNodes.put(firstChar, hashs);\r\n\t\t}\r\n\t\t\r\n\t\tString hash = SimHashUtils._simhash(word);\r\n\t\tString[] chunks = 
SimHashUtils.chunk(hash);\r\n\t\t\r\n\t\tMap<String, Set<String>> map = SimHashUtils.cartesianProduct(chunks);\r\n\t\tSet<String> keys = map.keySet();\r\n\t\tfor (String chunk : keys) {\r\n\t\t\tif (!hashs.containsKey(chunk)) {\r\n\t\t\t\thashs.put(chunk, map.get(chunk));\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\r\n\t@Override\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\tif (content.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tfor (int i = 0; i < content.length(); i++) {\r\n\t\t\tCharacter wordChar = content.charAt(i);\r\n            \r\n\t\t\t// 判断是否属于脏字符\r\n            if (!cacheNodes.containsKey(wordChar)) {\r\n                continue;\r\n            }\r\n\t\t\t\r\n            Map<String, Set<String>> hashs = cacheNodes.get(wordChar);\r\n            int j = i + 1;\r\n            while (j < content.length()) {\r\n                String word = content.substring(i, j + 1);\r\n                // 判断是否是脏词\r\n                if (SimHashUtils.contains(word, hashs)) {\r\n                    \r\n                \tif (callback.call(word)) {\r\n                \t\treturn true;\r\n                \t}\r\n\r\n                \tif (partMatch) {\r\n                \t\ti += word.length();\r\n                \t} \r\n                }\r\n                \r\n                j++;\r\n            }\r\n        }\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\r\n\tpublic static void main(String[] args) 
{\r\n\t\t\r\n\t\tSimHashFilterExecutor.getInstance().init();\r\n\t\tSimHashFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"人民\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"中间\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"女人\");\r\n\r\n\t\tSimHashFilterExecutor.getInstance().put(\"一举\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"一举成名\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"一举成名走四方\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"成名\");\r\n\t\tSimHashFilterExecutor.getInstance().put(\"走四方\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t\tcontent = \"一举成名走四方大大的好\";\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(SimHashFilterExecutor.getInstance().highlight(false, content));\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/SimHashUtils.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;\r\n\r\nimport java.math.BigInteger;\r\nimport java.util.ArrayList;\r\nimport java.util.Arrays;\r\nimport java.util.HashMap;\r\nimport java.util.HashSet;\r\nimport java.util.List;\r\nimport java.util.Map;\r\nimport java.util.Set;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.google.common.collect.Maps;\r\n\r\n/**\r\n * simhash 算法工具类 \r\n * \r\n * 介绍下这个算法主要原理，为了便于理解尽量不使用数学公式，分为这几步： \r\n * \r\n * 1、分词，把需要判断文本分词形成这个文章的特征单词。\r\n * 最后形成去掉噪音词的单词序列并为每个词加上权重，我们假设权重分为5个级别（1~5）。\r\n * \r\n * 比如：“ 美国“51区”雇员称内部有9架飞碟，曾看见灰色外星人 ” ==> 分词后为 “ 美国（4） 51区（5） 雇员（3） 称（1） 内部（2） 有（1） 9架（3） 飞碟（5） 曾（1） 看见（3） 灰色（4） 外星人（5）”，\r\n * 括号里是代表单词在整个句子里重要程度，数字越大越重要。 \r\n * \r\n * 2、hash，通过hash算法把每个词变成hash值， \r\n * 比如“美国”通过hash算法计算为 100101,“51区”通过hash算法计算为 101011。\r\n * 这样我们的字符串就变成了一串串数字， 还记得文章开头说过的吗，要把文章变为数字计算才能提高相似度计算性能，现在是降维过程进行时。 \r\n * \r\n * 3、加权，通过 2步骤的hash生成结果，需要按照单词的权重形成加权数字串，\r\n * 比如“美国”的hash值为“100101”，通过加权计算为“4 -4 -4 4 -4 4”； \r\n * “51区”的hash值为“101011”，通过加权计算为 “ 5 -5 5 -5 5 5”。\r\n * \r\n * 4、合并，把上面各个单词算出来的序列值累加，变成只有一个序列串。\r\n *  比如 “美国”的 “4 -4 -4 4 -4 4”，“51区”的 “ 5 -5 5 -5 5 5”，\r\n *  把每一位进行累加， “4+5 -4+-5 -4+5 4+-5-4+5 4+5” ==》 “9 -9 1 -1 1 9”。 \r\n *  这里作为示例只算了两个单词的，真实计算需要把所有单词的序列串累加。 \r\n * \r\n * 5、降维，把4步算出来的 “9 -9 1 -1 1 9” 变成 0 1串，形成我们最终的simhash签名。 \r\n * \t如果每一位大于0 记为 1，小于0 记为 0。\r\n * \t最后算出结果为：“1 0 1 0 1 1”。 \r\n * \r\n * http://www.lanceyan.com/tag/simhash\r\n * \r\n * \r\n * 让我们来总结一下上述算法的实质：<br/>\r\n *  1、将64位的二进制串等分成四块 <br/>\r\n *  2、调整上述64位二进制，将任意一块作为前16位，总共有四种组合，生成四份table <br/>\r\n *  3、采用精确匹配的方式查找前16位<br/>\r\n *  4、如果样本库中存有2^34（差不多10亿）的哈希指纹，则每个table返回2^(34-16)=262144个候选结果，大大减少了海明距离的计算成本<br/>\r\n * \r\n * 具体simhash步骤如下：\r\n\t（1）将文档分词，取一个文章的TF-IDF权重最高的前20个词（feature）和权重（weight）。\r\n\t\t即一篇文档得到一个长度为20的（feature：weight）的集合。\r\n\t（2）对其中的词（feature），进行普通的哈希之后得到一个64为的二进制，得到长度为20的（hash : 
weight）的集合。\r\n\t（3）根据（2）中得到一串二进制数（hash）中相应位置是1还是0，对相应位置取正值weight和负值weight。\r\n\t\t\t例如一个词经过（2）得到（010111：5）经过步骤（3）之后可以得到列表[-5,5,-5,5,5,5]，\r\n\t\t\t即对一个文档，我们可以得到20个长度为64的列表[weight，-weight...weight]。\r\n\t（4）对（3）中20个列表进行列向累加得到一个列表。如[-5,5,-5,5,5,5]、[-3,-3,-3,3,-3,3]、[1,-1,-1,1,1,1]进行列向累加得到[-7，1，-9，9，3，9]，这样，我们对一个文档得到，一个长度为64的列表。\r\n\t（5）对（4）中得到的列表中每个值进行判断，当为负值的时候取0，正值取1。例如，[-7，1，-9，9，3，9]得到010111，这样，我们就得到一个文档的simhash值了。\r\n\t（6）计算相似性。两个simhash取异或，看其中1的个数是否超过3。超过3则判定为不相似，小于等于3则判定为相似。\r\n * @author hoojo\r\n * @createDate 2018年3月22日 下午4:50:23\r\n * @file SimHashUtils.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic abstract class SimHashUtils {\r\n\r\n\tprivate static final int HASH_BITS = FNVHashUtils.HASH_BITS;\r\n\r\n\t/** 默认按照4段进行simhash存储 */\r\n\tprivate static final int CHUNK_COUNT = 4;\r\n\t/** 汉明距离的衡量标准 */\r\n\tprivate static final int HAMMING_THRESH = 3;\r\n\r\n\tpublic static Long simhash(Map<String, Integer> words) {\r\n\t\t//1、分词：直接加入单词、脏词，所以不存在分词\r\n\r\n\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\tList<Integer[]> mergeWeights = new ArrayList<Integer[]>(words.size());\r\n\t\t\r\n\t\tSet<String> wordSet = words.keySet();\r\n\t\tfor (String word : wordSet) {\r\n\r\n\t\t\t//2、hash: 计算分词的hash\r\n\t\t\tlong hash = hash(word);\r\n\t\t\t\r\n\t\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\t\tArrays.fill(weights, 0);\r\n\t\t\t\r\n\t\t\t//3、加权：即W = Hash * weight，且遇到1则hash值和权值正相乘，遇到0则hash值和权值负相乘\r\n\t\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\t\tif (((hash >> i) & 1) == 1) {\r\n\t\t\t\t\t// 3.1 正负值weight\r\n\t\t\t\t\tweights[i] += 1;\r\n\t\t\t\t} else {\r\n\t\t\t\t\tweights[i] -= 1;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\t// 3.2 增加权重： W = Hash * weight\r\n\t\t\t\tweights[i] *= 
words.get(word);\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tmergeWeights.add(weights);\r\n\t\t}\r\n\t\t\r\n\t\t//4、合并：把上面各个单词算出来的序列值累加\r\n\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\tArrays.fill(weights, 0);\r\n\t\tfor (Integer[] weight : mergeWeights) {\r\n\r\n\t\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\t\tweights[i] += weight[i];\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\t//5、降维：大于0 记为 1，小于0 记为 0\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (weights[i] > 0) {\r\n\t\t\t\tsb.append(1);\r\n\t\t\t} else {\r\n\t\t\t\tsb.append(0);\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\tSystem.out.println(sb);\r\n\t\treturn new BigInteger(sb.toString(), 2).longValue();\r\n\t}\r\n\t\r\n\tpublic static Long simhash_(Map<String, Integer> words) {\r\n\t\t//1、分词：直接加入单词、脏词，所以不存在分词\r\n\r\n\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\tArrays.fill(weights, 0);\t\t\r\n\t\t\r\n\t\tSet<String> wordSet = words.keySet();\r\n\t\tfor (String word : wordSet) {\r\n\r\n\t\t\t//2、hash: 计算分词的hash\r\n\t\t\tlong hash = hash(word);\r\n\t\t\t\r\n\t\t\t//3、加权：即W = Hash * weight，且遇到1则hash值和权值正相乘，遇到0则hash值和权值负相乘\r\n\t\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\t\tInteger weight = 0;\r\n\t\t\t\tif (((hash >> i) & 1) == 1) {\r\n\t\t\t\t\t// 3.1 正负值weight\r\n\t\t\t\t\tweight += 1;\r\n\t\t\t\t} else {\r\n\t\t\t\t\tweight -= 1;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\t// 3.2 增加权重： W = Hash * weight\r\n\t\t\t\tweight *= words.get(word);\r\n\t\t\t\t\r\n\t\t\t\t//4、合并：把上面各个单词算出来的序列值累加\r\n\t\t\t\tweights[i] += weight;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\t//5、降维：大于0 记为 1，小于0 记为 0\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (weights[i] > 0) {\r\n\t\t\t\tsb.append(1);\r\n\t\t\t} else {\r\n\t\t\t\tsb.append(0);\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\tSystem.out.println(sb);\r\n\t\treturn new BigInteger(sb.toString(), 2).longValue();\r\n\t}\r\n\t\r\n\tpublic static Long 
simhash(String word, int weight) {\r\n\t\t//1、分词：直接加入单词、脏词，所以不存在分词\r\n\t\t\r\n\t\t//2、hash: 计算分词的hash\r\n\t\tlong hash = hash(word);\r\n\t\t\r\n\t\t//3、加权：即W = Hash * weight，且遇到1则hash值和权值正相乘，遇到0则hash值和权值负相乘\r\n\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\tArrays.fill(weights, 0);\r\n\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (((hash >> i) & 1) == 1) {\r\n\t\t\t\t// 3.1 正负值weight\r\n\t\t\t\tweights[i] += 1;\r\n\t\t\t} else {\r\n\t\t\t\tweights[i] -= 1;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 3.2 增加权重： W = Hash * weight\r\n\t\t\tweights[i] *= weight;\r\n\t\t}\r\n\t\t\r\n\t\t//4、合并：把上面各个单词算出来的序列值累加\r\n\t\t// 由于单个词，所以不存在合并\r\n\t\t\r\n\t\t//5、降维：大于0 记为 1，小于0 记为 0\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (weights[i] > 0) {\r\n\t\t\t\tsb.append(1);\r\n\t\t\t} else {\r\n\t\t\t\tsb.append(0);\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\tSystem.out.println(sb);\r\n\t\treturn new BigInteger(sb.toString(), 2).longValue();\r\n\t}\r\n\t\r\n\tpublic static Long simhash(String word) {\r\n\t\t//1、分词：直接加入单词、脏词，所以不存在分词\r\n\t\t\r\n\t\t//2、hash: 计算分词的hash\r\n\t\tlong hash = hash(word);\r\n\t\t\r\n\t\t//3、加权：即W = Hash * weight，且遇到1则hash值和权值正相乘，遇到0则hash值和权值负相乘\r\n\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\tArrays.fill(weights, 0);\r\n\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (((hash >> i) & 1) == 1) {\r\n\t\t\t\t// 3.1 正负值weight\r\n\t\t\t\tweights[i] += 1;\r\n\t\t\t} else {\r\n\t\t\t\tweights[i] -= 1;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 3.2 增加权重： W = Hash * weight\r\n\t\t\t// 权重都一样，所以不存在权重\r\n\t\t}\r\n\t\t\r\n\t\t//4、合并：把上面各个单词算出来的序列值累加\r\n\t\t// 由于单个词，所以不存在合并\r\n\t\t\r\n\t\t//5、降维：大于0 记为 1，小于0 记为 0\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (weights[i] > 0) {\r\n\t\t\t\tsb.append(1);\r\n\t\t\t} else 
{\r\n\t\t\t\tsb.append(0);\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\treturn new BigInteger(sb.toString(), 2).longValue();\r\n\t}\r\n\t\r\n\tpublic static String _simhash(String word) {\r\n\t\t//1、分词：直接加入单词、脏词，所以不存在分词\r\n\t\t\r\n\t\t//2、hash: 计算分词的hash\r\n\t\tlong hash = hash(word);\r\n\t\t\r\n\t\t//3、加权：即W = Hash * weight，且遇到1则hash值和权值正相乘，遇到0则hash值和权值负相乘\r\n\t\tInteger[] weights = new Integer[HASH_BITS];\r\n\t\tArrays.fill(weights, 0);\r\n\t\t// 按照词语的hash值，计算simHashWeight(低位对齐)\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (((hash >> i) & 1) == 1) {\r\n\t\t\t\t// 3.1 正负值weight\r\n\t\t\t\tweights[i] += 1;\r\n\t\t\t} else {\r\n\t\t\t\tweights[i] -= 1;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 3.2 增加权重： W = Hash * weight\r\n\t\t\t// 权重都一样，所以不存在权重\r\n\t\t}\r\n\t\t\r\n\t\t//4、合并：把上面各个单词算出来的序列值累加\r\n\t\t// 由于单个词，所以不存在合并\r\n\t\t\r\n\t\t//5、降维：大于0 记为 1，小于0 记为 0\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif (weights[i] > 0) {\r\n\t\t\t\tsb.append(1);\r\n\t\t\t} else {\r\n\t\t\t\tsb.append(0);\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\treturn sb.toString();\r\n\t}\r\n\t\r\n\tprivate static long hash(String target) {\r\n\t\t\r\n\t\t//long hash = FNVHashUtils.hash64(target).longValue();\r\n\t\t//long hash = FNVHashUtils.hash64_(target).longValue();\r\n\t\tlong hash = Murmur3.hash64(target.getBytes());\r\n\t\t\r\n\t\treturn hash;\r\n\t}\r\n\t\r\n\tpublic static String[] _chunk(Long simhash) {\r\n\t\tString[] chunk = new String[CHUNK_COUNT];\r\n\t\t\r\n\t\tint chunkIndex = 0;\r\n\t\tint offset = HASH_BITS / CHUNK_COUNT;\r\n\r\n\t\tStringBuilder sb = new StringBuilder();\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tsb.append(simhash >> i & 1);\r\n\t\t\tif ((i + 1) % offset == 0) {\r\n\t\t\t\tchunk[chunkIndex++] = sb.toString();\r\n\t\t\t\tsb.setLength(0);\r\n\t\t\t}\r\n\t\t}\r\n\t\treturn chunk;\r\n\t}\r\n\t\r\n\tpublic static String[] chunk(String simhash) {\r\n\t\t\r\n\t\tString[] chunk = new String[CHUNK_COUNT];\r\n\t\t\r\n\t\tint 
offset = HASH_BITS / CHUNK_COUNT;\r\n\t\tfor (int i = 0; i < CHUNK_COUNT; i++) {\r\n\t\t\tchunk[i] = simhash.substring(i * offset, i * offset + offset);\r\n\t\t}\r\n\t\t\r\n\t\treturn chunk;\r\n\t}\r\n\t\r\n\tpublic static Map<String, Set<String>> cartesianProduct(String[] chunks) {\r\n\t\tMap<String, Set<String>> result = Maps.newHashMap();\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tSet<String> set = new HashSet<String>();\r\n\t\t\tfor (int j = 0; j < chunks.length; j++) {\r\n\t\t\t\tif (j != i) {\r\n\t\t\t\t\tset.add(chunks[j]);\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tresult.put(chunks[i], set);\r\n\t\t}\r\n\t\treturn result;\r\n\t}\r\n\t\r\n\tpublic static Map<String, String> _cartesianProduct(String[] chunks, String simhash) {\r\n\t\tMap<String, String> result = Maps.newHashMap();\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tresult.put(chunks[i], simhash);\r\n\t\t}\r\n\t\treturn result;\r\n\t}\r\n\t\r\n\tpublic static Map<String, Long> _cartesianProduct(String[] chunks, Long simhash) {\r\n\t\tMap<String, Long> result = Maps.newHashMap();\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tresult.put(chunks[i], simhash);\r\n\t\t}\r\n\t\treturn result;\r\n\t}\r\n\t\r\n\tpublic static boolean contains(String word, Map<String, Set<String>> store) {\r\n\t\t\r\n\t\tString simhash = _simhash(word);\r\n\t\tString[] chunks = chunk(simhash);\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tif (!store.containsKey(chunks[i])) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tSet<String> set = store.get(chunks[i]);\r\n\t\t\tfor (String hash : set) {\r\n\t\t\t\tfor (int j = 0; j < chunks.length; j++) {\r\n\t\t\t\t\tif (i != j && hammingDistance(hash, chunks[j]) < HAMMING_THRESH) {\r\n\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\tpublic static boolean _contains(String word, Map<String, String> store) {\r\n\t\t\r\n\t\tString simhash = 
_simhash(word);\r\n\t\tString[] chunks = chunk(simhash);\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tif (!store.containsKey(chunks[i])) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tif (hammingDistance(simhash, store.get(chunks[i])) < HAMMING_THRESH) {\r\n\t\t\t\treturn true;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\tpublic static boolean __contains(String word, Map<String, Long> store) {\r\n\t\t\r\n\t\tLong simhash = simhash(word);\r\n\t\tString[] chunks = _chunk(simhash);\r\n\t\tfor (int i = 0; i < chunks.length; i++) {\r\n\t\t\tif (!store.containsKey(chunks[i])) {\r\n\t\t\t\tcontinue;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tif (hammingDistance(simhash, store.get(chunks[i])) < HAMMING_THRESH) {\r\n\t\t\t\treturn true;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\t/**\r\n\t * 求海明距离 二进制串A 和 二进制串B 的海明距离 就是 A xor B 后二进制中1的个数\r\n\t * @author hoojo\r\n\t * @createDate 2018年3月22日 下午4:53:07\r\n\t */\r\n\tpublic static int hammingDistance(int a, int b) {\r\n\t\tBigInteger _a = BigInteger.valueOf(a);\r\n\t\tBigInteger _b = BigInteger.valueOf(b);\r\n\r\n\t\tint distance = 0;\r\n\r\n\t\tchar[] bit2s = _a.xor(_b).toString(2).toCharArray();\r\n\t\tfor (char bit : bit2s) {\r\n\t\t\tif (bit == '1') {\r\n\t\t\t\tdistance++;\r\n\t\t\t}\r\n\t\t}\r\n\r\n\t\treturn distance;\r\n\t}\r\n\r\n\tpublic static int hammingDistance(Long a, Long b) {\r\n\t\tint distance = 0;\r\n\t\tfor (int i = 0; i < HASH_BITS; i++) {\r\n\t\t\tif ((a >> i & 1) != (b >> i & 1)) {\r\n\t\t\t\tdistance++;\r\n\t\t\t}\r\n\t\t}\r\n\t\treturn distance;\r\n\t}\r\n\r\n\t/**\r\n\t * 二进制字符串汉明距离，a=10001 b=01000，distance=3\r\n\t * \r\n\t * @author hoojo\r\n\t * @createDate 2018年3月22日 下午5:06:42\r\n\t */\r\n\tpublic static int hammingDistance(String a, String b) {\r\n\t\tint distance = 0;\r\n\r\n\t\tif (a.length() != b.length()) {\r\n\t\t\tdistance = -1;\r\n\t\t} else {\r\n\t\t\tfor (int i = 0; i < a.length(); i++) {\r\n\t\t\t\tif (a.charAt(i) != b.charAt(i)) 
{\r\n\t\t\t\t\tdistance++;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t\treturn distance;\r\n\t}\r\n\r\n\tpublic static void main(String[] args) {\r\n\r\n\t\tMap<String, Integer> words = new HashMap<String, Integer>();\r\n\t\twords.put(\"CSDN\", 5);\r\n\t\twords.put(\"ABCD\", 1);\r\n\t\twords.put(\"中国\", 4);\r\n\t\t\r\n\t\tSystem.out.println(\"---------------------\");\r\n\t\tSystem.out.println(simhash(words));\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(simhash_(words));\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(simhash(\"中国\", 5));\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(simhash(\"中国\"));\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(\"0\" + BigInteger.valueOf(simhash(\"中国\")).toString(2));\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\t\r\n\t\tLong b = simhash(words);\r\n\r\n\t\twords = new HashMap<String, Integer>();\r\n\t\twords.put(\"CSDN\", 5);\r\n\t\twords.put(\"ABCE\", 1);\r\n\t\twords.put(\"中国\", 4);\r\n\t\tLong a = simhash(words);\r\n\t\t\r\n\t\tSystem.out.println(a);\r\n\t\tSystem.out.println(b);\r\n\t\tSystem.out.println(hammingDistance(b, a));\r\n\t\tSystem.out.println(hammingDistance(simhash(\"中国\"), simhash(\"中国\")));\r\n\t\t\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(StringUtils.join(_chunk(b), \";\"));\r\n\t\tSystem.out.println(StringUtils.join(chunk(_simhash(\"中国\")), \";\"));\r\n\t\t\r\n\t\tSystem.out.println(cartesianProduct(chunk(_simhash(\"中国\"))));\r\n\t\t\r\n\t\tSystem.out.println(\"----------------------\");\r\n\t\tSystem.out.println(contains(\"中间\", cartesianProduct(chunk(_simhash(\"中国\")))));\r\n\t\t\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/tire/TireTreeFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.tire;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.tire.executor.TireTreeFilterExecutor;\r\n\r\n/**\r\n * trie 树算法实现敏感词脱敏过滤\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午4:22:23\r\n * @file TireTreeFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.tire\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class TireTreeFilter extends AbstractFilter {\r\n\r\n\tpublic TireTreeFilter() {\r\n\t\tsuper(TireTreeFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/tire/executor/TireTreeFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.tire.executor;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\n\r\n/**\r\n * tire tree 算法脱敏词库支持类\r\n * \r\n * @author hoojo\r\n * @createDate 2018年2月9日 上午10:36:08\r\n * @file TireTreeFilterExecutor.java\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class TireTreeFilterExecutor extends AbstractFilterExecutor<TireTreeNode> {\r\n\r\n\tprivate TireTreeFilterExecutor() {\r\n\t\tsuper(\"tire tree 算法脱敏支持类\");\r\n\t}\r\n\t\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final TireTreeFilterExecutor INSTANCE = new TireTreeFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final TireTreeFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected TireTreeNode getCacheNodes() {\r\n\t\treturn new TireTreeNode(' ');\r\n\t}\r\n\r\n\t/**\r\n\t * 判断一段文字包含敏感词语，支持敏感词结果回调\r\n\t * @author hoojo\r\n\t * @createDate 2018年2月9日 下午2:54:59\r\n\t * @param partMatch 是否支持匹配词语的一部分\r\n\t * @param content 被匹配内容\r\n\t * @param start 开始的字符位置\r\n\t * @return 是否匹配到的词语\r\n\t */\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {\r\n\t\t\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\tif (content.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tfor (int index = 0; index < content.length(); index++) {\r\n\t\t\tchar fisrtChar = content.charAt(index);\r\n\t\t\t\r\n\t\t\tTireTreeNode node = cacheNodes.find(fisrtChar);\r\n\t\t\tif (node == null || node.isLeaf()) {\r\n\t\t\t\tcontinue;\r\n\t\t\t} \r\n\t\t\t\r\n\t\t\tint charCount = 1;\r\n\t\t\tfor (int i = index + 1; i < content.length(); i++) {\r\n\t\t\t\tchar wordChar = 
content.charAt(i);\r\n\t\t\t\t\r\n\t\t\t\tnode = node.find(wordChar);\r\n\t\t\t\tif (node != null) {\r\n\t\t\t\t\tcharCount++;\r\n\t\t\t\t} else {\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (partMatch && node.isWord()) {\r\n\t\t\t\t\tif (callback.call(StringUtils.substring(content, index, index + charCount))) {\r\n\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t}\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t} else if (node.isWord()) {\r\n\t\t\t\t\tif (callback.call(StringUtils.substring(content, index, index + charCount))) {\r\n\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tif (node.isLeaf()) {\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tif (partMatch) {\r\n\t\t\t\tindex += charCount;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\tif (StringUtils.isBlank(word)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tword = StringUtils.trim(word);\r\n\t\tif (word.length() < 2) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\t\r\n\t\tchar fisrtChar = word.charAt(0);\r\n\t\tTireTreeNode node = cacheNodes.find(fisrtChar);\r\n\t\tif (node == null) {\r\n\t\t\tnode = new TireTreeNode(fisrtChar);\r\n\t\t\tcacheNodes.addChild(node);\r\n\t\t}\r\n\t\t\r\n\t\tfor (int i = 1; i < word.length(); i++) {\r\n\t\t\tchar nextChar = word.charAt(i); // 转换成char型\r\n\t\t\t\r\n\t\t\tTireTreeNode nextNode = null;\r\n\t\t\tif (!node.isLeaf()) {\r\n\t\t\t\tnextNode = node.find(nextChar);\r\n\t\t\t} \r\n\t\t\tif (nextNode == null) {\r\n\t\t\t\tnextNode = new TireTreeNode(nextChar);\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tnode.addChild(nextNode);\r\n\t\t\tnode = nextNode;\r\n\t\t\t\r\n\t\t\tif (i == word.length() - 1) {\r\n\t\t\t\tnode.setWord(true);\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn true;\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) 
{\r\n\t\tTireTreeFilterExecutor.getInstance().init();\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"中国\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"人民\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"中间\");\r\n\t\tTireTreeFilterExecutor.getInstance().put(\"女人\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在中国人民中间有男人和女人。中国男人很惨，他们长期被压迫。\";\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().contains(true, content));\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().contains(false, content));\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(TireTreeFilterExecutor.getInstance().highlight(false, content));\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/tire/executor/TireTreeNode.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.tire.executor;\r\n\r\nimport java.util.List;\r\n\r\nimport com.google.common.collect.Lists;\r\n\r\n/**\r\n * 多叉树模型\r\n * @author hoojo\r\n * @createDate 2018年2月8日 下午8:23:27\r\n * @file TireTreeNode.java\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class TireTreeNode {\r\n\r\n\tprivate char _char;\r\n\tprivate boolean word;\r\n\tprivate List<TireTreeNode> childs;\r\n\t\r\n\tpublic TireTreeNode() {\r\n\t}\r\n\r\n\tpublic TireTreeNode(char _char) {\r\n\t\tthis._char = _char;\r\n\t}\r\n\t\r\n\tpublic boolean isLeaf() {\r\n\t\treturn (childs == null || childs.isEmpty());\r\n\t}\r\n\t\r\n\tpublic char getChar() {\r\n\t\treturn _char;\r\n\t}\r\n\r\n\tpublic void setChar(char _char) {\r\n\t\tthis._char = _char;\r\n\t}\r\n\r\n\tpublic boolean isWord() {\r\n\t\treturn word;\r\n\t}\r\n\r\n\tpublic void setWord(boolean word) {\r\n\t\tthis.word = word;\r\n\t}\r\n\t\r\n\tpublic List<TireTreeNode> getChilds() {\r\n\t\treturn childs;\r\n\t}\r\n\r\n\tpublic void setChilds(List<TireTreeNode> childs) {\r\n\t\tthis.childs = childs;\r\n\t}\r\n\t\r\n\tpublic void addChild(TireTreeNode child) {\r\n\t\tif (this.childs == null) {\r\n\t\t\tchilds = Lists.newArrayList();\r\n\t\t}\r\n\t\t\r\n\t\tthis.childs.add(child);\r\n\t}\r\n\r\n\tpublic void removeChild(TireTreeNode child) {\r\n\t\tif (this.childs != null) {\r\n\t\t\tthis.childs.remove(child);\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic TireTreeNode find(char _char) {\r\n\t\tif (this.childs != null) {\r\n\t\t\tfor (TireTreeNode item : this.childs) {\r\n\t\t\t\tif (item.getChar() == _char) {\r\n\t\t\t\t\treturn item;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t}\r\n\t\treturn null;\r\n\t}\r\n\r\n\tpublic void print(TireTreeNode node) {\r\n\t\tSystem.out.println(node.getChar());\r\n\t\tif (node.getChilds() != null) {\r\n\t\t\tfor (TireTreeNode childNode : node.getChilds()) 
{\r\n\t\t\t\t//System.out.println(childNode.getWord());\r\n\t\t\t\tprint(childNode);\r\n\t\t\t}\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) {\r\n\t\tTireTreeNode node = new TireTreeNode('中');\r\n\t\t\r\n\t\tTireTreeNode g = new TireTreeNode('国');\r\n\t\tg.addChild(new TireTreeNode('人'));\r\n\t\t\r\n\t\tTireTreeNode n = new TireTreeNode('男');\r\n\t\tn.addChild(new TireTreeNode('人'));\r\n\t\tg.addChild(n);\r\n\t\t\r\n\t\tnode.addChild(g);\r\n\t\tnode.addChild(new TireTreeNode('间'));\r\n\t\t\r\n\t\tnode.print(node);\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/ttmp/TtmpFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.ttmp;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor.TtmpFilterExecutor;\r\n\r\n/**\r\n * ttmp 算法过滤\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月20日 下午6:06:35\r\n * @file TtmpFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.ttmp\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class TtmpFilter extends AbstractFilter {\r\n\r\n\tpublic TtmpFilter() {\r\n\t\tsuper(TtmpFilterExecutor.getInstance());\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/ttmp/executor/BadWordsFilter.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor;\r\n\r\nimport java.util.HashMap;\r\nimport java.util.HashSet;\r\nimport java.util.Map;\r\n\r\n/**\r\n * 敏感词过滤\r\n * @author hoojo\r\n * @createDate 2018年9月24日 下午9:31:52\r\n * @file BadWordsFilter.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor\r\n * @project sensitive-words-filter\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class BadWordsFilter {\r\n\r\n\t// 保存所有脏词\r\n\tprivate HashSet<String> hash = new HashSet<String>();\r\n\t\r\n\t// 是否是脏词中首字符\r\n\tprivate byte[] fastCheck = new byte[Character.MAX_VALUE];\r\n\t// 脏词首字符 长度\r\n\tprivate byte[] fastLength = new byte[Character.MAX_VALUE];\r\n\t\r\n\t// 判断是否是一个字的脏词\r\n\tprivate Map<Character, Boolean> charCheck = new HashMap<Character, Boolean>();\r\n\t// 记录所有脏字中的停止字符\r\n\tprivate Map<Character, Boolean> endCheck = new HashMap<Character, Boolean>();\r\n\r\n\t// 脏词中长度最大词的length\r\n\tprivate int maxWordLength = 0;\r\n\t// 脏词中长度最小词的length\r\n\tprivate int minWordLength = Integer.MAX_VALUE;\r\n\r\n\tpublic BadWordsFilter() {\r\n\t}\r\n\r\n\tpublic void put(String word) {\r\n\t\tmaxWordLength = Math.max(maxWordLength, word.length());\r\n\t\tminWordLength = Math.min(minWordLength, word.length());\r\n\t\t\r\n\t\tfor (int i = 0; i < 7 && i < word.length(); i++) {\r\n\t\t\tfastCheck[word.charAt(i)] |= (byte) (1 << i);\r\n\t\t}\r\n\r\n\t\tfor (int i = 7; i < word.length(); i++) {\r\n\t\t\tfastCheck[word.charAt(i)] |= 0x80;\r\n\t\t}\r\n\r\n\t\tif (word.length() == 1) {\r\n\t\t\tcharCheck.put(word.charAt(0), true);\r\n\t\t} else {\r\n\t\t\tendCheck.put(word.charAt(word.length() - 1), true);\r\n\t\t\tfastLength[word.charAt(0)] |= (byte) (1 << (Math.min(7, word.length() - 2)));\r\n\r\n\t\t\thash.add(word);\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void init(String[] badwords) {\r\n\t\tfor (String word : badwords) {\r\n\t\t\tput(word);\r\n\t\t}\r\n\t}\r\n\r\n\tpublic boolean 
contains(String text) {\r\n\t\tint index = 0;\r\n\r\n\t\twhile (index < text.length()) {\r\n\t\t\t@SuppressWarnings(\"unused\")\r\n\t\t\tint count = 1;\r\n\r\n\t\t\tif (index > 0 || (fastCheck[text.charAt(index)] & 1) == 0) {\r\n\t\t\t\t// 匹配到下一个“可能是脏词”首字符的位置\r\n\t\t\t\twhile (index < text.length() - 1 && (fastCheck[text.charAt(++index)] & 1) == 0);\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 取得下一个脏词文本的第一个字符\r\n\t\t\tchar begin = text.charAt(index);\r\n\r\n\t\t\t// 表示是简单脏词，单个字脏词\r\n\t\t\tif (minWordLength == 1 && charCheck.containsKey(begin)) {\r\n\t\t\t\treturn true;\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 比对的次数是 当前文本剩余比对长度 或者 脏词的最大长度\r\n\t\t\tfor (int j = 1; j <= Math.min(maxWordLength, text.length() - index - 1); j++) {\r\n\t\t\t\tchar current = text.charAt(index + j);\r\n\r\n\t\t\t\tif ((fastCheck[current] & 1) == 0) { // 非首字符\r\n\t\t\t\t\t++count;\r\n\t\t\t\t}\r\n\r\n\t\t\t\tif ((fastCheck[current] & (1 << Math.min(j, 7))) == 0) { // 当前字符在脏词中的位置超过7位\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\r\n\t\t\t\tif (j + 1 >= minWordLength) { // 当前比对词长度小于等于最大脏词的长度\r\n\t\t\t\t\tSystem.out.println(begin + \"####\" + (fastLength[begin] & (1 << Math.min(j - 1, 7))));\r\n\t\t\t\t\t// 判断当前字符是否是脏词最后一个字符\r\n\t\t\t\t\tif ((fastLength[begin] & (1 << Math.min(j - 1, 7))) > 0 && endCheck.containsKey(current)) {\r\n\t\t\t\t\t\tString sub = text.substring(index, index + j + 1);\r\n\r\n\t\t\t\t\t\tif (hash.contains(sub)) { // 判断是否是脏词\r\n\t\t\t\t\t\t\tSystem.out.println(sub);\r\n\t\t\t\t\t\t\t//return true;\r\n\t\t\t\t\t\t}\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\tindex++;\r\n\t\t\t//index += count;\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) {\r\n\t\tBadWordsFilter filter = new BadWordsFilter();\r\n\t\tfilter.init(new String[] {\"一举\", \"一举成名\", \"一举成名走四方\", \"成名\", \"走四方\"/*, \"什\"*/, \"东南西北\", \"东南西北风呼呼呼的吹\"});\r\n\t\t\r\n\t\tString content = 
\"一举成名走四方的是什么，东南西北风呼呼呼的吹\";\r\n\t\tSystem.out.println(\"***************************************************\");\r\n\t\tSystem.out.println(filter.contains(content));\r\n\t\tSystem.out.println(\"***************************************************\");\r\n\t\t\r\n\t\tfor (char s : content.toCharArray()) {\r\n\t\t\tSystem.out.println(\"check: \" + s + \"->\" + filter.fastCheck[s]);\r\n\t\t\tSystem.out.println(\"length: \" + s + \"->\" + filter.fastLength[s]);\r\n\t\t\tSystem.out.println((filter.fastCheck[s] & 1));\r\n\t\t}\r\n\t}\r\n}"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/ttmp/executor/TtmpCacheNode.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor;\r\n\r\nimport java.util.HashSet;\r\n\r\n/**\r\n * 数据缓存节点\r\n * @author hoojo\r\n * @createDate 2018年3月20日 下午6:14:53\r\n * @file TtmpCacheNode.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class TtmpCacheNode {\r\n\r\n\t// 保存所有脏词\r\n\tprivate HashSet<String> hash = new HashSet<String>();\r\n\r\n\t// 是否是脏词中首字符\r\n\tprivate byte[] fastCheck = new byte[Character.MAX_VALUE];\r\n\t// 脏词首字符 长度\r\n\tprivate byte[] fastLength = new byte[Character.MAX_VALUE];\r\n\r\n\t// 判断是否是一个字的脏词\r\n\tprivate boolean[] charCheck = new boolean[Character.MAX_VALUE];\r\n\t// 记录所有脏字中的停止字符\r\n\tprivate boolean[] endCheck = new boolean[Character.MAX_VALUE];\r\n\r\n\t// 脏词中长度最大词的length\r\n\tprivate int maxWordLength = 0;\r\n\t// 脏词中长度最小词的length\r\n\tprivate int minWordLength = Integer.MAX_VALUE;\r\n\r\n\tpublic HashSet<String> getHash() {\r\n\t\treturn hash;\r\n\t}\r\n\r\n\tpublic void setHash(HashSet<String> hash) {\r\n\t\tthis.hash = hash;\r\n\t}\r\n\r\n\tpublic byte[] getFastCheck() {\r\n\t\treturn fastCheck;\r\n\t}\r\n\r\n\tpublic void setFastCheck(byte[] fastCheck) {\r\n\t\tthis.fastCheck = fastCheck;\r\n\t}\r\n\r\n\tpublic byte[] getFastLength() {\r\n\t\treturn fastLength;\r\n\t}\r\n\r\n\tpublic void setFastLength(byte[] fastLength) {\r\n\t\tthis.fastLength = fastLength;\r\n\t}\r\n\r\n\tpublic int getMaxWordLength() {\r\n\t\treturn maxWordLength;\r\n\t}\r\n\r\n\tpublic void setMaxWordLength(int maxWordLength) {\r\n\t\tthis.maxWordLength = maxWordLength;\r\n\t}\r\n\r\n\tpublic int getMinWordLength() {\r\n\t\treturn minWordLength;\r\n\t}\r\n\r\n\tpublic void setMinWordLength(int minWordLength) {\r\n\t\tthis.minWordLength = minWordLength;\r\n\t}\r\n\t\r\n\tpublic boolean[] getCharCheck() {\r\n\t\treturn charCheck;\r\n\t}\r\n\r\n\tpublic void 
setCharCheck(boolean[] charCheck) {\r\n\t\tthis.charCheck = charCheck;\r\n\t}\r\n\r\n\tpublic boolean[] getEndCheck() {\r\n\t\treturn endCheck;\r\n\t}\r\n\r\n\tpublic void setEndCheck(boolean[] endCheck) {\r\n\t\tthis.endCheck = endCheck;\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/filter/ttmp/executor/TtmpFilterExecutor.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\n\r\n/**\r\n * ttmp 过滤明干成实现\r\n * \r\n * @author hoojo\r\n * @createDate 2018年3月20日 下午6:09:01\r\n * @file TtmpFilterExecutor.java\r\n * @package com.cnblogs.hoojo.sensitivewords.filter.ttmp\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic final class TtmpFilterExecutor extends AbstractFilterExecutor<TtmpCacheNode> {\r\n\r\n\tprivate TtmpFilterExecutor() {\r\n\t\tsuper(\"ttmp 算法脱敏支持类\");\r\n\t}\r\n\t\r\n\tprivate static class SingleFactory {\r\n\t\tprivate static final TtmpFilterExecutor INSTANCE = new TtmpFilterExecutor();\r\n\t}\r\n\r\n\tpublic static final TtmpFilterExecutor getInstance() {\r\n\t\treturn SingleFactory.INSTANCE;\r\n\t}\r\n\t\r\n\t@Override\r\n\tprotected TtmpCacheNode getCacheNodes() {\r\n\t\treturn new TtmpCacheNode();\r\n\t}\r\n\t\r\n\tprotected boolean processor(boolean partMatch, String content, Callback callback) {\r\n\t\tif (StringUtils.isBlank(content)) {\r\n\t\t\treturn false;\r\n\t\t}\r\n\t\tcontent = StringUtils.trim(content);\r\n\t\t\r\n\t\tint index = 0;\r\n\t\twhile (index < content.length()) {\r\n\t\t\tint count = 1;\r\n\r\n\t\t\tif (partMatch) {\r\n\t\t\t\tif (index > 0 || (cacheNodes.getFastCheck()[content.charAt(index)] & 1) == 0) {\r\n\t\t\t\t\t// 匹配到下一个“可能是脏词”首字符的位置\r\n\t\t\t\t\twhile (index < content.length() - 1 && (cacheNodes.getFastCheck()[content.charAt(++index)] & 1) == 0);\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 取得下一个脏词文本的第一个字符\r\n\t\t\tchar begin = content.charAt(index);\r\n\r\n\t\t\t// 表示是简单脏词，单个字脏词\r\n\t\t\tif (cacheNodes.getMinWordLength() == 1 && cacheNodes.getCharCheck()[begin]) {\r\n\t\t\t\t\r\n\t\t\t\tif (callback.call(String.valueOf(begin))) {\r\n\t\t\t\t\treturn 
true;\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\t// 比对的次数是 当前文本剩余比对长度 或者 脏词的最大长度\r\n\t\t\tfor (int j = 1; j <= Math.min(cacheNodes.getMaxWordLength(), content.length() - index - 1); j++) {\r\n\t\t\t\tchar current = content.charAt(index + j);\r\n\r\n\t\t\t\tif ((cacheNodes.getFastCheck()[current] & 1) == 0) { // 非首字符\r\n\t\t\t\t\t++count;\r\n\t\t\t\t}\r\n\r\n\t\t\t\tif ((cacheNodes.getFastCheck()[current] & (1 << Math.min(j, 7))) == 0) { // 当前字符在脏词中的位置超过7位\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\r\n\t\t\t\tif (j + 1 >= cacheNodes.getMinWordLength()) { // 当前比对词长度小于等于最大脏词的长度\r\n\t\t\t\t\t// 判断当前字符是否是脏词最后一个字符\r\n\t\t\t\t\tif ((cacheNodes.getFastLength()[begin] & (1 << Math.min(j - 1, 7))) > 0 && cacheNodes.getEndCheck()[current]) {\r\n\t\t\t\t\t\tString sub = content.substring(index, index + j + 1);\r\n\t\t\t\t\t\t\r\n\t\t\t\t\t\tif (cacheNodes.getHash().contains(sub)) { // 判断是否是脏词\r\n\t\t\t\t\t\t\tif (callback.call(String.valueOf(sub))) {\r\n\t\t\t\t\t\t\t\treturn true;\r\n\t\t\t\t\t\t\t}\r\n\t\t\t\t\t\t}\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t}\r\n\t\t\t\r\n\t\t\tif (partMatch) {\r\n\t\t\t\tindex++;\r\n\t\t\t} else {\r\n\t\t\t\tindex += count;\r\n\t\t\t}\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\r\n\t@Override\r\n\tprotected boolean put(String word) throws RuntimeException {\r\n\t\tthis.cacheNodes.setMaxWordLength(Math.max(this.cacheNodes.getMaxWordLength(), word.length()));\r\n\t\tthis.cacheNodes.setMinWordLength(Math.min(this.cacheNodes.getMinWordLength(), word.length()));\r\n\t\t\r\n\t\tfor (int i = 0; i < 7 && i < word.length(); i++) {\r\n\t\t\tbyte[] fastCheck = this.cacheNodes.getFastCheck();\r\n\t\t\tfastCheck[word.charAt(i)] |= (byte) (1 << i);\r\n\t\t\t\r\n\t\t\tthis.cacheNodes.setFastCheck(fastCheck);\r\n\t\t}\r\n\r\n\t\tfor (int i = 7; i < word.length(); i++) {\r\n\t\t\tbyte[] fastCheck = this.cacheNodes.getFastCheck();\r\n\t\t\tfastCheck[word.charAt(i)] |= 0x80;\r\n\t\t\t\r\n\t\t\tthis.cacheNodes.setFastCheck(fastCheck);\r\n\t\t}\r\n\r\n\t\tif 
(word.length() == 1) {\r\n\t\t\tcacheNodes.getCharCheck()[word.charAt(0)] = true;\r\n\t\t} else {\r\n\t\t\tcacheNodes.getEndCheck()[word.charAt(word.length() - 1)] = true;\r\n\t\t\t\r\n\t\t\tbyte[] fastLength = cacheNodes.getFastLength();\r\n\t\t\tfastLength[word.charAt(0)] |= (byte) (1 << (Math.min(7, word.length() - 2)));\r\n\t\t\t\r\n\t\t\tcacheNodes.setFastLength(fastLength);\r\n\r\n\t\t\tcacheNodes.getHash().add(word);\r\n\t\t}\r\n\t\t\r\n\t\treturn false;\r\n\t}\r\n\t\r\n\tpublic static void main(String[] args) {\r\n\t\tTtmpFilterExecutor.getInstance().init();\r\n\t\t\r\n\t\tTtmpFilterExecutor.getInstance().put(\"中国人\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"中国男人\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"中国人民\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"人民\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"中间\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"女人\");\r\n\t\t\r\n\t\tTtmpFilterExecutor.getInstance().put(\"一\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"一举成名\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"一举成名走四方\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"成名\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"走四方\");\r\n\t\tTtmpFilterExecutor.getInstance().put(\"是\");\r\n\t\t\r\n\t\tString content = \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\";\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().filter(false, content, '*'));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().highlight(false, content));\r\n\t\t\r\n\t\tcontent = \"一举成名走四方的是什么\";\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().getWords(true, content));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().getWords(false, content));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().filter(true, content, 
'*'));\r\n\t\tSystem.out.println(TtmpFilterExecutor.getInstance().highlight(false, content));\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/main/java/com/cnblogs/hoojo/sensitivewords/log/ApplicationLogging.java",
    "content": "package com.cnblogs.hoojo.sensitivewords.log;\r\n\r\nimport org.apache.commons.lang.exception.ExceptionUtils;\r\nimport org.slf4j.Logger;\r\nimport org.slf4j.LoggerFactory;\r\n\r\n/**\r\n * 系统调试、操作、错误日志基类\r\n * \r\n * @author hoojo\r\n * @createDate 2012-2-9 上午11:46:30\r\n * @file ApplicationLogging.java\r\n * @version 1.0\r\n */\r\npublic abstract class ApplicationLogging {\r\n\r\n    protected final Logger logger;\r\n\r\n    public ApplicationLogging() {\r\n        super();\r\n        logger = LoggerFactory.getLogger(this.getClass());\r\n    }\r\n\r\n    protected final void debug(Object o) {\r\n\r\n        logger.debug(String.valueOf(o));\r\n    }\r\n\r\n    protected final void debug(String msg) {\r\n\r\n        logger.debug(msg);\r\n    }\r\n\r\n    protected final void debug(String msg, Object... objects) {\r\n\r\n        logger.debug(msg, objects);\r\n    }\r\n\r\n    protected final void debug(Throwable ex) {\r\n\r\n        logger.debug(ex.getMessage(), ex);\r\n        Throwable re = ExceptionUtils.getRootCause(ex);\r\n        if (re != null && ex != re) {\r\n            logger.debug(\"root cause\", re);\r\n        }\r\n\r\n    }\r\n\r\n    protected final void error(String msg) {\r\n\r\n        logger.error(msg);\r\n    }\r\n\r\n    protected final void error(String msg, Object... objects) {\r\n\r\n        logger.error(msg, objects);\r\n    }\r\n\r\n    protected final void error(Throwable ex) {\r\n\r\n        logger.error(ex.getMessage(), ex);\r\n        Throwable re = ExceptionUtils.getRootCause(ex);\r\n        if (re != null && ex != re) {\r\n            logger.error(\"root cause\", re);\r\n        }\r\n    }\r\n\r\n    protected final void info(String msg) {\r\n\r\n        logger.info(msg);\r\n    }\r\n\r\n    protected final void info(String msg, Object... 
objects) {\r\n\r\n        logger.info(msg, objects);\r\n    }\r\n\r\n    protected final void info(Throwable ex) {\r\n\r\n        logger.info(ex.getMessage(), ex);\r\n        Throwable re = ExceptionUtils.getRootCause(ex);\r\n        if (re != null && ex != re) {\r\n            logger.info(\"root cause\", re);\r\n        }\r\n    }\r\n\r\n    protected final void trace(Object o) {\r\n\r\n        logger.trace(String.valueOf(o));\r\n    }\r\n\r\n    protected final void trace(String msg) {\r\n\r\n        logger.trace(msg);\r\n    }\r\n\r\n    protected final void trace(String msg, Object... objects) {\r\n\r\n        logger.trace(msg, objects);\r\n    }\r\n\r\n    protected final void trace(Throwable ex) {\r\n\r\n        logger.trace(ex.getMessage(), ex);\r\n        Throwable re = ExceptionUtils.getRootCause(ex);\r\n        if (re != null && ex != re) {\r\n            logger.trace(\"root cause\", re);\r\n        }\r\n    }\r\n\r\n    protected final void warn(String msg) {\r\n\r\n        logger.warn(msg);\r\n    }\r\n\r\n    protected final void warn(String msg, Object... objects) {\r\n\r\n        logger.warn(msg, objects);\r\n    }\r\n\r\n    protected final void warn(Throwable ex) {\r\n\r\n        logger.warn(ex.getMessage(), ex);\r\n        Throwable re = ExceptionUtils.getRootCause(ex);\r\n        if (re != null && ex != re) {\r\n            logger.warn(\"root cause\", re);\r\n        }\r\n    }\r\n}"
  },
  {
    "path": "src/main/resources/log4j.properties",
    "content": "#Global configuration\nlog4j.rootLogger = DEBUG, stdout,logfile\n\n#output in console\nlog4j.appender.stdout = org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout = org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern =%d{yyyy/MM/dd HH:mm:ss,SSS} %5p [%t] - %m%n\n\n#output in file\n#log4j.appender.logfile=org.apache.log4j.RollingFileAppender  \n#log4j.appender.logfile.File= logs/logfile.log  \n#log4j.appender.logfile.MaxFileSize=512KB  \n#log4j.appender.logfile.MaxBackupIndex=1000000 \n#log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  \n#log4j.appender.logfile.layout.ConversionPattern=%5p [%t] - %m%n\n\nlog4j.appender.logfile=org.apache.log4j.DailyRollingFileAppender\nlog4j.appender.logfile.File=../logs/fengkongservice.log\nlog4j.appender.logfile.DatePattern='.'yyyy-MM-dd\nlog4j.appender.logfile.layout=org.apache.log4j.PatternLayout  \nlog4j.appender.logfile.layout.ConversionPattern=%d{yyyy/MM/dd HH:mm:ss,SSS} %5p [%t] - %m%n\n\n#Spring config\nlog4j.logger.org.springframework = DEBUG\n\n\n#Mybatis config\nlog4j.logger.com.ibatis=DEBUG  \nlog4j.logger.com.ibatis.common.jdbc.SimpleDataSource=DEBUG  \nlog4j.logger.com.ibatis.common.jdbc.ScriptRunner=DEBUG  \nlog4j.logger.com.ibatis.sqlmap.engine.impl.SqlMapClientDelegate=DEBUG  \nlog4j.logger.org.mybatis=DEBUG  \nlog4j.logger.org.apache.ibatis = DEBUG\nlog4j.logger.net.openwares.test.mapper = TRACE\n\n#JDBC config\nlog4j.logger.java.sql.Connection = DEBUG  \nlog4j.logger.java.sql.Statement = DEBUG  \nlog4j.logger.java.sql.PreparedStatement = DEBUG  \nlog4j.logger.java.sql.ResultSet = DEBUG\n\n"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/SensitiveWordsServiceDBTest.java",
    "content": "package com.cnblogs.hoojo.sensitivewords;\r\n\r\nimport java.io.BufferedReader;\r\nimport java.io.InputStream;\r\nimport java.io.InputStreamReader;\r\nimport java.nio.charset.StandardCharsets;\r\nimport java.util.Set;\r\n\r\nimport org.apache.commons.lang.StringUtils;\r\n\r\nimport com.cnblogs.hoojo.sensitivewords.business.enums.SensitiveWordsType;\r\nimport com.cnblogs.hoojo.sensitivewords.business.model.SensitiveWords;\r\nimport com.cnblogs.hoojo.sensitivewords.cache.RedisWordsCache;\r\nimport com.cnblogs.hoojo.sensitivewords.factory.FilterType;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.bucket.executor.HashBucketFilterExecutor;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor.DatFilterExecutor;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.dfa.executor.DfaFilterExecutor;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.tire.executor.TireTreeFilterExecutor;\r\nimport com.cnblogs.hoojo.sensitivewords.filter.ttmp.executor.TtmpFilterExecutor;\r\nimport com.google.common.collect.Lists;\r\nimport com.google.common.collect.Sets;\r\n\r\nimport junit.framework.TestCase;\r\n\r\n/**\r\n * 敏感词库测试\r\n * @author hoojo\r\n * @createDate 2018年2月2日 下午5:40:21\r\n * @file SensitiveWordsTest.java\r\n * @package com.hoojo.business.service.mgbase.sensitivewords\r\n * @project fengkong-service-provider\r\n * @blog http://hoojo.cnblogs.com\r\n * @email hoojo_@126.com\r\n * @version 1.0\r\n */\r\npublic class SensitiveWordsServiceDBTest extends TestCase {\r\n\r\n\tprivate SensitiveWordsService<SensitiveWords> service;\r\n\t\r\n\t@SuppressWarnings(\"unchecked\")\r\n\tpublic void setUp() {\r\n\t}\r\n\t\r\n\tpublic void testAdd() {\r\n\t\tSensitiveWords word = new SensitiveWords();\r\n\t\tword.setType(SensitiveWordsType.OTHERS);\r\n\t\tword.setWord(\"拿回扣\");\r\n\t\t\r\n\t\tword.setCreator(\"1\");\r\n\t\tword.setUpdater(\"2\");\r\n\t\t\r\n\t\ttry 
{\r\n\t\t\tservice.add(word);\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testBatchAdd() {\r\n\t\t\r\n\t\ttry {\r\n\t\t\tfor (SensitiveWordsType type : SensitiveWordsType.values()) {\r\n\t\t\t\t\r\n\t\t\t\tint i;\r\n\t\t\t\tSet<SensitiveWords> words;\r\n\t\t\t\ttry {\r\n\t\t\t\t\tInputStream stream = SensitiveWordsServiceDBTest.class.getResourceAsStream(type.getName() + \"词库.txt\");\r\n\t\t\t\t\tBufferedReader reader = new BufferedReader(new InputStreamReader(stream, \"gbk\"));\r\n\t\t\t\t\t\r\n\t\t\t\t\ti = 0;\r\n\t\t\t\t\twords = Sets.newHashSet();\r\n\t\t\t\t\twhile (true) {\r\n\t\t\t\t\t\tString line = reader.readLine();\r\n\t\t\t\t\t\tif (line == null) {\r\n\t\t\t\t\t\t\tbreak;\r\n\t\t\t\t\t\t}\r\n\t\t\t\t\t\t\r\n\t\t\t\t\t\ti++;\r\n\t\t\t\t\t\t\r\n\t\t\t\t\t\tif (StringUtils.isNotBlank(line)) {\r\n\t\t\t\t\t\t\tSensitiveWords entity = new SensitiveWords(line, \"admin\", \"admin\");\r\n\t\t\t\t\t\t\tentity.setType(type);\r\n\t\t\t\t\t\t\twords.add(entity);\r\n\t\t\t\t\t\t\t\r\n\t\t\t\t\t\t\ttry {\r\n\t\t\t\t\t\t\t\tservice.add(entity);\r\n\t\t\t\t\t\t\t} catch (Exception e) {\r\n\t\t\t\t\t\t\t\tSystem.err.println(e.getMessage());\r\n\t\t\t\t\t\t\t}\r\n\t\t\t\t\t\t}\r\n\t\t\t\t\t}\r\n\t\t\t\t\t\r\n\t\t\t\t\tSystem.out.println(\"循环单词: \" + i + \", 插入数据：\" + words.size());\r\n\t\t\t\t\tThread.sleep(1000 * 3);\r\n\t\t\t\t} catch (Exception e) {\r\n\t\t\t\t\te.printStackTrace();\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\t\r\n\t\t\t}\r\n\t\t\tThread.sleep(1000 * 3);\r\n\t\t\t\r\n\t\t\t//service.batch(new ArrayList<>(words), SensitiveWordsType.REACTION);\r\n\t\t\t//System.out.println(\"循环单词: \" + i + \", 插入数据：\" + words.size());\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testBatchAdd2() {\r\n\t\t\r\n\t\ttry {\r\n\t\t\tint i;\r\n\t\t\tSet<SensitiveWords> words;\r\n\t\t\ttry {\r\n\t\t\t\tInputStream stream = 
SensitiveWordsServiceDBTest.class.getResourceAsStream(\"敏感词库大全.txt\");\r\n\t\t\t\tBufferedReader reader = new BufferedReader(new InputStreamReader(stream, \"gbk\"));\r\n\t\t\t\t\r\n\t\t\t\ti = 0;\r\n\t\t\t\twords = Sets.newHashSet();\r\n\t\t\t\twhile (true) {\r\n\t\t\t\t\tString line = reader.readLine();\r\n\t\t\t\t\tif (line == null) {\r\n\t\t\t\t\t\tbreak;\r\n\t\t\t\t\t}\r\n\t\t\t\t\t\r\n\t\t\t\t\tif (StringUtils.isNotBlank(line)) {\r\n\t\t\t\t\t\tSensitiveWords entity = new SensitiveWords(line, \"admin\", \"admin\");\r\n\t\t\t\t\t\tentity.setType(SensitiveWordsType.OTHERS);\r\n\t\t\t\t\t\twords.add(entity);\r\n\t\t\t\t\t\t\r\n\t\t\t\t\t\ttry {\r\n\t\t\t\t\t\t\tservice.add(entity);\r\n\t\t\t\t\t\t\ti++;\r\n\t\t\t\t\t\t} catch (Exception e) {\r\n\t\t\t\t\t\t\tSystem.err.println(e.getMessage());\r\n\t\t\t\t\t\t}\r\n\t\t\t\t\t}\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tSystem.out.println(\"循环单词: \" + i + \", 插入数据：\" + words.size());\r\n\t\t\t\tThread.sleep(1000 * 3);\r\n\t\t\t} catch (Exception e) {\r\n\t\t\t\te.printStackTrace();\r\n\t\t\t}\t\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testFilter() throws Exception {\r\n\t\tSystem.out.println(SensitiveWordsFilterUtils.getInstance().filter(FilterType.HASH_BUCKET, false, \"尼玛啊,然后，市长仓井空在婚礼上唱春天在哪里。\", '*'));\r\n\t}\r\n\t\r\n\tpublic void testPut() throws Exception {\r\n\t\tRedisWordsCache.getInstance().put(new SensitiveWords(\"ss\", \"11\", \"22\"));\r\n\t\tSystem.out.println(RedisWordsCache.getInstance().get().size());\r\n\t}\r\n\t\r\n\tpublic void testPutList() throws Exception {\r\n\t\t\r\n\t\tRedisWordsCache.getInstance().put(Lists.newArrayList(new SensitiveWords(\"ss3\", \"11\", \"22\"), new SensitiveWords(\"ss4\", \"11\", \"22\")));\r\n\t\tSystem.out.println(RedisWordsCache.getInstance().get().size());\r\n\t}\r\n\t\r\n\tpublic void testGetCache() {\r\n\t\ttry {\r\n\t\t\tSystem.out.println(RedisWordsCache.getInstance().get());\r\n\t\t} catch (Exception e) 
{\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testRemove() {\r\n\t\ttry {\r\n\t\t\tSystem.out.println(RedisWordsCache.getInstance().get());\r\n\t\t\tRedisWordsCache.getInstance().remove(new SensitiveWords(\"ss\", \"11\", \"22\"));\r\n\t\t\tSystem.out.println(RedisWordsCache.getInstance().get());\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testInit() {\r\n\t\t\r\n\t\tRedisWordsCache.getInstance().setDataSource(service);\r\n\t\ttry {\r\n\t\t\tRedisWordsCache.getInstance().init();\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t}\r\n\t}\r\n\t\r\n\tpublic void testSensitiveWordsFilterUtils() throws Exception {\r\n\t\tSystem.out.println(SensitiveWordsFilterUtils.getInstance().getWords(false, \"我们中国人都是好人，在他们中间有男人和女人。中国男人很惨，中国人民长期被压迫。\"));\r\n\t}\r\n\t\r\n\t@SuppressWarnings(\"rawtypes\")\r\n\tpublic void testPerformance() {\r\n\t\t\r\n\t\tAbstractFilterExecutor[] executors = { \r\n\t\t\t\tDfaFilterExecutor.getInstance(), \r\n\t\t\t\tTireTreeFilterExecutor.getInstance(), HashBucketFilterExecutor.getInstance(),\r\n\t\t\t\tTtmpFilterExecutor.getInstance(), DatFilterExecutor.getInstance() \r\n\t\t\t};\r\n\t\t\r\n\t\tString content = \"\";\r\n\t\ttry {\r\n\t\t\tInputStreamReader reader = new InputStreamReader(ClassLoader.getSystemResourceAsStream(\"Talk.txt\"), StandardCharsets.UTF_8);\r\n\t\t\tBufferedReader bufferedReader = new BufferedReader(reader);\r\n\t\t\t\r\n\t\t\twhile (true) {\r\n\t\t\t\tString line = bufferedReader.readLine();\r\n\t\t\t\tif (line == null) {\r\n\t\t\t\t\tbreak;\r\n\t\t\t\t}\r\n\t\t\t\t\r\n\t\t\t\tcontent += line;\r\n\t\t\t}\r\n\t\t} catch (Exception e) {\r\n\t\t\te.printStackTrace();\r\n\t\t} \r\n\t\t\r\n\t\tfor (int i = 0; i < 15; i++) {\r\n\t\t\tcontent = content + \"，\" + content;\r\n\t\t}\r\n\r\n\t\tSystem.out.println(\"过滤字符数：\" + content.length());\r\n\t\tfor (AbstractFilterExecutor exec : executors) {\r\n\t\t\t\r\n\t\t\tlong m = 
Runtime.getRuntime().freeMemory();\r\n\t\t\tlong start = System.currentTimeMillis();\r\n\t\t\texec.init(\"BadWord.txt\");\r\n\t\t\tSystem.out.println(\"初始化填词耗时：\" + (System.currentTimeMillis() - start));\r\n\t\t\t\r\n\t\t\tstart = System.currentTimeMillis();\r\n\t\t\tSet<String> words = exec.getWords(false, content);\r\n\t\t\tSystem.out.println(exec.getListenerName() + \": \" + words + \", 数量：\" + words.size());\r\n\t\t\tSystem.out.println(\"查找耗时：\" + (System.currentTimeMillis() - start));\r\n\t\t\t//exec.filter(false, content, '*');\r\n\t\t\t//System.out.println(\"查找耗时：\" + (System.currentTimeMillis() - start));\r\n\t\t\tSystem.out.println(\"内存消耗：\" + ((m - Runtime.getRuntime().freeMemory()) / 1024));\r\n\t\t}\r\n\t}\r\n}\r\n"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/其他词库.txt",
    "content": "Ѩ\nЭ\nױ\nרҵ\nл\njq\n·\nһ\n\nƾ\n\nԪԳʴ\nϯ\n\n\nҪĬ\nڼ\nʳǼ\nѲ\n\n\nС\nըѧУ\nӹ\nоŮ\n\nڰ\n֮ĸ\nĸ\nķ\n΢\nȫ\nnowto\nchengdu\n\n·ѩ\nŰ\nϮ\n츮㳡\n\nϴ԰\n޴\nй\n̨\nȫ\n\n\n10010\n10086\n10159\n13423205670\n13725516608\n13875448369\n15112886328\n189\n6-4tianwang\n64\n68170802\n6a6.net\n7.31\n7.310\n89-64cdjp\n8945212\n23\n259o\n381929279\n3P\n4-Jun\nAV\nBJ\nCBD\nCCTV\nCDMA\nDICK\nDick\nFLG\nFOCUSC\nFUCK\nFuck\nGAMEMASTER\nGCD\nGameMaster\nIP17908\nKEY_TEXT\nNMD\nQQb\nSM\nSoccer01.com\nTMD\nUltraSurf\nbignews\nBitch\nboxun\nChinaliberal\nchinamz\nChinesenewsnet\ncnd\nCreaders\ndafa\nDajiyuan\ndf'd\nDfdz\ndie\ndpp\nFreechina\nfreedom\nFreenet\nfuck\nfuck your mother\nhongzhi\nhrichina\nhuanet\nhypermart.net\nincest\njiangdongriji\nl2590803027\nlihongzhi\nma\nmaking\nminghui\nminghuinews\nnacb\nna?ve\nnmis\npaper64\npeacehall\npiao\nplayboy\nrenminbao\nrenmingbao\nrfa\nsafeweb\nsex\nshit\nsimple\nsvdc\ntaip\ntibetalk\ntriangle\ntriangleboy\ntxwq.net\nunixbox\nustibet\nvoa\nvoachinese\nwangce\nwstaiji\nwww\nxinsheng\nyuming\nyyͨѶ¼\nzhengjian\nzhengjianwang\nzhenshanren\nzhuanfalu\nzhuanfalun\n˾\n\n"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/反动词库.txt",
    "content": "й\n\nҷ\n\nй\n򵹹\n򵹹\n򵹺\n򵹽\n򵹽ϯ\n\n޸\n¼ұ\nй\nF\nƹ\nƹ\nƺ\nƽ\nƽϯ\n\n޸\n¼ұ\nй\nF\nй\n\n˰\nй\nع\nع\nع\nzf¥\n\n̰۸\nǿƲ\nʽ\nη粨\n̫ӵ\nϺ\n\n廪\nɫ\nȨ\nз\nѪ\nŷ\n9\nʮߴ\nʮ7\n17da\nѧ\n9ѧ\nķ\n4\n˫\nϽִ\nԱ\n\nٷ\n\n̹\nǹִܱ\nǿƾ\n\nһִ\nһר\nһר\nרȨ\nܷԺ\nƽ\n\n\n̷\n\n\n־\n\n\n\n\n˾\n\n\nκ\nѰѵ\nγɻ\n˭й\nַ\nʿ\nʿ\n\nѡϯ\nһ\nmin\n\n\n\nchinesedemocracy\n\n\ndaѡ\nͶ\nͷ\n\nƽ\n\nάȨ\nð\n\n08\n08xz\n\n\nȭ\nľ\nquan\nrenquan\nйȨ\nй\nȺ¼\nȺ¼\n\nȥ\n˵\nԸ\n\n\n\n˴ǩ\nɧ\nط\nϷ\nshangfang\nŷ\n\n\n\n֯\n\nzuo\njing\nʾ\nʾwei\n\nyou\n\nxing\nyouxing\nٱ\nparty\n\n\n\n\n\n\ndi\nzhi\ndizhi\nboycott\nѪ\nй\nbaoluan\nѪͻ\nֱ\n\n𱩶\nbaodong\n\nɱ\nչ\nԹ\nտ\nղ\nԲ\nղ\nշ\nճ\nʳ\nտ\nke\nԿ\nba\nս\nѧ\n\n\nԱ\nԱ\në\në\n5ë\n\njieyan\njie\nyan\n8ƽ¼\n֪64\n˾\nʰ\n2o\n20г\nʰ\n\nз\nٶ\nг\n½\n½\n198964\n535\n89괺֮\n64Ұ\n64ʱ\n64˶\n4¼\n¼\n粨\nѧ\nѧchao\nxuechao\nѧٶȳ\nŰ\n찴\n̹ѹѧ\nŮ\nʷ˿\n\n\nѪϴ\nĶ\n\n\nͮ\nӵ\n\nά\nϣ\n\n½\n\n֮\n\n\n\n\nϼ\nһ\nл\nĺ\n\nܷ\nŷ\nϢ\nͬ\nر̳\n\nܹ\ngfw\n޽\n޽\n\n\nй\n׾\n޽\ný\nάٿ\nŦԼʱ\nbbc\nʢʱ\nձ\nɭ\nɭ\nǵձ\nwikipedia\nyoutube\ngoogleblogger\n㲥˾\nӢʱ\n\nʱ\nйʱ\n\nв\n\n㵺\n֯\nȨ\n\nէ\nľ\nʯФ\n˵\nqiang\nbaoը\nzha\nbaozha\nzhaҩ\nzha\nըdan\nըyao\nzhadan\nzhayao\nhmtd\nױ\n\nըҩ䷽\nҩ䷽\nը䷽\nƤը\nҩ䷽\nը\nը\nž\n\nת\n\n8341\n21ž\nߴ\n7\n\n\nϾ\nɶ\nݾ\nϾ\nݾ\nɫ\nģͻ\n\n֯\nֲ\nֲ\n\nӡ\nӡ¼\n\nӢ\nmayingjiu\n\n\n\nˮ\ns\n\n\na\nȫͬ\n̨ٶ\n̨\n̨wan\ntaiwan\n̨\n̨\n̨\n̨干͹\n̨\n̨\n̨\n̨du\ntaidu\ntwdl\nһһ̨\n̨\nս\nռ̨\n̨֧\n̨\nռ̨\nͳһ̨\nո̨\n½̨\n̨\ntw\n̨\n⸴\n̨\n̨\n̨\n̨Σ\n̨ͳһ\n̨ս\n̨ս\n̨\n\n\nл\n\nx\n\n\nֶ\nduli\nfenlie\nձ\nСһ\nӵ\n\nά\nά\nά\n\nȱ\nľ\n\nͻ˹̹֯\nͻ֯\nɹŷѷ\nȷ\n\n\n\nzang\n\nm\n\n\ndalai\n\ndl\nδ\n\n\nض\n\n갶\nض\ndu\nzangdu\n֧zd\nر\n\nѩɽʨ\n\n\nɳ\n\nsa\nlasa\nla\n\n\nش\nت\nض\nض\nظ\n\nʯ\nxizang\nxi\nx\nz\ntibet\nϣ\nϣ\n\nϡ\n\n\n\n\nԮ\nbjork\nǧԴ\n\nؽ\n\nػ\n\n˹\nºµ\nºĬ\nĬĬ\n˹\nʥս֯\n\nzhen\nqingzhen\n\n\n\n\n޵۹\n\n\n\nϵ\n\n\n\n\n\nڶ\n\nҰ\n\nɺͺ\n\nȪ\n\n\nϼ\nϼ\nξ\nίԱ\nͷ\n̫\nϷ\nϵ\nϵ\n\n\n\nϯ\n\n齫͸\n\n\nǹ\n혌\nҩ\nˎ\n̨\n̨ٱ\n̨\n̨\n̨Ӧö\n̨Ȩ\nй\nй\nй\nй\nйĻ\nй\nйѪ\nй\nй۹\nй\nй\nй\nй\nй\nйڰ\nй\nйȨ\nй־\nйȨ\nй\nй˵\nйϴ\nйа\nйа\nйϷ"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/敏感词库大全.txt",
    "content": "Ʒ\n\ne\nһ\nҺ\n\nְ칫¥\nֺ\n\nˎ\n׼ȷ\n˾\n˾ѧ\n˾\nѲ\nѵСƽ\nѧ\nչ\n׻ǩ\nѵ\n챾\n\n\nƱ\nƾ\nʵ\n֤\nʸ\nƾ\n\n֤\n̵\nkҵ\nkC\n\n\nң\nڸ\n\nִ\nɧ\nʡί\n\nָϮ\nй\n˾\n\nҵC\nƾ\n\n\n\n\n\n\n\nڻ\n\n\nͣ\n԰α\n鶼\nȫ\n˼Ļ\nŮ\næ\n\nֻ֪\nڿƼ\nɻ\n̤\nɽ\nӬˮ\nش\nت\nɩ\nɩ\nûв\nƨƨ\n\nǨ\n\n˵\n˿ͨ\n\nƬ\n\nͼ\n\nС\nǹ\n͹\n̰\n\n\nŴ\nܽ\nɼ\n۷Ʊ\n۾\n͸\nˮ\nȰ\nȻ\nͨ\nˮ\n\nҩ\nˎ\n\nҵ֤\n𰸰\nṩ\nɻר\n\n\nҰ칫\n󼦰\nu\nԪ\n¶\n\n̰\n\n\n췢Ʊ\n\n\nѧ\n\nk\n\nƱ\nƱ\n\n\nд\nд\n\n\n\n\n\n\nҪ\n\nĹ\nή\nǰɾ\nܱ\n\nʧ\n˵\n\nС\n\nòƼ\nͬ\nƲݺ\nȼC\nƨ\nϰ\n\n\nүүת\n\nز֮\n\n\n۹֮\nֽ\nŻ\n繷\n绰\n缦\n\n谴\n\n\n\n\n\n\n|\nСڽ\n\nС\n\n\n̨\n\nŽ\nǿӲ\n\nèè\n_˹\n\n\n\n԰\n԰\n԰ɱ\n԰\n̴\nƾ\nƱ\nƱ\nƱ\nlƱ\n\n׹\n\nַ\nάȨ\nһ\nԺ\nǬ\n\n״\n\n\n\n\nҩˮ\n\nǹ\n֤\n̰\n˽\n\nͬ\n޽\n\n޵A\n^\n\n\n\n\nǹ\nӡ\nӡ\n\nŸ\nĺ\n˿\nԱ\nؽ\n\n\n빷\nǹ\n۰Ĳ\nR\nA\n߾\n߿\nݺݺ\n½\n泤\n״\n֤\n࿼\nƾ\n\n̵\n\n\n\nС\nС\n\n\n\nƨר\nĶһЩ\nԹԷ\n̹\nҲ\n\nѧ\n\nԲ\nͶע\nҼ\n\n̵\n\nһ\n\nҩֱ\n\nȦǮ\n\n͹\n͹\n͹\nڻҩ\nɫֲ\n͸\ntɫ\nڶ\n\n\\\n\nҫ\n\nͷ\n\nſ\nѧɨä\nϹ\nᴵ\n\n֤\n\nʹͶע\nƱ\nԲ\n\nҲ\nλ\nŶ\n\n\n\n\n\n\n\n\n\n\n\n\n\n帯\n֭\nD֭\nѾ\nһ\n\n׳\n\n\nְ\n\n\nը\nڶ\n̫\nϵ\n\n\n\nο\n̰\nҹ\n\nְ\n\n\n\n\n\n\n\nҪ\n\n\n챻\nĻ\nŹ\n˵\n״\n\nƷ\nǹ\n\n\n̳\n\n\n\nͰ\nҪɫ\nٹ\n\n˵ȫ\nʳ\n\n\nƷ\n\nѡ\n\n_a\n_Ʊ\nɱ\n˶\nûв\n¥\n\n󸶿\n\n\n\nǰ\nǰ\nǰ\n豸\n԰\nԱ\nԴ\nԻ\n\nǹ\nп\nд\n\n˷\nǧ\n͸\nպŵ\n\n\ný\nǹ\n\nٰ\nѲ\n˵\nˮ\n\n\nȫ\nѨ\nϻ\nŮ\n׼ȷ\nƽ\n־\nӽԻ\n֤\nӰ\n֤\nʱ\nƭ\n\n\n\n\nM\n\n\n\n\nƵ\nի\n˼\nԺð\nǹ\nԘ\nC\n\nѪ\nϲ\n\n\n\n¼\n¡ָ\n½\n½ͬ\nȰͪ\nҼ\n\nС\ny\n\nӰ\në\nƬ\nֹ\nǹ\nĴ\n˹С\n\n\nϼ׺\n\n\n\n齫͸\n\nǹ\n혌\nˎ\nר\nز\nƱ\nп\nԿ\n˿\næ\nè۹\nëһ\ný\nÿһ\nٸ\nðĦ\n\nŰĦ\nű\nT\nåѵ\nɺҩ\nԻ\nԻҩ\nԻˎ\nԻ\nԻҩ\nԻˎ\nԻ\nԻҩ\nԻˎ\nԼҩ\nˮ\nҩ\nˎ\nռҩ\nѨ\n\n񴢺\n\n\n\nӡ\nĦС\nĸ\nľ\nĻûв\nĻǰ\n\nϳ\nѨ\n\n֮\n\nθ\n֮\n\nŨ\nŭ־Ը\nŮ˼Ҹ\nŮ\nŮʦ\nŮ˺͹\nŮְ\nŮ\nŮ\nŸ֮\nļҩ\nļ\nƷ\nƼ\nڵС\n㿼ǹ\n\n\nζ\nμ\nƽҰ\nƽе\nͲ\nͨ\nڻ\n漣Ļ\nɢ\nﵥ\n\nǹ\n\nǹ\n☌\nǦ\nǮ־\nǹ\nǹĲ\nǹķ\nǹĽ\nǹ\nǹ\nǹŮ\nǹֳ\nǹģ\nǹֶ\nǹ\nǹ\nǹе\nǹӵ\nǿȨ\nǿӲ\n\n\n\n\n\nⵯ\n\n弃\n\n\nƵ\n\n軯\n軯\n뼯\nʾ\nԸ\n\n\nȢ\nȫ֤\nȺ鱩\nȺ𿹱\nȺ\nƹ\nǵĹ\nȨ\n\n\n\nǮ\nƾ\n˹\nط\nⶴ\n\n\n齻\nĹ\nɧ\n\nƬ\n뵹\n\n\nɧ\nɧ\nɧѨ\nɧ\nɨүү\nɫӰ\nɫ\nɫƵ\nɫС˵\nɱָ\nɽ\nɿ\nɿȺ\nż\nչ\nƿ\nعض\nع\nع\nǹ\nӳϮ\n\n߼\n\n\n̤\nФ\nʥսϢ\nʢ\nʬ\nʧˮ\nʧҩ\nʨ\nʮ˵\nʮ\nʮ\nʮԤ\nʮ˲
\nʮߴĻ\nʵҵ֤\nʵ\nʵѧ\nʿ¼\nʽ\nӽ\nǶè\nֱ\nִ\nֹ\nֻ\nֻ\nֻ\nֻ׷\n\nľ\n֘\n\n޽\n۲ǹ\n۴\n۵\n۵ɵ\n۷\n۹\nۻͷ\nۻҩ\nۼٱ\n۽\n۾\nǹ\nȰ\n\nð\nǹ֧\n\n\nǹ\n\n\nһԪӲ\nӵ\n\n\n츾\nƾ\n˫\n˫ƽ\nˮ\n˿ʿ\n˿\n˿ౣ\n˿\n˿\n˿\n˿\n˿㰴\n˾\n˾\n˽д\nֲ\nҪë\nĲ\nĴ󳶸\nС\nռͼ\nϼ\nŮ\nٴ\nȡ֤\nǰ\ṇ˰\n̫\n̩\n̩\n̩\n̰Ҳ\n̽⹷\nι\nһ\nع\n\n\n͸Ӿ\n濼\n\n쳯\n֮\nƹ\nչ\nɣ\nͣ\nͥ\nֱͥ\nܾͨ\n͵\n͵̰\n͵\n͵͵̰\nͷ˫\n͸ӹ\n͸Ӿ\n͸\n͸\n͸۾\n͸ҩ\n͸\nͺӥ\nͻƷ\nͻ·\nͰ\n\n˹\nఴĦ\n͸Ӿ\nΧ\n\nԿ\nɧ\n\n永\n\n\n\n΢\nΧ\nΧϺ\nάԱ\nάȨ\nάȨ\nάȨ̸\nί\nνĺг\n¼ұ\n˹\nӰ\nؼҌ\nӱ\nٱ\nƾ֤\nǿ\në\nű\nŷ\nͰ\nҵ\nҸ̨\nӬˮ\n޳¼\nר\n׹\n\nҹ\nҹ\n侯\n侯Ź\n侯\nԱ\nԱ\n\n\nȥ\nϣ\nϰƽ\nϰƽ\nϯ\nϯǰ\nϯָŻ\nϴ\nϲ̰\nҷ׷\nִ\nֽͶע\n͸Ӿ\n\nݺ\nݺ\n\n̳\n\nһ\nܲ\n\nСѨ\nУɧ\nЭ\nд\nй©\n½\n½\n½\n½ƿ\n\nŷר\nŽ\n\n\nг\n͸Ӿ\nǹ\nպ\n\n԰\nԸ\nԸ\nƹ\nϯ\nԪ\nѧɧ\nѧλC\nW\nѾ\n̸\n\nԱͽ\n\n\n\n没\nҦȥ\nҪȨ\nҪ侫\nҪ\nҪй\nҹ\nҺը\nһС\n\n\nע\nܻ\n\nꎴ\nꎵ\nꎑ\nħ\nŮ\n\n}\n\nѧ\nˮ\nѨ\nζ\n\nӦӵ\nӤ\nӽ\nǹ\nĹ\nξ\n̲һ\nת\n׳\n͸\nͬ\n\n빷\n\nŮ\nԩ\nԧϴ\n԰Ұ\n԰\n԰ɱ\n԰ɱ\n԰Ѫ\nԭһ\nԭװ\nԬڷ\nε\n\n\n⵽\n⾯\n侯\n¼\n\nը\nըң\nը\nը\nըҩ\nըҩ\nըҩ\nŴ\nǹ\nԮ\nί\n\n̰\n\n\n\n̽豸\nǮ\nǮͶע\n\nʵƾ\nʵʸ\nһ\n\n֤\n֤\n֤\n֤\n֤\n֤һ\n\n\nC\nֲ\nֳ\nָƿ\nָĤ\nָ\nҸ\n־Ը\nƷ\nǹ\n֤\n֤\nеİ\nй\nйǿ\nֹԱ\nѧ֤\n\nݲҰ\nݴ̰\n\n߷\n罫\nϯ\nסӢ\n\n\nרҵ\nרҵ\nרҵд\nרҵ\nת\n׬Ǯ\nװ\nװǹ\nװ\nŻʿ\nθ\n˲\nʸC\nй\nҩ\n\nԼǹ\nο\nʥ\n\nܻŮ\n淨\nţ\nǹ\nҩ\n\n\nת\n\n֤\n\n\nС\nԭӵ\n֤"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/暴恐词库.txt",
    "content": "\nйͽ\nͳһ\n\n庣ʦ\n̹\n־\n־\n־\nֹ\n\nַ\nȥ\nȥ\n۹\nx\no\n0\nһһ\nӹ\nع\nlun\nfa\nlg\nflg\nfl\nfalungong\n󷨵\nԪ\ndajiyuan\n\nܱ\n\n\nα\n˵\ntuidang\ndang\nԽǽ\nfen\n\n\n9\n9ping\nping\njiuping\nʯ\nɱ\nsha\nziɱ\nsuicide\ntitor\n8\n˱\n9\nű\nɪŵ\n\nŵ鵤˹\ṉ\n\nƱͼ\nbeiͼ\n\n\n繷\n绰λ\n绰\n绰\n绰\n绰\n绰׷ɱϵͳ\nǹ\n缦\n羯\nǹ\nǹ\nǹ\nǹŮ\nǹģ\nǹ\nǹ\nǹе\nǹӵ\n۲ǹ\n۴\n۵\n۵ɵ\n۷\n۹\nۻͷ\nۻҩ\nۼٱ\n۽\n۾\nǹ\nȰ\n\nǹ֧\n\n\nǹ\n\nһԪӲ\nӵ\n\n飨ᣩ\n\n\n\n\nþ\n\n\n\n\n\n\n\n\nȻ\nԭװ\nԭӵ\nԭӵ嵥\nͪ\nѪ\n\n߻\n\ntestosterone\nȵ\nٺϸ\n\nɲ\nغͪ\nadrenaline\nerythropoietin\nε\nĪ\n\nͱ\nټ\n\nƲ\nϩ\n\ntamoxifen\nstrychnine\nandrost\nͶƷ\n䶡\n˷ܼ\nmdma\n\nluo\nheroin\ndiamorphine\ndiacetylmorphine\nѻƬ\nܽ\n\ncocain\n\nɳͪ\n\nk\n\nketamine\n\n\ncannabis\n\n\nȰͪ\nbenzodiazepines\n׻\n\n"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/民生词库.txt",
    "content": "\n\nǨ\nǨ\n\n\n\n\nͪ\nҩﶾƷࣺѪ\n\n߻\n\ntestosterone\nȵ\nٺϸ\n\nɲ\nغͪ\nadrenaline\nerythropoietin\nε\nĪ\n\nͱ\nټ\n\nƲ\nϩ\n\ntamoxifen\nstrychnine\nandrost\nͶƷ\n䶡\n˷ܼ\nmdma\n\nluo\nheroin\ndiamorphine\ndiacetylmorphine\nѻƬ\nܽ\n\ncocain\n\nɳͪ\n\nk\n\nketamine\n\n\ncannabis\n\n\nȰͪ\nbenzodiazepines\n׻\n\n\nmorphine\nҡͷ\nҩ\nԹԷ\nnarcotic\nҩ\nҩƷ\nרҵ\næһ\næ\n\n\n˽̽\n˽̽\n\n\nÿ\n޵Ѻ\n\nԭ\n踹\nҸ\nҸְ\n\n\nƱ\nѶͷ绰\n\nѶ\nͼ۳\n\nظɼ\n\n\nѶ\nʹ\nȡ\n\nӤ\n\n°\nС\nƳ\n\nԴѶ\nƵԴ\nٶ\n\nȫ\n߲\ntxt\nλqq\n6λqq\nλqq\nqb\nqb\nõи\nȫɱֲ\nĺ\nͶע\nǮׯ\nй˵\n\n\nϲ\nľĻ\n濼\nζ\n۴\nд\nan\nda\nʽת\n\nȦǮ\n\nʽȱ\n֤\nѺ\nС\nС\n\nи\n׵\nо\n\nӡ̸߽н\nι\nnai\nƿ\nѿ\n75¼\n³ľ\n½ɧ\n\n\nʳǼ\nǼ\nh1n1\n߱\nyangjia\ny\nyang\n\nj\nϮ\nɱ\n\nb26931\n\n\npxĿ\nĴ\nl\nle\n˷\nͱ\n\nⲻ\nСϼ\n\nyuce\nǰԤ\nԤ\n\nĹԤ\nǨ\nø\n˼Ԥ\n̤¹\n\n\n\nŵ\n\n\n\n˰˼\nҰ\n\n\n\n\n\n\n\n628¼\nweng\nwengan\n̰\nͰ¼\nը\nֻع\n췢Ʊ\n\n\nѧ\n\nk\n\nƱ\n_\n\nƱ\nƱ\n\nծ\nд\nд\n\n׷ծ\n󸶿\n\n\n\nǰ\nǰ\nǰܾ\nǰԤ\n,\n,\n԰\nԱ\nԴ\nԻ\n\nǹ\n\n\nп\nд\n͸ӹ\n͸Ӿ\n͸\n͸\n͸۾\n͸۾\n͸ҩ\n͸\n\n\nҰ칫\n\nͪ\nѪ\n\n߻\n\ntestosterone\nȵ\nٺϸ\n\nɲ\nغͪ\nadrenaline\nerythropoietin\nε\nĪ\n\nͱ\nټ\n\nƲ\nϩ\n\ntamoxifen\nstrychnine\nandrost\nͶƷ\n䶡\n˷ܼ\nmdma\n\nluo\nheroin\ndiamorphine\ndiacetylmorphine\nѻƬ\nܽ\n\ncocain\n\nɳͪ\n\nk\n\nketamine\n\n\ncannabis\n\n\nȰͪ\nbenzodiazepines\n׻\n\n\nKC\nKC껪\nŹ\nȺ\nȺ\nС6ͨ\n\n¼\n޽\n޽\n޽\n\n\nް\nKCʾ\nKCվ\nUP8\nƤ\nUP\n\n״翨\n\nչ\nտ\nŦ\nֻ\nֻ\n\nͨӳַ\nȫ1ͨ\n6ͨ\n\n鶯\nǽջ\nн\n󸶿\nѾƬ\nĸ\nƱ\nͨ\nǰ\nɱ\nֻ̳ͨ\n\n4ͨ\nׯ\nֵ\nڳ\nͨ\n\n\n֮\nϢ\n߸ӵ\nϲĺ\nϲĺ\n߸Ů\nȫ\nַƱ\n߹\n߹ٻ\n߹Ů\nһһ\n\n\n\n\n\n㵺\n̨\nٿ̨\nУ\nŴ\nͻ˷Ƿַ\n\n֬\n\n\n󷨵\ndpp\nfa lun\nfalu\n\nչ\nϴ\n·\n\n\n׹\n\n\n\n\n۹\n۹\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n  \n*\n..\nL\nlun\n\n\n\nչ\n\nִ\nַ\nֹ\n\n\n\n\n\nʮʮ\n˹\nԺ\n\n\nع\n݆\n\n\n.݆\n\n\n\n\n\n岻\n岻̳\n\n׹\n\n\n\n\n\n\n\n\n\n\nݦ־\n־\nά\nȥ\nȥع\nת\n\n\n\n\n\n\n桢ơ\n\n\n\n\n\n־\n\n\n\n\nfalun\nFalundafa\nfa\nFlg\n\n½̻\n\n־\nִ\nչ\nֹ\n׹\nĦŽ\nˮ\n\n\nŭ\n\n\n\n\n\n\n\n\nؽ\nͽ\n\nʦ\n\n־\n־\n鴫\nͨ\n\n\nʤ\n־\n귨\n\n־\n־\n\n־\n\n\n"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/色情词库.txt",
    "content": "Ů\nҺ\nĦ\nγ\n\n\n\n\n\n\n\n\n\n\n\nƼ\n־\n屩\nٱ\nٺ\n\nH\nH\n\n\nū\n\nb\n\n\n\n\n\n\n\ndv\n˵Ӱ\n̳\nС˵\n˵\n˵Ӱ\n˿ͨ\n\nƬ\n\nͼ\n\nС\n˵Ӱ\n̳\nɫ\nվ\nѧ\nС˵\nС˵\nϷ\nԾ\n\n\nɲ\nһ\nҩ\n\n\n\n\nŮ\n\n\n\n\nʱ\nѨ\nŮ\n´½\n\nѨ\nؽ\n\nͷ\n\nav\n\n\nڱ\nͥ\nѨ\n\n\n޾ֲ\nƬ\n\n\n\n\nį\nįŮ\nŮ\n\n\n\nд\nŽ\nǳ\nǳ\nҺ\nȥ\nތ\nջ\n\n\n\nѨ\n\nڱ\nڻ\nڽ\n\n\n\n\n\n˱\n˸\n˽\nŮ\n\n\n\nľ\n\n©\n¶b\nҽ\n\nֱ\nֲ\nּ\n\n\n\nٸ\n\n\nѨ\n\nش\nԼ\nѨ\nѨ\nҺ\n\n\nĸ\n\n\nū\n\n۱\nŮ\nѨ\nŪ\nŮ\n\nh\n羫\nƨ\nƷ\nǰ͹\nǿjian\nǿ\nǿ鴦Ů\nȤƷ\nɫ\nȭ\nȫ\nȺ\nǻ\n\n\nձ\n\n\n\nⴽ\nⶴ\n\n\n⾥\n\n\nѨ\n\n鱬\n鷿\n鹵\n齻\nͷ\nƬ\nɧ\nɧ\nɧŮ\nɧˮ\nɧѨ\nɫ\nɫ\nɫè\nɫ\nɫվ\nɫ\nɫɫ\nɫ\nɫ\nɫb\n갢\n\nˬ\n\nʳ\n\n޼\n޽\n\n\n츾\nĸ\nŮ\nˬƬ\nˬ\n˫\n\n˿\n˿\nɵ\n\n\nŪ\n\nλ\n\n\n\n͵\n͵\n\nڿ\n\nҾɫ\n\nŮ\n\n\nĴ\n\nС\nУ\nСѨ\nСxue\nд\nԸ\nԸջ\nԻ\nԼ\nԼ\nԽ\nū\nŰ\nϢ\n\n\nѨ\nѧ\nѨͼ\n\n\n\n˼\nҪ\nҹڲ\nһ\nһҹ\nһҹ\nһye\n\n\n\n\n\nӰ\n\n\n\n\n\n\n\nʦ\n\n\n\n\n\nħ\nĸ\nŮ\nŰ\n\n\nɫ\n\nѧ԰\n\nʿ\nˮ\n\n\n\n\nҺ\n\nb\nӦ\n׽\n\nŮ\n\nŮ\nŮľ\n\n\n\nѨ\nԮ\nԭζ\nԮ\n\nм\nм\n\nץ\n\nο\n\n18\n99bb\na4u\na4y\nadult\namateur\nanal\naƬ\nfuck\ngayƬ\ng\ngƬ\nhardcore\nh\nh\nincest\nporn\nsecom\nsexinsex\nsmŮ\nxiao77\nxing\ntokyohot\nyin\n\nװb\nsb\nɵ\nɵb\nɷ\nɷ\nɲ\nɵ\nɳ\nǷ\n\n\nҲ\nҲ\nܳ\nԲ\n\nܳ\ncao\n\n\n\n\nѾ\n\n\n\n\n\n\n\nƤ\n\n\n\nյ\n\nȫ\nү\n\n\nԵ\n˸\n\n\n\nb\n¼\nȫ\nȫ\nȫҲú\nȫ\n׳\n޳\nsb\nɱb\nb\n\n\n\n\n쵰\n\n\nְ\n\n\n԰\nй\n\n\n侫\nռ\nǿ\n\n԰\nϵ\nĦ\n\n\n\nٸ\n\nƨ\n\naƬ\nڿ\nԲ\n\n\n̼\n\n\n\nɧ\n\nsm\n˸\n߳\n¶\n\nһ˿\nѹ\n\n\nҸ\nȹ˶\nҼ\n\n\nС\n\nӰ\në\nƬ\n\n\nд\n\n\nŮ\nŮд\nŮ\nٸ\nðĦ\n\nԻҩ\nԻˎ\nԻ\nԻҩ\nԻˎ\nԻ\nԻҩ\nԻˎ\nԼ\nԼҩ\n\nˮ\nҩ\nҩ\nˎ\nռҩ\nɧ\nɧ\nɧ\nɧŮ\nɧ\nɫӰ\nɫ\nɫ\nɫӰ\nɫ\nɫͼƬ\nɫС˵\nɫӰƬ\nɫ\nɫӰ\nɫ\nɫƬ\nɫƵ\nɫС˵\n԰\nԷ\nԸ\nԸ\nԻ\nԽ\nԽƵ\nԽͼƬ\nū\nūӪ\nŰ\n\n\n\n\n\n\n\n\në\nꎴ\nꎵ\nꎑ\n\nŮ\nƵ\nƬ\n\n\nħ\nħ\nŮ\nŮ\n\n}\n\nѧ\nˮ\nѨ\nmorphine\nҡͷ\nҩ\nԹԷ\nnarcotic\nҩ\nҩƷ\nŮ\nҺ\nĦ\nγ\n\n\n\n\n\n\n\n\n\n\n\nƼ\n־\n屩\nٱ\nٺ\n\nH\nH\n\n\nū\n\nb\n\n\n\n\n\n\n\n˵Ӱ\n̳\nɫ\nվ\nѧ\nС˵\nС˵\nϷ\nԾ\n\n\nɲ\nһ\nҩ\n\n\n\n\nŮ\n\n\n\n\nʱ\nѨ\nŮ\n´½\n\nѨ\nؽ\n\nͷ\n\nav\n\n\nڱ\nͥ\nѨ\n\n\n޾ֲ\nƬ\n\n\n\n\nį\nįŮ\nŮ\n\n\n\nд\nŽ\nǳ\nǳ\nҺ\nȥ\nތ\nջ\n\n\n\nѨ\n\nڱ\nڻ\nڽ\n\n\n\n\n\n˱\n˸\n˽\nŮ\n\n\n\n
ľ\n\n©\n¶b\nҽ\n\nֱ\nֲ\nּ\n\n\n\nٸ\n\n\nѨ\n\nش\nԼ\nѨ\nѨ\nҺ\n\n\nĸ\n\n\nū\n\n۱\nŮ\nѨ\nŪ\nŮ\n\nh\n羫\nƨ\nƷ\nǰ͹\nǿjian\nǿ\nǿ鴦Ů\nȤƷ\nɫ\nȭ\nȫ\nȺ\nǻ\n\n\nձ\n\n\n\nⴽ\nⶴ\n\n\n⾥\n\n\nѨ\n\n鱬\n鷿\n鹵\n齻\nͷ\nƬ\nɧ\nɧ\nɧŮ\nɧˮ\nɧѨ\nɫ\nɫ\nɫè\nɫ\nɫվ\nɫ\nɫɫ\nɫ\nɫ\nɫb\n갢\n\nˬ\n\nʳ\n\n޼\n޽\n\n\n츾\nĸ\nŮ\nˬƬ\nˬ\n˫\n\n˿\n˿\nɵ\n\n\nŪ\n\nλ\n\n\n\n͵\n͵\n\nڿ\n\nҾɫ\n\nŮ\n\n\nĴ\n\nС\nУ\nСѨ\nСxue\nд\nԸ\nԸջ\nԻ\nԼ\nԼ\nԽ\nū\nŰ\nϢ\n\n\nѨ\nѧ\nѨͼ\n\n\n\n˼\nҪ\nҹڲ\nһ\nһҹ\nһҹ\nһye\n\n\n\n\n\nӰ\n\n\n\n\n\n\n\nʦ\n\n\n\n\n\nħ\nĸ\nŮ\nŰ\n\n\nɫ\n\nѧ԰\n\nʿ\nˮ\n\n\n\n\nҺ\n\nb\nӦ\n׽\n\nŮ\n\nŮ\nŮľ\n\n\n\nѨ\nԮ\nԭζ\nԮ\n\nм\nм\n\nץ\n\nο\n\n18\n99bb\na4u\na4y\nadult\namateur\nanal\naƬ\nfuck\ngayƬ\ng\ngƬ\nhardcore\nh\nh\nincest\nporn\nsecom\nsexinsex\nsmŮ\nxiao77\nxing\ntokyohot\nyin"
  },
  {
    "path": "src/test/java/com/cnblogs/hoojo/sensitivewords/贪腐词库.txt",
    "content": "\n̰\ngcd\n̰\ngongchandang\n\nһһ\n\n\n\nc\nx\n\n\n\n\n\n\nе\n\n\n\nϹ\n\ngong\ngc\n\ngong\ng\n\n\n\nа\nר\n\n\n\ng\n\ng\n\n\n\nר\nıӦ\nĩ\nר\ncommunistparty\n֢\n\n\n\n\nf\nzhengfu\nzhi\n\n\nйzf\nzf\nwuԺ\nл۹\ngong\n½ٷ\nȨ\n־\n\n־\n\nŮ\n̰20\nں\n̰\n\n̰Ƹ\n\n߹\nϣͬ\n̰\nְ\nͽ\n\nܻ\nְȨ\nͽ\nûո˲Ʋ\nɿ˽\n\nά\nΥ\n\nְ\n\nͽ\nײ\n˽\n\nȨı˽\nְ\n͢\n̻\nŹ\n֥\nƽ˳\nɱ\n\n˫\nܻ\nд\nȨ\nûոȫƲ\nʯױ\n\nҫ\nȨ\n\nͼ\n\nƹ\n\n̩\nЧʱ\nٱ\n\nΥ\nܹƱ\n\n\n֣\nɽ\n\n֣\n\nٰ\n׾\n쿴\nκ\n־\nȪ\n\n\nкɳȶ\nɭ\n÷\nηɱ\nˮ\nƽ\n\n촨\n˺\n\n֪\nԸ\n\n޶Ʋ\nԴ\n\n\n´\n\n\nԶ\n\n±\nά\n˫\n־\n\n\n ຣ\nŷ 㶫\nΤ \nӢ \nҵ \nڷ 㶫\n ӱ\nز \n \nſ㶫\n꺣\nάϯ\nнܺ\n\n׷\n\nż㽭\n\nƼͳϱ\n\nʽ\nļ\n¼\n˹ɽ\n¸\n\nγ⽭\n»Э\n³ йͨ\n \nǿ \nӢ ֯\n \nС\n ҵ\nѩ\nֿ\n\nŶ\nͬ\n\n\nŴ\nԴ\nƽ\n\nС\n\nĽ\n\n翬\nȺ\n\n\nܶ\n췢 \nں\nη\n\n\nƽ\n\n\n\n\nƤǭ\nԪ\nҵ\n\n\nɸ\nФ\n վ\nʤ ܲ\nβ "
  },
  {
    "path": "src/test/resources/BadWord.txt",
    "content": "﻿15768\n32283\n40698\n40698\n71776\n985985\n。gm\n@sshole\n∪R\n∪R靠\n┻┼\n☭\n02jam\n1000y\n13點\n168www\n１６大\n17game\n17Game\n17爱网\n18dy\n18禁\n18摸\n１８摸\n18淫书\n21ｃｎ\n22park\n2逼靠\n306医院\n365 sex\n365情色\n39CK\n39仓Ku\n3p炮图\n3P炮图\n3P炮圖\n50zhou\n51vip\n51yxb\n58jb\n5jq\n5kuai\n5街区\n6。4\n６。４\n6?4靠\n64yl\n64动乱\n64動亂\n64事件\n68wow\n7。22\n777黄站\n77bbb\n7hero\n8 仙\n8 仙靠\n8。9\n8?9靠\n8？9靠\n88jb\n89-64cdjp\n89暴乱\n89暴亂\n89动乱\n89動亂\n89风波\n89風波\n89六四\n89学潮\n89學潮\n89运动\n8仙\n8仙靠\n933cn\n97sese\n988wow\n98bb\n99bb\n99BBS\n9JBW\n9v9e\n9城\n9之遊\n9之游\na$$hole\na$shole\na4u\na4y\nabc abc\nADMIN\nAdmin\nadmin\nadmin靠\nadult\naids\nAids\nAIDS\naika\nai滋\nａｉ滋\nALERT\nAlod\namateur\nanal\napesoft\napex\napexon\nApexsoft\nappie\nArchlord\nas$hole\nasgard\nasiasex\nasktao\nASS\nasshole\naszol\navatar\nav成人\nav贴图\nav貼圖\nayawawa\nAYAWAWA\na片\nA片\nａ片\nBaichi\nbaichi\nbankai\nBaopi\nBao皮\nBastard\nbastard\nBASTARD\nbasterd\nbatard\nbbscity\nBiaozi\nBIAOZI\nbiaozi\nBiao子\nbignews\nbingnews\nBitch\nbitch\nBITCH\nBi样\nｂｉ样\nbjzc\nblogbaby\nBLOW\nblowjobs\nBlowJobs\nblueeye\nBnB\nbo ke\nboke\nbong\nboxilai\nboxun\nbt成人\nBT激情\nBT淫穴\nbukake\nbukakke\nbukkake\nbutthead\nbutthole\nbxqt\nB博士\nb毛\nB毛\nb样\nB样\nｂ样\nc a o\nc a o靠\ncabal\ncamon\ncao\nCAO NI MA\ncao ni ma\ncaoB\ncaobi\ncao靠\ncao你\ncctv\nCCTV\ncdjp\nchangeu\nchao nv\ncha你\nchenxun\nchinamz\nchuan qi\nchui chui\nci77\ncjsh\nCM\ncnd\nCNN\ncnouc\ncom流氓\nconnard\nconquer\ncounselor\ncreaders\ncreate\nCREATE\ncronous\nctracer\ncunt\ncunt cunt\nd f d z\nd p p\nd7se\nda hua\nda tang\ndafa\ndajiyuan\ndalai\ndamm\ndamn\ndao jian\ndefan\ndefannet\ndekaron\ndelete\nDELETE\ndfdz\ndfjoy\ndh315\ndick\nDick\nDICK\nDJMAX\ndou dou\ndragon\ndroiyan\nDROP\ndtfy\ndyonline\nearthciv\neight仙\nenculer\nepochtime\neqsf\neverstar\nezgaming\nEzgaming\nf a l u n\nf a 轮\nf a 輪\nf l g\nf u c k\nF.L.G\nF。L。G\nF_U_C_K\nf_u_c_k\nFag\nfalu\nFALUN\nfalun\nfalundafa\nFALUNDAFA\nfa轮\nfelch\nfeltch\nfeng shen\nfeng 
yun\nfgmtv\nFku\nfku\nFLG\nflg\nflyfff\nfofg\nfosaon\nfoseaon\nfoutre\nfreechina\nfreedom\nfreenet\nfu(\nfuc\nFUCK\nFuck\nfuck\nfuck fuck\nFuck You\nfuck you\nFUCK YOU\nFUCKYOU\nfuckyou\nfuck靠\nfuck骚\nfuck傻B\nfuck傻逼\nfuck售ID\nfuck死gd\nfuck死GD\nfuck死gm\nfuck死GM\nfuk\nG m\nG M\ng m\ng。m\nG。M\ng8\ngame\ngame17\ngame588\ngamegold\ngameline\ngamemaste\nGAMEMASTE\nGameMaste\ngamemy\nGAN\ngangbang\ngan你\ngaowan\ngay\nGC365\ngc365\nGCD\ngcd\nggol\nghost\nGM\nＧＭ\nＧｍ\nｇｍ\nG-M\nGM001\nGMworker\ngmworkers\nGN\ngong fu\ngranado\nGroove\nGY\nＧＹ\nｇｙ\ng点\ng片\nha bao\nhabbo\nhanbit\nhanbiton\nhanxiang\nhardcore\nhdw\nhelbreath\nhellgate\nhelper\nhero108\nherogame\nheting\nheyong\nhkhk68\nhong yue\nhongzhi\nhotsex\nhrichina\nhua xia\nhuanet\nhui huang\nhujintao\nH动漫\nh动漫\nH動漫\nh站\nh站靠\ni3hun\nicpcn\nincest\ninsert\nitembay\niuiuu\nｊ8\nj8靠\nJap Jap\nJB\njb\nｊｂ\njb靠\nJB靠\njhsz\nJHSZ\nji zhan\njiabao\njianghu\njiangshan\njian你\njiaochun\nJiaochun\njing ling\njinku\njintao\njinv\njinyong\nJi女\nｊｉ女\nJJ\njoyxy\nJPEEN\nju shang\njushang\njx2\njy2\nkai tian\nkai xuan\nkaixuan\nkakajb\nkang zhan\nKao\nkarma\nkart\nke luo si\nKEFU\nKefu\nKeFu\nkele8\nkjking\nｋｋ粉\nkqking\nkuaik\nKurumi\nK粉\nｋ粉\nK姐\nK他命\nladeng\nlaghaim\nlaqia\nlastchaos\nLateinos\nlihongzhi\nlike999\nlineage2\nling tu\nlipeng\nliuqi\nLIUSI\nliuxiao\nlong hun\nlong zu\nlovebox\nluanshi\nluo qi\nmabinogi\nMAD MAD\nmadelove\nMADELOVE\nmakai\nMAKELOVE\nmakelove\nMaki\nmaking\nmanager\nmankind\nmannweib\nmarket\nMASTER\nmaster\nmbs\nmeimei穴\nmeinv\nmeinv穴\nmerde\nmeretriz\nmetin\nmforest\nmi zhuan\nmierda\nming yun\nminghui\nminhui\nmir\nmir3\nmixmaster\nMK 星云\nmland\nMM屄\nmm屄\nmm美图\nmm美圖\nMM嫩穴\nmo jian\nmo xiang\nmo yu\nmoxiang\nmoyu\nmuhon\nmwo\nmxd\nmystina\nmythos\nnabi\nnacb\nnage\nnaive\nnavyfield\nNeoSteam\nneosteam\nＮＥＴ\nｎｅｔ\nnetbar\nnetdream\nNEWSPACE\nnmis\nnnd\nnnd=\nNPC靠\nnude nude\no2jam\nObama\nOffgamers\noffgamers\nolgad\nOn Air\nON9\non9\nonair\nonewg\nonhave\noperator\norgasmus\norgasums\nPaki Paki\npangya\npao 
pao\npaper64\npartita\nParty\npcik\npeacehall\npeachall\npenis\nPENIS\npet520\npetgirl\npetrealm\nphuc\nphuck\npiao piao\npiss\npk1937\nplayboy\npnisse\npolla\nPoon Poon\npopkart\npopoming\nPORN\nporn\nproject\npussy\nPUSSY\nqeeloo\nqi ji\nqi shi\nqi xia\nqi yu\nqian nian\nqiangjian\nqqr2\nqqtang\nQQtang\nQqyinsu\nQQ幻想\nｑｑ堂\nQQ音速\nQ币\nRape\nrape\nraycity\nray-city\nredmoon\nrenewal\nrenminbao\nrepent\nri\nRI NI MA\nrivals\nrivals靠\nrjwg\nroi world\nroiworld\nrong yao\nrplan\nrunstar\nrx008\nrxjh\nrxjhhvip\nrxjhvip\nrxjhwg\nrxwg\ns_b\nsafeweb\nsaga\nsalop\nsan guo\nsanguohx\nsaobi\nSARS\nsars\nSb\nSB\nsb\nｓｂ\nscrew\nsega\nsegame\nserver\nservice\nsex\nｓｅｘ\nSex Sex\nsf\nsh!t\nshemale\nshen hua\nshen qi\nshengda\nShine\nshine\nshit\nSHIT\nShit\nShit Shit\nshizhang\nshyt\nsilkroad\nsimple\nslanglist\nSM\nsm調教\nsm女王\nSM女王\nSM舔穴\nsm舔穴\nsm调教\nSM援交\nsm援交\nsnatch\nsoma\nspace\nsperm\nsphincter\nsuck\nSUCK\nSuck\nsvdc\nsw2\nswdol\nSystem\nsystem\nSYSTEM\nT.M.D\nT.M.D靠\nT。M。D\nt2dk\nTABLE\nTaiwan国\ntampon\ntantra\ntaobao\ntaobao靠\nteen\nteen sexy\nteensexy\nTENGREN\nTENGWU\nTeSt\ntest\nTEsT\ntEST\ntESt\ntesticle\nthsale\ntian shi\ntianji\ntianjing\ntiao zhan\ntibet\ntibetalk\nTibet国\ntmd\nTmd\nTMD\nｔｍｄ\nTMD靠\nTNND\ntnnd\nto173\nTOM在线\ntr\ntrannie\ntranny\ntravesti\ntriangle\nTrickster\ntta2\nttee\ntth2\nTtmd\nTTMD\nttwg666\ntum tum\nTW\ntw18\nu   r靠\nU   R靠\nu  r靠\nU  R靠\nu r\nU R\nU R靠\nu r靠\nU/R\nU/R靠\nUltraSurf\nunixbox\nUPDATE\nUR\nur\nＵＲ\nU-R\nurban\nurban靠\nurTNND\nur靠\nUR靠\nＵＲ靠\nU-R靠\nustibet\nvaameline\nvgbh\nviprxjh\nvoa\nVoyage\nvrtank\nVULVA\nWaigua\nwangce\nwangyang\nwangyou99\nwanwang\nwebmaster\nWEB牌戰\nWEB战牌\nwetback\nwg17173\nwg2222\nwg666\nwg8800\nwg886\nwg9996\nwgpj\nWG武功\nwhore\nwhoyo\nwow520\nwow711\nwowassist\nwowbank\nwowcaifu\nwowjingye\nwstaiji\nwtf\nwu hun\nwyd\nxi you\nxiao77\nxinsheng\nxizang\nxucaihou\nxyq\nyeeplay\nYGBH\nygbh\nyifeng\nyong 
heng\nyouxia\nYOUXIKA\nyouxika\nyuanming\nyuckfou\nyuckfu\nyuetao\nyuming\nyutong\nyxzbw\nzeming\nzhengjian\nzhengqing\nzhuanfalu\nzhuxi\nzirui\nzjdfjoy\nzlywy\nzongli\nz毛二逼\nㄐ八\nㄐ巴\nㄐ掰\nㄖ\n阿扁\n阿拉\n阿沛\n阿片烟\n啊无卵\n哀悼\n挨球\n艾滋\n艾滋病\n爱色cc\n爱液\n爱幼阁\n爱滋\n愛女人\n愛色cc\n愛液\n愛幼閣\n愛滋\n安拉\n安理会\n安眠酮\n安纳咖\n安南\n按摩棒\n傲神傳\n傲视\n奥巴马\n奥马尔\n奥运\n奧運\n八 仙\n八嘎\n八九\n八老\n八仙\n八仙靠\n扒屄\n扒光\n扒穴\n拔屄\n拔出来\n罢餐\n罢吃\n罢饭\n罢工\n罢食\n掰穴\n白痴\n白癡\n白粉\n白烂\n白立忱\n白立朴\n白莲教\n白小姐\n百海\n百家乐\n班禅\n包pi\n包二奶\n包皮\n薄码\n薄碼\n薄一波\n宝德\n保钓\n保釣\n保监会\n保密局\n鸨\n鲍戈\n鲍彤\n鲍威尔\n鲍鱼\n鲍岳桥\n暴动\n暴動\n暴干\n暴幹\n暴奸\n暴乱\n暴亂\n暴乳\n暴徒\n暴淫\n鮑彤\n鮑嶽橋\n爆吧\n爆操\n爆草\n爆乳\n爆乳娘\n贝肉\n貝肉\n被操\n被插\n被干\n被幹\n本拉丹\n苯比\n苯丙胺\n笨屄\n笨逼\n笨比\n屄\n屄屄\n屄毛\n逼奸\n逼毛\n逼你老母\n逼痒\n逼癢\n逼样\n逼樣\n逼孕\n逼孕套\n比的\n比卵\n比毛\n比水\n比样\n碧香亭\n避孕\n避孕套\n鞭神\n鞭王\n变态\n婊\n婊zi\n婊子\n婊子靠\n瘪三\n癟三\n宾周\n賓周\n冰毒\n冰锋\n波霸\n波霸 \n勃起\n博白县\n博讯\n博訊\n不举\n不玩了\n布莱尔\n布雷尔\n布雷爾\n布什\n布希\n步飞烟\n步非烟\n财政部\n采花堂\n採花堂\n彩票机\n菜B\n菜b\n菜逼\n参事室\n残废\n殘廢\n殘废\n殘疾\n沧澜曲\n蔵獨\n藏独\n藏独靠\n藏獨\n藏妇会\n藏婦會\n藏青会\n藏青會\n藏青社\n操\n操 gm\n操78\n操Gm\n操GM\n操gm\n操gM\n操meimei\n操mm穴\n操XX\n操屄\n操逼\n操逼毛\n操逼样\n操比\n操比毛\n操表姐\n操婊\n操婊子\n操宾周\n操冰毒\n操波霸\n操博讯\n操布什\n操藏独\n操操\n操操gM\n操操gm\n操操Gm\n操操GM\n操操XX\n操操逼\n操操比\n操操蛋\n操操你\n操操他\n操草妈\n操肏\n操插gm\n操插gM\n操插Gm\n操插GM\n操插你\n操插妳\n操朝鲜\n操车臣\n操车仑\n操陈功\n操陈毅\n操吃屎\n操赤匪\n操抽插\n操吹箫\n操春药\n操蠢猪\n操戳你\n操达赖\n操打炮\n操大B\n操大逼\n操大便\n操大法\n操蛋\n操狗\n操黑\n操护士\n操机掰\n操鸡\n操雞\n操烂\n操爛\n操林\n操妹妹\n操母狗\n操嫩穴\n操尼\n操你\n操你爸\n操你妈\n操你妈屄\n操你媽\n操你娘\n操妳\n操妳妈\n操妳媽\n操妳娘\n操妻\n操嫂嫂\n操射\n操神\n操爽\n操死\n操他\n操他妈\n操她\n操她妈\n操王\n操我\n操小穴\n操穴\n操肿\n操腫\n曹刚川\n曹剛川\n曹建明\n曹你\n曹庆泽\n曹长青\n草的你妈\n草妈\n草拟妈\n草你\n草你妈\n草你媽\n草你娘\n草擬媽\n草嫣\n草芝麻\n草枝麻\n肏\n肏屄\n肏穴\n冊老\n冊那\n册老\n册那\n厕奴\n侧那\n测绘局\n测拿\n测试員\n测試员\n廁奴\n測试員\n測試员\n測試員\n曾道人\n曾培炎\n曾庆红\n曾庆紅\n插 10p\n插 11p\n插 12p\n插 13p\n插 14p\n插 15p\n插 3p\n插 4p\n插 5p\n插 6p\n插 7p\n插 8p\n插 
9p\n插B\n插b\n插gm\n插gM\n插GM\n插Gm\n插暴\n插逼\n插比\n插后庭\n插後庭\n插进\n插進\n插嫩逼\n插嫩穴\n插你\n插你爸\n插你妈\n插你媽\n插你娘\n插妳\n插屁眼\n插深些\n插死你\n插他\n插她\n插我\n插小穴\n插穴\n插穴图\n插阴\n插阴茎\n插陰\n插淫屄\n柴玲\n蝉童\n产权局\n產黨\n猖妓\n娼\n娼妇\n娼妓\n長瀨\n常劲\n常委\n倡议书\n朝天穴\n朝鲜\n朝鮮\n潮吹\n潮喷\n潮噴\n车臣\n车仑\n車侖\n陈炳基\n陈伯达\n陈独秀\n陈功\n陈良\n陈良宇\n陈蒙\n陈丕显\n陈破空\n陈水扁\n陈随便\n陈希同\n陈晓宁\n陈宣良\n陈一谘\n陈毅\n陈永贵\n陈至立\n陈总统\n陳水扁\n陳希同\n陳曉寧\n陳毅\n陳雲\n陳宗興\n陳總統\n成克杰\n成人BT\n成人bt\n成人片\n程凯\n程真\n吃大便\n吃鸡巴\n吃雞巴\n吃精\n吃屎\n痴乳\n癡乳\n迟浩田\n遲浩田\n赤匪\n赤化\n赤裸\n抽插\n抽动肉\n抽你丫的\n臭b\n臭化西\n臭机\n臭机八\n臭鸡巴\n臭西\n臭西化\n臭作\n处女膜\n処女\n豖者\n畜生\n處女\n传奇3G\n传销\n傳奇3G\n傳銷\n创网\n吹喇叭\n吹萧\n吹箫\n吹簫\n锤锤\n锤子\n春药\n春藥\n蠢猪\n戳B\n戳b\n戳逼\n戳比\n戳你\n慈善会\n慈善會\n次郎\n崔英杰\n催情药\n错B\n错b\n错逼\n错比\n錯B\n錯比\n达赖\n达賴\n达癞\n達赖\n達賴\n打飞机\n打炮\n打砲\n打手枪\n打野炮\n打砸抢\n大B\n大b\n大j8\n大逼\n大比\n大便\n大波\n大波波\n大参考\n大东亚\n大東亞\n大法\n大花B\n大花逼\n大会堂\n大鸡巴\n大雞巴\n大纪元\n大纪园\n大紀元\n大揭露\n大力丸\n大卵泡\n大卵子\n大乱交\n大亂交\n大妈油\n大麻\n大麻油\n大奶头\n大肉棒\n大乳\n大乳頭\n大史\n大史记\n大史纪\n大使官\n大卫教\n大衛教\n大血B\n大血比\n大煙\n大阳具\n大跃进\n呆比\n呆卵\n代挂\n代炼\n代链\n代練\n带练\n带炼\n带链\n帶练\n帶炼\n帶链\n戴秉國\n戴海静\n戴红\n戴晶\n戴维教\n弹 劾\n弹劾\n彈 劾\n档案局\n党\n党委\n党卫兵\n党中央\n党主席\n黨中央\n荡妇\n荡妹\n荡女\n蕩婦\n蕩妹\n蕩女\n刀OL\n刀online\n刀剑\n倒台\n倒臺\n盗撮\n盗电\n盗窃犯\n道教\n盜撮\n盜竊犯\n登 辉\n登 
輝\n登辉\n邓发\n邓力群\n邓小平\n邓笑贫\n邓颖超\n鄧朴方\n鄧小平\n鄧穎超\n迪凡\n地震局\n蒂巴因\n电监会\n叼你\n叼你妈\n叼你媽\n屌\n屌7\n屌鸠\n屌毛\n屌妳\n屌七\n屌西\n吊子\n钓鱼岛\n钓鱼台\n釣魚島\n丁关根\n丁關根\n丁元\n丁子霖\n丁字裤\n腚眼\n东社\n东升\n东条\n东突\n东亚\n东洋屄\n东正教\n東Mai骚\n東突\n東洋屄\n董必武\n董建华\n董建華\n董贱华\n董文华\n懂文华\n动乱\n胴体\n動乱\n動亂\n都江堰\n豆豆秀\n毒贩\n毒販\n毒品\n独夫\n独立\n獨裁\n赌马\n赌球\n杜冷丁\n杜鲁门\n杜智富\n渡口\n短信猫\n断电\n断水\n斷电\n斷電\n斷水\n多人轮\n多人輪\n多维\n多维社\n多维网\n多維社\n多維網\n堕淫\n墮淫\n屙\n屙民\n俄罗斯\n饿b\n饿B\n饿比\n餓比\n恩格斯\n二B\n二b\n二屄\n二逼\n发$$抡\n发$$仑\n发$$伦\n发$$沦\n发$$纶\n发$$轮\n发$$论\n发$抡\n发$仑\n发$伦\n发$沦\n发$纶\n发$轮\n发$论\n发**抡\n发**仑\n发**伦\n发**沦\n发**纶\n发**轮\n发**论\n发*抡\n发*仑\n发*伦\n发*沦\n发*纶\n发*轮\n发*论\n发@@抡\n发@@仑\n发@@伦\n发@@沦\n发@@纶\n发@@轮\n发@@论\n发@抡\n发@仑\n发@伦\n发@沦\n发@纶\n发@轮\n发@论\n发^^抡\n发^^仑\n发^^伦\n发^^沦\n发^^纶\n发^^轮\n发^^论\n发^抡\n发^仑\n发^伦\n发^沦\n发^纶\n发^轮\n发^论\n发~~抡\n发~~仑\n发~~伦\n发~~沦\n发~~纶\n发~~轮\n发~~论\n发~抡\n发~仑\n发~伦\n发~沦\n发~纶\n发~轮\n发~论\n发改委\n发浪\n发愣\n发抡\n发抡功\n发仑\n发伦\n发伦功\n发沦\n发纶\n发轮\n发论\n发论公\n发论功\n发骚\n发正念\n発妻\n発射\n發$$掄\n發$$倫\n發$$綸\n發$$論\n發$侖\n發$淪\n發$輪\n發**掄\n發**倫\n發**綸\n發**論\n發*侖\n發*淪\n發*輪\n發@@掄\n發@@倫\n發@@綸\n發@@論\n發@侖\n發@淪\n發@輪\n發^^掄\n發^^倫\n發^^綸\n發^^論\n發^掄\n發^侖\n發^倫\n發^綸\n發^輪\n發^論\n發~~掄\n發~~侖\n發~~倫\n發~~淪\n發~~綸\n發~~輪\n發~~論\n發~掄\n發~侖\n發~倫\n發~淪\n發~綸\n發~輪\n發~論\n發浪\n發掄\n發掄功\n發侖\n發倫\n發倫功\n發淪\n發綸\n發輪\n發論功\n廢墟\n法$$抡\n法$$掄\n法$$仑\n法$$伦\n法$$沦\n法$$纶\n法$$轮\n法$$侖\n法$$倫\n法$$淪\n法$$綸\n法$$輪\n法$$论\n法$$論\n法$抡\n法$掄\n法$仑\n法$伦\n法$沦\n法$纶\n法$轮\n法$侖\n法$倫\n法$淪\n法$綸\n法$輪\n法$论\n法$論\n法*\n法**抡\n法**掄\n法**仑\n法**伦\n法**沦\n法**纶\n法**轮\n法**侖\n法**倫\n法**淪\n法**綸\n法**輪\n法**论\n法**論\n法*功\n法*抡\n法*掄\n法*仑\n法*伦\n法*沦\n法*纶\n法*轮\n法*侖\n法*倫\n法*淪\n法*綸\n法*輪\n法*论\n法*論\n法@@抡\n法@@掄\n法@@仑\n法@@伦\n法@@沦\n法@@纶\n法@@轮\n法@@侖\n法@@倫\n法@@淪\n法@@綸\n法@@輪\n法@@论\n法@@論\n法@抡\n法@掄\n法@仑\n法@伦\n法@沦\n法@纶\n法@轮\n法@侖\n法@倫\n法@淪\n法@綸\n法@輪\n法@论\n法@論\n法^^抡\n法^^掄\n法^^仑\n法^^伦\n法^^沦\n法^^纶\n法^^轮\n法^^侖\n法^^倫\n法^^淪\n法^^綸\n法^^輪\n法^^论\n法^^論\n法^抡\n法^掄\n法^仑\n法^伦\n法^沦\n法^纶\n法^轮\n法^侖\n法^倫\n法^淪\n法^綸\n法^輪\n法^论\n法^論\n法~~抡\n法~~掄\n法~~仑\n法~~伦\n法~~沦\n法~~纶\n法~~轮\n法~~侖\n法~~倫\n法~~淪\n法~~綸\n法~~輪\n法~~论\n法~~論\n法~抡\n法~掄\n法~仑\n法~伦\n法~沦\n法~纶\n法~轮\n法~侖\n法~倫\n法~淪\n法~綸\n法~輪\n法~论\n法~論\n法lun功\n法功\n法国\n法愣\n法抡\n法抡功\n法掄\n法仑\n法仑工\n
法仑公\n法仑功\n法仑攻\n法仑共\n法伦\n法伦功\n法囵功\n法沦\n法沦功\n法纶\n法轮\n法轮工\n法轮公\n法轮功\n法轮攻\n法轮共\n法侖\n法侖功\n法倫\n法陯功\n法菕功\n法崘功\n法淪\n法惀功\n法婨功\n法棆功\n法腀功\n法碖功\n法耣功\n法蜦功\n法綸\n法輪\n法輪功\n法踚功\n法磮功\n法錀功\n法鯩功\n法稐功\n法论\n法论工\n法论公\n法论功\n法论攻\n法论共\n法埨功\n法溣功\n法論\n法黁功\n法西斯\n法谪\n法谪功\n法制办\n反党\n反黨\n反动\n反動\n反封锁\n反革命\n反攻\n反共\n反华\n反人类\n反人民\n反日\n反社会\n反社會\n反政府\n犯践\n犯賤\n犯踐\n方励之\n方晓日\n方毅\n方舟子\n房事\n放荡\n放蕩\n放尿\n放屁\n非典\n扉之阴\n扉之陰\n肥逼\n肥西\n废墟\n分家在\n分裂\n焚烧\n粉屄\n粉红穴\n粉紅穴\n粉穴\n粪便\n糞\n糞便\n风尘劫\n风艳阁\n封杀\n封神榜\n冯东海\n冯素英\n佛教\n佛祖\n夫妻3p\n服务器\n福呵定\n付申奇\n复辟\n傅杰\n傅鹏\n傅全有\n傅申奇\n傅铁山\n傅志寰\n傅作义\n傅作義\n干bi\n干gM\n干gm\n干GM\n干Gm\n干ＧＭ\n干X\n干x娘\n干X娘\n干爆\n干逼\n干比\n干到\n干的你\n干的爽\n干干\n干机\n干机掰\n干鸡\n干啦\n干勒\n干拎娘\n干林\n干尼\n干你\n干你良\n干你妈\n干你娘\n干妳\n干妳妈\n干妳马\n干妳娘\n干娘\n干您\n干您娘\n干炮\n干七八\n干汝\n干入\n干骚女\n干嫂子\n干爽\n干死\n干死CS\n干死GM\n干死你\n干他\n干他妈\n干它\n干她\n干她妈\n干牠\n干我\n干穴\n干一干\n干一家\n幹\n幹bi\n幹GM\n幹ＧＭ\n幹x娘\n幹逼\n幹比\n幹的你\n幹的爽\n幹機掰\n幹拎娘\n幹你\n幹你良\n幹你媽\n幹你娘\n幹妳\n幹妳媽\n幹妳馬\n幹妳娘\n幹您娘\n幹炮\n幹砲\n幹七八\n幹全家\n幹死\n幹死CS\n幹死GM\n幹死你\n幹他\n幹她\n幹穴\n幹一家\n赣您娘\n贛您娘\n灨你娘\n冈峦\n刚比\n刚瘪三\n刚度\n肛\n肛jiao\n肛屄\n肛交\n肛门\n肛門\n岡巒\n钢管舞\n剛比\n剛癟三\n剛度\n鋼管舞\n港澳办\n高级逼\n高級逼\n高俊\n高丽棒\n高丽朴\n高麗棒\n高麗朴\n高麗樸\n高莺莺\n高治联\n高自联\n睪丸\n睾\n睾丸\n膏药旗\n膏藥旗\n搞B\n搞b\n搞比\n搞你\n搞他\n搞她\n戈万钧\n戈扬\n哥精\n歌华\n革命\n格老子\n个批\n個批\n给你爽\n給你爽\n工力\n工商局\n工自联\n工作員\n弓虽\n弓雖\n公安部\n公安局\n公务员\n公媳乱\n公媳亂\n公子冲\n公子开\n功法\n龚学平\n龚學平\n龔学平\n龔學平\n共*党\n共产\n共产党\n共铲党\n共產黨\n共鏟黨\n共黨\n共匪\n共狗\n共军\n共軍\n共荣圈\n共榮圈\n狗B\n狗b\n狗比\n狗操\n狗成\n狗诚\n狗城\n狗乘\n狗干\n狗幹\n狗卵\n狗卵子\n狗娘\n狗屁\n狗日\n狗日的\n狗日靠\n狗剩\n狗屎\n狗养\n狗養\n狗杂种\n狗雜種\n購金\n孤儿\n孤兒\n古柯\n古龙\n谷牧\n顾顺章\n瓜批\n瓜婆娘\n瓜娃子\n挂机\n掛機\n拐卖\n关卓中\n观世音\n觀世音\n管里\n管里员\n管理员\n管理員\n管理者\n管裏員\n光线\n光線\n广电\n广电局\n广闻\n龟儿子\n龟公\n龟毛\n龟奴\n龟孙子\n龟头\n龟投\n龟頭\n亀頭\n龜兒子\n龜公\n龜奴\n龜头\n龜投\n龜頭\n鬼村\n鬼公\n鬼轮奸\n鬼輪奸\n鬼輪姦\n滚滚球\n滚那吗\n滾那嗎\n郭 平\n郭伯雄\n郭罗基\n郭平\n郭岩华\n国 贼\n国安局\n国防部\n国管局\n国民党\n国务院\n国研\n国研室\n国贼\n国资委\n國 賊\n國產AV\n國軍\n國民黨\n國權\n國務院\n國研\n國賊\n哈宝\n哈批\n骸卒\n海盗王\n海盜王\n海乐神\n海洛因\n海明威\n海洋局\n駭客\n酣乐欣\n含屌\n含鳥\n韩光\n韩桂芝\n韩联潮\n韩启德\n韩天石\n韩正\n韩杼滨\n韓啟德\n韓正\n汉 奸\n汉奸\n汉娃娃\n漢 奸\n漢娃娃\n行房\n豪乳\n好嫩\n好朋友\n好色cc\n郝建秀\n何德普\n何候华\n何候華\n何加栋\n何勇\n河 殇\n河 
殤\n河殇\n核潜艇\n核武器\n贺国强\n贺龙\n賀國強\n賀龍\n黑鲍娇\n黑鮑嬌\n黑屄\n黑逼\n黑鬼\n黑毛屄\n黑社会\n黑手党\n黑手黨\n黑天使\n黑五类\n红宝石\n红灯区\n红海湾\n红卫兵\n红月\n宏 法\n宏法\n宏象\n洪 传\n洪 傳\n洪 吟\n洪传\n洪兴\n洪興\n洪吟\n洪志\n紅燈區\n紅衛兵\n紅月\n后庭\n後庭\n胡XX\n胡嘉\n胡紧涛\n胡紧掏\n胡紧套\n胡锦涛\n胡锦滔\n胡锦濤\n胡锦淘\n胡緊掏\n胡緊濤\n胡緊套\n胡錦涛\n胡錦滔\n胡錦濤\n胡錦淘\n胡景涛\n胡景濤\n胡平\n胡启立\n胡乔木\n胡喬木\n胡耀邦\n胡耀帮\n胡志明\n胡主席\n葫芦案\n蝴蝶逼\n虎门\n虎门靠\n虎骑\n虎騎\n互淫\n护士穴\n花蕊\n华国锋\n华建敏\n华盛顿\n华义\n哗哗曼\n華國鋒\n華義\n嘩嘩曼\n欢城\n欢乐城\n歡樂城\n换妻\n換妻\n皇军\n皇軍\n黄　菊\n黄  菊\n黄大仙\n黄祸\n黄家骁\n黄菊\n黄克诚\n黄孟复\n黄片\n黄色\n黄色靠\n黄翔\n黄永胜\n黃　菊\n黃菊\n黃孟複\n黃片\n囘良玉\n回回\n回教\n回良玉\n回民\n回民暴\n悔过书\n汇款\n匯款\n昏药\n婚外恋\n婚外戀\n混沌\n混沌诀\n混沌靠\n火棒\n火麻\n獲救\n机八\n机巴\n机叭\n机吧\n机掰\n机战\n鸡8\n鸡八\n鸡巴\n鸡巴靠\n鸡叭\n鸡芭\n鸡吧\n鸡掰\n鸡店\n鸡鸡\n鸡奸\n鸡女\n鸡歪\n鸡院\n积克馆\n姬胜德\n基督\n基督教\n機八\n機巴\n機吧\n機戰\n激插\n激情MM\n激情色\n激情淫\n雞8\n雞八\n雞巴\n雞叭\n雞芭\n雞吧\n雞掰\n雞雞\n雞奸\n雞女\n雞歪\n雞院\n鷄巴\n极景\n極景\n集体淫\n集體淫\n几八\n几巴\n几叭\n几芭\n幾八\n幾巴\n幾叭\n幾芭\n擠母奶\n计生委\n纪登奎\n妓\n妓女\n妓女靠\n妓院\n家乐福\n嘉联\n贾庆林\n贾廷安\n贾育台\n假钞\n賈慶林\n奸\n奸暴\n奸你\n奸情\n奸染\n奸他\n奸她\n奸污\n奸一奸\n奸淫\n奸幼\n姦\n姦情\n姦染\n姦淫\n姦汙\n监察部\n监管\n监听器\n监听王\n检察院\n建国党\n贱B\n贱b\n贱bi\n贱逼\n贱比\n贱货\n贱人\n贱种\n剑网\n剑网2\n剑网3\n賤\n賤B\n賤bi\n賤逼\n賤比\n賤貨\n賤人\n賤種\n劍網\n江Core\n江core\n江ze民\n江八\n江八点\n江八条\n江八條\n江独裁\n江獨裁\n江核心\n江流氓\n江罗\n江绵恒\n江青\n江氏\n江戏子\n江则民\n江择民\n江泽慧\n江泽民\n江則民\n江擇民\n江澤民\n江贼\n江贼民\n江賊民\n江折民\n江猪\n江猪媳\n江豬\n江豬媳\n江主席\n将则民\n僵贼\n僵贼民\n僵賊民\n薑春雲\n疆独\n疆獨\n讲法\n蒋经国\n蒋树声\n蒋彦永\n蒋彦勇\n蒋震文\n蒋正华\n蒋中国\n蒋中正\n蔣經國\n蔣樹聲\n蔣彥永\n蔣彥勇\n蔣正華\n蔣中正\n降半旗\n酱猪媳\n醬豬媳\n交媾\n交通部\n姣西\n脚交\n腳交\n叫床\n叫春\n叫鸡\n叫雞\n叫小姐\n教派\n教徒\n教养院\n教育部\n揭批书\n劫机\n金币网\n金伯帆\n金酷\n金毛穴\n金瓶梅\n金日成\n金三角\n金新月\n金尧如\n金泽辰\n金正日\n津瑞\n紧穴\n锦涛\n锦天\n緊穴\n劲爆\n劲乐\n劲舞团\n劲樂\n勁暴\n勁爆\n勁乐\n勁樂\n禁书\n经血\n經血\n精蟲\n精水\n精童\n精液\n精液浴\n精子\n警奴\n靖国\n静坐\n纠察员\n鸠\n鸠屎\n糾察員\n九城\n九霾\n九评\n九評\n久遊\n久遊網\n久游\n久游网\n久之遊\n救災\n就去日\n菊花洞\n菊花蕾\n巨屌\n巨奶\n巨乳\n巨骚\n巨騷\n聚丰\n军妓\n军委\n军转\n軍妓\n卡弗蒂\n开苞\n开发\n开房\n开天\n開苞\n開天\n凯丰\n看牌器\n看棋器\n看中国\n康生\n抗日\n抗曰\n尻\n尻庇\n靠\n靠爸\n靠北\n靠背\n靠么\n靠母\n靠你妈\n靠你媽\n靠夭\n靠腰\n柯赐海\n柯賜海\n柯庆施\n柯慶施\n嗑药\n磕药\n磕藥\n可待因\n可卡叶\n可卡葉\n可卡因\n可可精\n克林顿\n克林頓\n客报\n客服\n掯\n孔雀王\n抠穴\n摳穴\n口爆\n口合\n口活\n口交\n口交靠\n口肯\n口射\n口淫\n寇晓伟\n哭么\n哭夭\n裤袜\n褲襪\n垮台\n垮臺\n快感\n快克\n快樂AV\n狂操\n狂插\n葵\n坤邁\n拉案);\n拉丹\n拉登\n拉凳\n拉客\n拉皮条\n拉皮條\n拉手冲\n喇
嘛\n来插我\n来爽我\n赖昌星\n賴昌星\n瀨名\n拦截器\n览叫\n懒8\n懒八\n懒叫\n懒教\n懶8\n懶八\n懶叫\n懶教\n懶趴\n烂b\n烂B\n烂屄\n烂逼\n烂比\n烂屌\n烂货\n烂鸟\n烂人\n烂游戏\n滥B\n滥逼\n滥比\n滥货\n滥交\n濫B\n濫逼\n濫比\n濫貨\n濫交\n爛B\n爛逼\n爛比\n爛貨\n狼友\n浪妇\n浪婦\n浪叫\n浪女\n浪穴\n劳教\n老b\n老B\n老鸨\n老逼\n老比\n老瘪三\n老癟三\n老江\n老卵\n老毛\n老毛子\n老母\n老骚比\n老骚货\n老騷比\n老騷貨\n老味\n黎安友\n黎阳评\n礼品\n礼品机\n李 录\n李 禄\n李 祿\n李 錄\n李大师\n李大師\n李大钊\n李德生\n李登辉\n李登輝\n李登柱\n李贵鲜\n李弘旨\n李弘志\n李红志\n李红痔\n李红智\n李宏旨\n李宏志\n李洪\n李洪宽\n李洪志\n李洪智\n李紅志\n李紅智\n李继耐\n李金华\n李金華\n李克強\n李克强\n李兰菊\n李嵐清\n李老師\n李立三\n李录\n李禄\n李沛瑶\n李鹏\n李鹏*\n李鵬\n李瑞环\n李瑞環\n李山\n李少民\n李师父\n李师付\n李師父\n李師付\n李淑娴\n李铁映\n李鐵映\n李旺阳\n李维汉\n李先念\n李小朋\n李小鹏\n李雪峰\n李长春\n李兆焯\n李至伦\n李志绥\n李总理\n李总统\n李作鹏\n厉无畏\n例假\n厲無畏\n麗春苑\n连邦\n连胜德\n连线机\n连战\n莲花逼\n連戰\n联 总\n联大\n联合国\n联梦\n联易\n联众\n蓮花逼\n聯 總\n聯眾\n炼功\n梁保华\n梁光烈\n梁擎墩\n粮食局\n两国论\n兩國論\n亮屄\n亮穴\n廖承志\n廖晖\n廖暉\n廖锡龙\n列宁\n列寧\n林保华\n林彪\n林伯渠\n林果\n林肯\n林樵清\n林榮一\n林慎立\n林伟\n林文漪\n林长盛\n林祖涵\n淋病\n灵游记\n凌辱\n靈遊記\n领导\n刘宾深\n刘宾雁\n刘国凯\n刘华清\n刘杰\n刘军\n刘凯中\n刘澜涛\n刘丽英\n刘淇\n刘千石\n刘青\n刘全喜\n刘山青\n刘少奇\n刘士贤\n刘文胜\n刘晓竹\n刘延东\n刘云山\n流氓\n流蜜汁\n流淫\n流淫水\n劉傑\n劉淇\n劉少奇\n劉曉波\n劉曉竹\n柳下惠\n六.四\n六。四\n六?四\n六？四\n六合采\n六合彩\n六四\n六-四\n龙虎\n龙虎豹\n龙虎靠\n龙新民\n龍陽\n娄义\n婁義\n漏逼\n卢福坦\n卢跃刚\n陆定一\n陆肆\n陆委会\n陸肆\n路易\n露B\n露b\n露屄\n露逼\n露点\n露點\n露毛\n露乳\n露穴\n露阴照\n露陰照\n卵子\n乱交\n乱伦\n亂交\n亂倫\n抡功\n掄功\n仑功\n伦功\n沦功\n纶功\n轮暴\n轮操\n轮大\n轮干\n轮公\n轮功\n轮攻\n轮奸\n轮流干\n轮盘赌\n轮盘机\n轮子功\n侖功\n倫功\n淪\n淪功\n耣\n綸功\n輪暴\n輪公\n輪功\n輪攻\n輪奸\n輪姦\n輪子功\n罗　干\n罗  
干\n罗富和\n罗干\n罗荣桓\n骡干\n羅幹\n騾幹\n裸聊\n裸陪\n躶\n洛奇\n吕京花\n吕秀莲\n呂秀蓮\n旅游局\n氯胺酮\n妈b\n妈B\n妈逼\n妈逼靠\n妈比\n妈的\n妈的b\n妈的B\n妈的靠\n妈个b\n妈个B\n妈个比\n妈妈的\n妈批\n妈祖\n媽B\n媽逼\n媽比\n媽的\n媽的B\n媽個B\n媽個比\n媽媽的\n媽祖\n麻痹\n麻黄素\n麻黃素\n麻醉枪\n麻醉药\n嗎b\n嗎逼\n嗎比\n嗎的\n嗎啡\n嗎個\n马大维\n马的\n马国瑞\n马加爵\n马凯\n马克思\n马良骏\n马巧珍\n马三家\n马时敏\n马万祺\n马英九\n玛雅网\n馬凱\n馬巧珍\n馬万祺\n馬萬祺\n馬英九\n瑪雅網\n鰢\n吗b\n吗逼\n吗比\n吗的\n吗的靠\n吗啡\n吗啡碱\n吗啡片\n吗个\n买财富\n买春\n买春堂\n買幣\n買財富\n買春\n買賣\n買月卡\n麦角酸\n麦叫酸\n売春婦\n卖.国\n卖。国\n卖b\n卖B\n卖ID\n卖QQ\n卖逼\n卖比\n卖财富\n卖国\n卖号\n卖号靠\n卖卡\n卖软件\n卖骚\n卖淫\n賣B\n賣ID\n賣逼\n賣比\n賣幣\n賣財富\n賣國\n賣號\n賣軟體\n賣騷\n賣淫\n賣月卡\n馒头屄\n瞒报\n满洲国\n滿洲國\n曼德拉\n蔓ぺ\n猫扑\n貓撲\n毛XX\n毛鲍\n毛鮑\n毛厕洞\n毛廁洞\n毛独立\n毛二B\n毛二屄\n毛二逼\n毛发抡\n毛发伦\n毛发轮\n毛发论\n毛发骚\n毛法功\n毛法愣\n毛法仑\n毛法轮\n毛反动\n毛反共\n毛反华\n毛反日\n毛佛教\n毛佛祖\n毛傅鹏\n毛干gm\n毛干gM\n毛干GM\n毛干Gm\n毛干你\n毛干妳\n毛肛\n毛肛交\n毛肛门\n毛高俊\n毛睾\n毛睾丸\n毛工力\n毛公安\n毛共匪\n毛共狗\n毛狗b\n毛狗操\n毛狗卵\n毛狗娘\n毛狗屁\n毛狗日\n毛狗屎\n毛狗养\n毛龟公\n毛龟头\n毛鬼村\n毛滚\n毛哈批\n毛贺龙\n毛洪兴\n毛洪志\n毛后庭\n毛胡XX\n毛花柳\n毛欢城\n毛换妻\n毛黄菊\n毛回回\n毛回教\n毛昏药\n毛火棒\n毛机八\n毛机巴\n毛鸡\n毛鸡八\n毛鸡巴\n毛鸡叭\n毛鸡芭\n毛鸡掰\n毛鸡鸡\n毛鸡奸\n毛基督\n毛妓\n毛妓女\n毛妓院\n毛奸\n毛奸你\n毛奸淫\n毛贱\n毛贱逼\n毛贱货\n毛贱人\n毛江八\n毛江青\n毛江猪\n毛疆独\n毛姣西\n毛叫床\n毛叫鸡\n毛禁书\n毛精液\n毛精子\n毛静坐\n毛鸠\n毛鸠屎\n毛军妓\n毛军委\n毛抗日\n毛尻\n毛靠\n毛靠腰\n毛客服\n毛口交\n毛狂操\n毛拉登\n毛懒教\n毛烂B\n毛烂屄\n毛烂逼\n毛烂比\n毛烂屌\n毛烂货\n毛老逼\n毛老母\n毛李鹏\n毛李山\n毛连战\n毛联大\n毛联易\n毛列宁\n毛林彪\n毛刘军\n毛刘淇\n毛流氓\n毛六四\n毛卵\n毛轮功\n毛轮奸\n毛罗干\n毛骡干\n毛妈B\n毛妈逼\n毛妈比\n毛妈的\n毛妈批\n毛妈祖\n毛吗啡\n毛卖B\n毛卖ID\n毛卖QQ\n毛卖逼\n毛卖比\n毛卖国\n毛卖号\n毛卖卡\n毛卖淫\n毛毛XX\n毛美国\n毛蒙独\n毛迷药\n毛密洞\n毛密宗\n毛民运\n毛奶子\n毛嫩b\n毛嫩B\n毛伱妈\n毛你爸\n毛你姥\n毛你妈\n毛你娘\n毛鸟gM\n毛鸟gm\n毛鸟GM\n毛鸟Gm\n毛鸟你\n毛牛逼\n毛牛比\n毛虐待\n毛喷你\n毛彭真\n毛皮条\n毛屁眼\n毛片\n毛嫖客\n毛破坏\n毛破鞋\n毛仆街\n毛普京\n毛强奸\n毛强卫\n毛抢劫\n毛乔石\n毛侨办\n毛切七\n毛情色\n毛去死\n毛人大\n毛人弹\n毛人民\n毛日Gm\n毛日GM\n毛日gm\n毛日gM\n毛日你\n毛肉棒\n毛肉壁\n毛肉洞\n毛肉缝\n毛肉棍\n毛肉穴\n毛乳\n毛乳房\n毛乳交\n毛乳头\n毛撒尿\n毛塞白\n毛三陪\n毛氵去\n毛骚\n毛骚B\n毛骚逼\n毛骚货\n毛色情\n毛傻B\n毛傻屄\n毛傻逼\n毛傻比\n毛傻吊\n毛傻卵\n毛傻子\n毛煞逼\n毛上你\n毛上妳\n毛射精\n毛石进\n毛食屎\n毛是鸡\n毛手淫\n毛售ID\n毛售号\n毛私服\n毛死gd\n毛死GD\n毛死gm\n毛死GM\n毛孙文\n毛他爹\n毛他妈\n毛台办\n毛台独\n毛台联\n毛太监\n毛特码\n毛天皇\n毛舔西\n毛推油\n毛退党\n毛外挂\n毛万税\n毛王刚\n毛王昊\n毛王震\n毛网管\n毛温馨\n毛倭国\n毛倭寇\n毛我操\n毛我干\n毛我日\n毛吴仪\n毛系统\n毛鲜族\n毛小泉\n毛小穴\n毛邪教\n毛新党\n毛性爱\n毛性交\n毛性欲\n毛学潮\n毛血逼\n毛丫的\n毛鸦片\n毛阳精\n毛阳具\n毛耶苏\n毛耶稣\n毛夜情\n毛
一鲜\n毛伊朗\n毛阴部\n毛阴唇\n毛阴道\n毛阴蒂\n毛阴核\n毛阴户\n毛阴茎\n毛阴毛\n毛阴水\n毛淫\n毛淫荡\n毛淫秽\n毛淫货\n毛淫贱\n毛淫叫\n毛淫毛\n毛淫靡\n毛淫水\n毛淫娃\n毛淫欲\n毛硬挺\n毛游行\n毛玉杵\n毛泽东\n毛泽東\n毛澤东\n毛澤東\n毛贼东\n毛賊東\n毛主席\n毛阝月\n玫瑰园\n梅花屄\n梅花网\n梅花網\n美国佬\n美國佬\n美利坚\n美女穴\n美乳\n美骚妇\n美騷婦\n美沙酮\n美少妇\n美少婦\n美穴\n美幼\n妹的穴\n妹妹穴\n蒙巴顿\n蒙尘药\n蒙独\n蒙獨\n蒙古独\n猛插\n猛干\n梦遗\n迷幻药\n迷幻藥\n迷魂药\n迷奸\n迷奸药\n迷歼药\n迷药\n迷藥\n米青\n米田共\n秘唇\n秘裂\n秘书长\n密传\n密洞\n密碼\n密穴\n密宗\n蜜洞\n蜜穴\n绵恒\n喵的\n灭绝罪\n民國\n民航局\n民进党\n民進黨\n民运\n民運\n民阵\n民政部\n民政局\n民猪\n民主\n民主潮\n民主墙\n民族\n明慧\n明慧網\n摸咪咪\n摸你\n摸阴蒂\n摸陰蒂\n模拟机\n摩力遊\n摩力游\n摩洛客\n摩门教\n摩門教\n魔獸幣\n莫伟强\n墨香\n默哀\n谋杀\n母奸\n母親\n穆斯林\n那可汀\n那妈\n那媽\n那嗎B\n那嗎逼\n那吗B\n那吗逼\n纳粹\n納粹\n奶娘\n奶头\n奶頭\n奶罩\n奶子\n南联盟\n南蛮子\n南蠻子\n脑残\n嫐屄\n闹事\n內射\n內衣\n内测\n内挂\n内射\n嫩b\n嫩B\n嫩BB\n嫩bb\n嫩鲍\n嫩鲍鱼\n嫩鮑\n嫩鮑魚\n嫩屄\n嫩逼\n嫩缝\n嫩縫\n嫩奶\n嫩女\n嫩穴\n尼克松\n倪志福\n伱妈\n你爸\n你大爷\n你大爺\n你老妹\n你老母\n你老味\n你姥\n你姥姥\n你妈\n你妈逼\n你妈比\n你妈的\n你妈靠\n你媽\n你媽逼\n你媽比\n你媽的\n你马的\n你馬的\n你奶\n你奶奶\n你娘\n你娘的\n你娘咧\n你全家\n你色嗎\n你是鸡\n你是雞\n你是鸭\n你是鴨\n你爷\n你爺\n你祖宗\n妳妈的\n妳媽的\n妳马的\n妳馬的\n妳娘\n妳娘的\n捻\n娘b\n娘B\n娘比\n娘的\n娘饿比\n娘餓比\n娘个比\n娘個比\n鸟g M\n鸟Gm\n鸟GM\n鸟gM\n鸟gm\n鸟gm \n鸟你\n鳥g M\n鳥GM\n捏弄\n聶榮臻\n宁王府\n牛B\n牛B靠\n牛逼\n牛逼靠\n牛比\n牛比靠\n农业部\n奴畜抄\n奴事件\n虐待\n虐奴\n诺亚\n女屄\n女尔\n女干\n女幹\n女尻\n女良\n女马\n女馬\n女乃\n女死囚\n女也\n女優\n女友坊\n拍卖官\n潘国平\n叛党\n叛黨\n叛国\n叛國\n膀胱\n泡泡岛\n炮友\n喷　射\n喷精\n喷精 3p\n喷你\n喷尿\n噴精\n嘭嘭帮\n嘭嘭幫\n彭冲\n彭德怀\n彭德懷\n彭佩云\n彭珮云\n彭珮雲\n彭真\n蓬浪\n皮條\n皮條客\n屁蛋\n屁股\n屁精\n屁眼\n嫖\n嫖娼\n嫖客\n姘\n姘头\n姘頭\n品色堂\n品香堂\n品穴\n平可夫\n迫害\n迫奸\n破处\n破處\n破坏\n破鞋\n仆街\n僕街\n普京\n普贤\n萋\n齐墨\n祁建\n祁培文\n骑你\n骑他\n骑她\n起义\n气象局\n千年\n前网\n钱 达\n钱达\n钱国梁\n钱其琛\n钱运录\n錢 達\n錢其琛\n錢運錄\n欠操\n欠干\n欠幹\n欠骑\n欠人骑\n欠日\n強暴\n強姦\n強姦犯\n強姦你\n強衛\n强　奸\n强暴\n强奸\n强奸犯\n强奸你\n强卫\n抢火炬\n抢劫\n抢劫犯\n抢粮记\n抢尸\n搶劫犯\n乔石\n侨办\n喬石\n窃听器\n钦本立\n亲 美\n亲 日\n亲美\n亲民党\n亲日\n秦 晋\n秦 晉\n秦晋\n秦真\n禽兽\n禽獸\n青楼\n青樓\n氢弹\n情报\n情報\n情妇\n情色\n情色谷\n情兽\n情獸\n庆 红\n庆红\n慶 紅\n親 美\n親 日\n親民黨\n親日\n穷b\n穷逼\n邱会作\n区委\n去你的\n去你妈\n去妳的\n去妳妈\n去死\n去他妈\n去她妈\n全裸\n拳交\n瘸腿帮\n瘸腿幫\n群p\n群P\n群奸\n群交\n群阴会\n群陰會\n然后\n冉英\n让你操\n讓你操\n热比娅\n人大\n人代\n人代会\n人弹\n人民\n人民報\n人民币\n人民幣\n人妻\n任弼时\n任建新\n任你淫\n日b\n日B\n日Gm\n日GM\n日gM\n日gm\n日X 妈\n日X 
媽\n日X妈\n日啊\n日本人\n日屄\n日逼\n日比\n日穿\n日蛋\n日翻\n日九城\n日军\n日軍\n日领馆\n日你\n日你爸\n日你妈\n日你媽\n日你娘\n日批\n日爽\n日死\n日死你\n日他\n日他娘\n日她\n日王\n鈤\n荣毅仁\n榮毅仁\n柔阴术\n肉棒\n肉逼\n肉壁\n肉便器\n肉唇\n肉洞\n肉缝\n肉縫\n肉沟\n肉棍\n肉棍子\n肉壶\n肉茎\n肉莖\n肉具\n肉蒲团\n肉蒲團\n肉箫\n肉簫\n肉穴\n肉欲\n肉慾\n乳霸\n乳爆\n乳房\n乳峰\n乳沟\n乳溝\n乳尖\n乳交\n乳尻\n乳射\n乳头\n乳頭\n乳腺\n乳晕\n乳暈\n乳罩\n润星\n潤星\n撒尿\n撒泡尿\n撒切尔\n萨达姆\n萨斯\n塞白\n塞你爸\n塞你公\n塞你母\n塞你娘\n赛你娘\n赛妳娘\n赛他娘\n赛她娘\n三K党\n三K黨\n三P\n三p\n三八淫\n三挫仑\n三国策\n三级片\n三級片\n三角裤\n三陪\n三陪女\n三唑仑\n氵去\n桑国卫\n桑國衛\n骚\n骚b\n骚B\n骚B贱\n骚棒\n骚包\n骚屄\n骚屄儿\n骚逼\n骚比\n骚洞\n骚棍\n骚货\n骚鸡\n骚姐姐\n骚浪\n骚卵\n骚妈\n骚妹\n骚妹妹\n骚母\n骚女\n骚批\n骚妻\n骚乳\n骚水\n骚穴\n骚姨妈\n懆您妈\n懆您娘\n騒\n騷\n騷B\n騷B賤\n騷棒\n騷包\n騷屄\n騷逼\n騷比\n騷洞\n騷棍\n騷貨\n騷雞\n騷姐姐\n騷浪\n騷卵\n騷媽\n騷妹\n騷妹妹\n騷母\n騷女\n騷批\n騷妻\n騷乳\n騷水\n騷穴\n騷姨媽\n色97爱\n色97愛\n色成人\n色弟弟\n色电影\n色鬼\n色界\n色空寺\n色链\n色猫\n色貓\n色咪咪\n色迷城\n色魔\n色情\n色情靠\n色区\n色區\n色色\n色色连\n色书库\n色图乡\n色窝窝\n色窩窩\n色影院\n色诱\n色欲\n色慾\n杀人\n杀人犯\n傻×\n傻b\n傻B\n傻B \n傻B靠\n傻屄\n傻逼\n傻逼靠\n傻比\n傻吊\n傻瓜\n傻卵\n傻鸟\n傻鳥\n傻批\n傻子\n煞逼\n煞笔\n煞笔靠\n煞筆\n山口組\n删 号\n删号\n删号靠\n伤亡\n商务部\n上访\n上海帮\n上你\n上妳\n少妇\n少妇穴\n少修正\n邵家健\n舌头穴\n舌頭穴\n社会院\n社科院\n射　精\n射精\n射了\n射奶\n射你\n射屏\n射爽\n射颜\n射顏\n身寸\n身障\n神汉\n神经病\n神泪\n神淚\n神泣\n神曲\n沈彤\n审计署\n升达\n升天\n生春袋\n生鸦片\n生殖器\n圣火\n圣母\n圣女峰\n圣战\n盛华仁\n盛宣鸣\n盛宣鳴\n聖火\n聖母\n聖女峰\n聖戰\n尸虫\n尸体\n尸體\n师春生\n屍\n屍体\n屍體\n師春生\n湿了\n湿穴\n濕穴\n十八代\n十八摸\n十景缎\n十七大\n十三点\n十三點\n石戈\n石进\n石首\n食精\n食捻屎\n食屎\n史迪威\n史玉柱\n驶你爸\n驶你公\n驶你母\n驶你娘\n屎你娘\n屎妳娘\n駛你爸\n駛你公\n駛你母\n駛你娘\n示威\n世界都\n世模\n世维会\n事屎\n试看片\n是鸡\n是雞\n释欲\n釋欲\n手淫\n受精\n受虐狂\n受伤\n受傷\n受灾\n受災\n售ID\n售号\n售號\n售软件\n售軟體\n兽奸\n兽交\n兽欲\n獸奸\n獸交\n獸欲\n熟妇\n熟婦\n熟母\n熟女\n数通\n刷钱\n双十节\n爽你\n爽图网\n爽穴\n水扁\n水利部\n税力\n司法部\n司马晋\n司马璐\n司徒华\n丝袜\n丝诱\n私！服\n私#服\n私%服\n私**服\n私*服\n私/服\n私？服\n私\\\\服\n私\\服\n私￥服\n私处\n私處\n私服\n私-服\n私—服\n斯大林\n絲襪\n死gd\n死GD\n死GM\n死gm\n死全家\n四川独\n四清\n四人帮\n四人幫\n四我周\n宋xx\n宋楚瑜\n宋美龄\n宋美齡\n宋庆龄\n宋任穷\n宋祖英\n搜狐\n搜神记\n苏家屯\n苏晓康\n苏振华\n酥穴\n酥痒\n酥癢\n蘇秀文\n孙大午\n孙文\n孙逸仙\n孙中山\n孫逸仙\n孫中山\n他NND\n他ㄇㄉ\n他ㄇ的\n他爸爸\n他爹\n他干\n他妈\n他妈的\n他妈地\n他妈靠\n他媽\n他媽的\n他媽地\n他嗎的\n他马的\n他馬的\n他吗的\n他母亲\n他奶奶\n他娘\n他娘的\n他祖宗\n它NND\n它爸爸\n它妈\n它妈的\n它妈地\n它媽的\n它媽地\n她NND\n她爸爸\n她妈\n她妈的\n她妈地\n她妈靠\n她媽的\n她媽地\n她马的\n她馬的\n她娘\n塔利班\n台办\n台幣\n台独\n台独靠\n台獨\n台联\n台聯\n台聯黨\n台盟\n台湾\n台湾党\n台湾独\n台湾狗\n台湾国\n台
灣狗\n台灣国\n台灣國\n台灣豬\n臺\n臺幣\n臺獨\n臺湾國\n臺灣\n臺灣黨\n臺灣國\n太监\n太監\n太子党\n谭震林\n谭政\n汤光中\n唐家璇\n唐捷\n桃色\n淘宝\n淘宝靠\n套牌\n套子\n特码\n特派员\n腾人\n腾武\n滕人\n滕仁\n滕任\n滕文生\n滕武\n藤人\n藤仁\n藤任\n騰仁\n騰武\n騰訊\n踢踢球\n体奸\n體奸\n剃毛\n天安门\n天安門\n天畅\n天皇\n天骄\n天怒\n天上碑\n天堂2\n天下贰\n天下貳\n天阉\n天閹\n天遊\n天浴\n天葬\n天主教\n天纵\n天縱\n田纪云\n田纪雲\n田紀雲\n甜嫩穴\n舔 b\n舔b\n舔B\n舔屄\n舔逼\n舔鸡巴\n舔雞巴\n舔脚\n舔腳\n舔奶\n舔屁眼\n舔西\n调教\n跳大神\n铁道部\n同床\n同性恋\n同性戀\n童屹\n统独\n统战\n统治\n捅B\n捅逼\n捅比\n捅你\n捅死你\n捅他\n捅她\n捅我\n統治\n痛经\n偷欢\n偷歡\n偷拍\n偷情\n偷情網\n凸点装\n凸肉优\n凸肉優\n屠城\n屠杀\n推翻\n推推侠\n推推俠\n推油\n退党\n退黨\n退役\n吞精\n臀部\n脫內褲\n脫衣舞\n脱内裤\n脱衣舞\n挖挂\n瓦良格\n歪逼\n外　挂\n外  挂\n外$$挂\n外$$掛\n外$挂\n外$掛\n外**挂\n外**掛\n外*挂\n外*掛\n外/挂\n外/掛\n外?挂\n外？挂\n外？掛\n外@@挂\n外@@掛\n外@挂\n外@掛\n外\\\\挂\n外\\挂\n外\\掛\n外_挂\n外_掛\n外~~挂\n外~~掛\n外~挂\n外~掛\n外卦\n外挂\n外-挂\n外—挂\n外掛\n外-掛\n外—掛\n外汇局\n外交部\n外阴\n外陰\n完蛋操\n玩逼\n玩穴\n万钢\n万润南\n万税\n万晓东\n万淫堂\n卐\n萬鋼\n萬人暴\n萬稅\n萬淫堂\n汪东兴\n汪岷\n王八\n王八蛋\n王宝森\n王寶森\n王炳章\n王从吾\n王丹\n王辅臣\n王刚\n王剛\n王涵万\n王昊\n王洪文\n王沪宁\n王稼祥\n王军涛\n王俊博\n王克\n王乐泉\n王力雄\n王岐山\n王润生\n王若望\n王胜俊\n王太华\n王文京\n王希哲\n王秀丽\n王冶坪\n王樂泉\n王兆囯\n王兆国\n王兆國\n王震\n网 特\n网爱\n网捷信\n网龙\n網 特\n網愛\n網捷信\n網龍\n網星\n網易\n網域\n威而钢\n威而柔\n韦国清\n维权\n伟哥\n尾行\n猥亵\n猥褻\n卫生部\n卫生巾\n尉健行\n慰安妇\n慰安婦\n慰春情\n魏京生\n魏新生\n温B\n温b\n温逼\n温比\n温加宝\n温家宝\n温家保\n温家堡\n温家寶\n温元凯\n温总理\n溫B\n溫逼\n溫比\n溫加寶\n溫家宝\n溫家寶\n溫傢寳\n溫總理\n瘟B\n瘟b\n瘟比\n文 革\n文革\n文九天\n文物局\n文胸\n問道\n倭国\n倭寇\n窝窝客\n窩窩客\n我操\n我操靠\n我操你\n我草\n我干\n我幹\n我和她\n我奸\n我就色\n我考\n我靠\n我咧干\n我日\n我日靠\n我日你\n我有网\n我周容\n龌龊\n齷齪\n乌兰夫\n无疆界\n无界\n无码\n无码片\n无毛穴\n无网界\n无修正\n吴　
仪\n吴邦国\n吴德\n吴方城\n吴官正\n吴桂贤\n吴仁华\n吴学璨\n吴仪\n吳邦囯\n吳邦國\n吳德\n吳官正\n吳桂賢\n吳儀\n無界\n無碼\n無毛穴\n無網界\n無修正\n五不\n午夜场\n伍凡\n武雷\n西藏\n西藏独\n西藏国\n西藏國\n吸毒\n吸毒犯\n吸精\n希拉克\n希特勒\n习近平\n习仲勋\n習近平\n洗脑\n洗脑班\n洗腦班\n洗钱\n系統\n狭义道\n狹義道\n下贱\n下賤\n下流\n下三烂\n下三滥\n下三濫\n下三爛\n下身\n下体\n下體\n下阴\n下陰\n下注\n夏树唯\n仙剑\n仙劍\n鲜族\n鮮族\n藓鲍\n蘚鮑\n现金\n現貨\n乡巴佬\n相奸\n想上你\n向忠发\n项怀诚\n项小吉\n嚮導\n肖强\n销魂洞\n销售\n銷魂洞\n小B\n小b\n小B样\n小B樣\n小逼\n小比样\n小比樣\n小瘪三\n小癟三\n小电影\n小電影\n小鸡巴\n小鸡鸡\n小雞巴\n小雞雞\n小灵通\n小卵泡\n小卵子\n小嫩逼\n小嫩鸡\n小嫩雞\n小平\n小泉\n小日本\n小肉粒\n小乳头\n小乳頭\n小骚逼\n小骚比\n小骚货\n小騷比\n小騷貨\n小穴\n小淫女\n小淫穴\n歇b\n歇逼\n邪教\n写真\n泄密\n谢非\n谢长廷\n谢中之\n謝長廷\n褻\n辛灏年\n新党\n新黨\n新斷線\n新幹線\n新光明\n新华社\n新江湖\n新疆独\n新疆国\n新疆國\n新浪\n新魔界\n新生网\n新生網\n新石器\n新四\n新唐人\n新闻办\n新义安\n新義安\n新语丝\n新郑和\n新鄭和\n鑫諾\n信访局\n信仰\n邢铮\n兴奋剂\n幸存\n性爱\n性愛\n性病\n性高潮\n性虎\n性饥渴\n性飢渴\n性交\n性交靠\n性交课\n性交課\n性交图\n性交圖\n性奴\n性奴会\n性奴會\n性虐\n性虐待\n性器\n性情\n性趣\n性骚扰\n性生活\n性无能\n性無能\n性息\n性佣\n性傭\n性欲\n性之站\n倖存\n胸罩\n熊炎\n熊焱\n羞耻母\n羞恥母\n徐邦秦\n徐炳慧\n徐才厚\n徐光春\n徐匡迪\n徐水良\n徐向前\n许家屯\n轩辕二\n轩辕剑\n薛伟\n穴海\n穴爽\n穴图\n穴圖\n穴淫\n学潮\n学联\n学生妹\n学习班\n学运\n学自联\n學潮\n學生妹\n血逼\n血比\n血洒\n血灑\n血洗\n寻仙\n巡查员\n巡查員\n巡府媳\n讯息\n丫的\n压制\n押大\n押小\n鸦片\n鸦片液\n鸦片渣\n鸭店\n鴉片\n壓迫\n壓制\n雅典娜\n亚洲\n亚洲色\n亞情\n亞無碼\n亞洲色\n烟草局\n烟膏\n煙膏\n严方军\n严家其\n严家祺\n严隽琪\n阎明复\n阎王\n颜射\n顏騎\n顏射\n嚴雋琪\n艳乳\n艳照\n艳照门\n艳照門\n艷照门\n艷照門\n豔乳\n豔照\n豔照門\n扬振\n扬震\n阳精\n阳具\n阳萎\n阳痿\n阳物\n杨白冰\n杨得志\n杨帆\n杨怀安\n杨建利\n杨林\n杨尚昆\n杨思敏\n杨巍\n杨勇\n杨元元\n杨振\n杨震\n杨周\n陽精\n陽具\n陽萎\n陽痿\n陽物\n揚振\n揚震\n楊帆\n楊思敏\n楊振\n姚月谦\n摇头丸\n摇头玩\n摇頭丸\n搖头丸\n搖頭丸\n要色色\n要射了\n耶和华\n耶苏\n耶稣\n耶蘇\n野合\n野鸡\n野雞\n叶剑英\n叶群\n夜情\n夜色城\n夜总会\n夜總會\n一本道\n一党\n一黨\n一贯道\n一貫道\n一起玩\n一四我\n一夜欢\n一夜歡\n一夜情\n伊拉克\n伊朗\n伊斯兰\n依星\n遗精\n遗嘱\n遗囑\n遺囑\n倚天二\n义解\n义母\n亦凡\n抑制剂\n易当\n易當\n阴屄\n阴部\n阴唇\n阴道\n阴蒂\n阴缔\n阴阜\n阴核\n阴户\n阴茎\n阴莖\n阴精\n阴毛\n阴门\n阴囊\n阴水\n阴穴\n陰屄\n陰部\n陰唇\n陰道\n陰蒂\n陰締\n陰阜\n陰核\n陰戶\n陰茎\n陰莖\n陰精\n陰毛\n陰門\n陰囊\n陰水\n隂\n银民吧\n淫\n淫B\n淫b\n淫meimei\n淫屄\n淫屄儿\n淫逼\n淫痴\n淫癡\n淫虫\n淫蟲\n淫荡\n淫蕩\n淫电影\n淫店\n淫东方\n淫洞\n淫妇\n淫婦\n淫告白\n淫棍\n淫河\n淫护士\n淫秽\n淫穢\n淫货\n淫貨\n淫奸\n淫间道\n淫贱\n淫賤\n淫浆\n淫漿\n淫叫\n淫浪\n淫流\n淫乱\n淫亂\n淫驴屯\n淫驢屯\n淫毛\n淫妹\n淫妹妹\n淫糜\n淫靡\n淫蜜\n淫民堂\n淫魔\n淫母\n淫妞\n淫奴\n淫虐\n淫女\n淫女穴\n淫妻\n淫腔\n淫情\n淫色\n淫少妇\n淫湿\n淫书\n淫書\n淫水\n淫图\n淫圖\n淫娃\n淫网\n淫窝窝\n淫西\n淫穴\n淫样\n淫樣\n淫液\n淫欲\n淫贼\n淫汁\n婬\n滛\n銀民吧\n尹庆民
\n隐窝窝\n隱窩窩\n罂粟\n罌粟\n应招\n应召\n硬挺\n應招\n應召\n悠遊網\n悠游网\n由喜贵\n邮政局\n铀\n猶太豬\n遊戲幣\n游行\n游衍\n幼逼\n幼齿\n幼妓\n幼交\n幼男\n幼女\n幼图\n幼圖\n幼香阁\n幼香閣\n诱奸\n诱色uu\n誘姦\n誘色uu\n於天瑞\n於永波\n於幼軍\n于大海\n于浩成\n于天瑞\n于永波\n于幼军\n余秋里\n余英时\n俞正声\n俞正聲\n舆论\n餘震\n宇明网\n雨星网\n语句\n玉杵\n玉蒲团\n玉蒲團\n玉乳\n玉穴\n郁慕明\n育碧\n浴尿\n预审查\n欲火\n欲女\n慾\n慾火\n袁纯清\n援交\n援交妹\n圓满\n圓滿\n远志明\n曰GM\n曰Gm\n曰gM\n曰gm\n曰gＭ\n曰本\n曰你\n月经\n月經\n运营人\n运营长\n运营者\n运营组\n运營者\n运營组\n运營組\n運营者\n運营組\n運營者\n運營组\n運營組\n杂种\n雜種\n再奸\n昝爱宗\n昝愛宗\n早泄\n早洩\n造爱\n造愛\n造反\n则民\n择民\n泽民\n贼民\n扎卡维\n渣波波\n战牌\n战牌靠\n张 钢\n张朝阳\n张潮阳\n张春桥\n张大权\n张德江\n张钢\n张健\n张立昌\n张林\n张榕明\n张廷发\n张万年\n张萬年\n张伟国\n张伟华\n张闻天\n张小平\n张晓丞\n张筱雨\n张昭富\n张震\n张志清\n張 鋼\n張 林\n張朝陽\n張宏堡\n張立昌\n張梅穎\n張榕明\n張万年\n張萬年\n張筱雨\n張震\n招鸡\n招雞\n招妓\n兆鸿\n兆鴻\n赵海青\n赵洪祝\n赵品潞\n赵小明\n赵晓微\n赵紫阳\n哲 民\n哲民\n贞操\n针扎\n貞操\n真封神\n真理教\n真善忍\n真主\n姫辱\n姫野爱\n震级\n震級\n镇压\n鎮壓\n征途\n正见网\n正見網\n正清网\n正清網\n正悟网\n正悟網\n证监会\n郑源\n政变\n政變\n政府\n政权\n政协\n政協\n政治\n政治犯\n政治局\n鄭萬通\n支那\n知障\n指导员\n制服狩\n质检局\n致幻剂\n致幻劑\n智傲\n智凡迪\n智能H3\n智障\n中公网\n中公網\n中功\n中共\n中廣網\n中国\n中国猪\n中國\n中國狗\n中國豬\n中蕐\n中华\n中机电\n中機電\n中奖\n中科院\n中南海\n中宣部\n中央\n重题工\n周百刚\n周恩来\n周恩來\n周贺\n周容重\n周天法\n周铁农\n周鉄農\n周鐵農\n周旋\n周永康\n周子玉\n周总理\n周總理\n朱德\n朱嘉明\n朱骏\n朱駿\n朱林\n朱琳\n朱毛\n朱蒙\n朱狨基\n朱容鸡\n朱容基\n朱容雞\n朱溶剂\n朱熔基\n朱镕基\n朱鎔基\n朱胜文\n朱勝文\n朱总理\n诛仙\n猪操\n猪聋畸\n猪猡\n猪毛\n猪毛1\n猪容基\n猪头\n誅仙\n豬操\n豬容基\n豬頭\n主席\n专政\n专制\n專政\n專制\n转法轮\n轉法輪\n装B\n装B靠\n装屄\n装屄呢\n装逼\n装逼靠\n装逼呢\n卓奥\n卓奧\n子宫\n子宮\n梓霖\n紫黛\n自焚\n自民党\n自慰\n自由门\n宗教\n总裁\n总局\n总理\n总统号\n總理\n總書記\n邹德威\n邹家华\n鄒德威\n鄒家華\n走光\n走私\n走资派\n足脚交\n足腳交\n钻插\n鑽插\n阝月\n作ai\n作爱\n作愛\n作弊器\n坐脸\n坐台\n坐台的\n坐庄\n做ai\n做爱\n做爱图\n做愛\n做雞\n做鸭\n做鴨"
  },
  {
    "path": "src/test/resources/Talk.txt",
    "content": "﻿利反对派称包围卡扎菲和其儿子藏身建筑群\n[利反对派称今日将结束战斗][利反对派称8个月内举行大选][最新]\n云南海南3省区党委主要负责人职务调整\n[陈全国任西藏党委书记][秦光荣任云南书记][罗保铭任海南书记]\n苹果CEO乔布斯辞职 COO接任 乔布斯语录\n[乔布斯仍为董事会主席 辞职信 苹果股价盘后大跌5.39% 热议]\n首届政务微博论坛在浙江举行 \n赵洪祝致贺信[图文直播][视频直播][中国高官开微博成趋势][热议]\n铁道部要求年底前无事故 \n党员须做公开承诺 胡锦涛会见萨科齐 希望欧方确保中方对欧投资安全 \n杭州为接住坠楼小孩“最美妈妈”\n建城市雕塑(图) 神八将搭载“平民梦想”进太空 故宫诗词大赛 \n卡扎菲倒台了值班编辑：沈建军 \n乔布斯辞职了值班编辑：杨达 今日话题百人械斗 医院怎成黑社会  "
  }
]