{
// Private constructor: instances are only obtained via getInstance() / the
// SingleFactory holder. The argument is the executor's display name
// ("dat double-array algorithm desensitization implementation").
private DatFilterExecutor() {
super("dat 双数组算法脱敏实现");
}
// Initialization-on-demand holder idiom: INSTANCE is created lazily and
// thread-safely when the JVM first loads SingleFactory.
private static class SingleFactory {
private static final DatFilterExecutor INSTANCE = new DatFilterExecutor();
}
/**
 * Returns the process-wide singleton executor.
 *
 * <p>The redundant {@code final} modifier was removed: {@code static} methods
 * are not subject to overriding, so it added nothing.
 *
 * @return the shared {@link DatFilterExecutor} instance
 */
public static DatFilterExecutor getInstance() {
    return SingleFactory.INSTANCE;
}
@Override
protected DatCacheNode getCacheNodes() {
// Fresh, empty cache container; it is populated later through put().
return new DatCacheNode();
}
/**
 * Adds one sensitive word to the cache.
 *
 * <p>The word set keeps every complete word; the char set keeps every character
 * occurring in any word and is used by {@code processor} as a fast pre-filter.
 *
 * @param word candidate word; blank input and words shorter than two characters
 *             (after trimming) are rejected
 * @return {@code true} if the word was stored, {@code false} if rejected
 */
@Override
protected boolean put(String word) throws RuntimeException {
    if (StringUtils.isBlank(word)) {
        return false;
    }
    word = StringUtils.trim(word);
    if (word.length() < 2) {
        // Single characters are never treated as sensitive words.
        return false;
    }
    cacheNodes.getWords().add(word);
    // Iterate with the primitive char to avoid boxing every element up front;
    // add(...) still boxes once when inserting into the Character set.
    for (char ch : word.toCharArray()) {
        cacheNodes.getChars().add(ch);
    }
    return true;
}
/**
 * Scans {@code content} for cached sensitive words, reporting each hit to
 * {@code callback}.
 *
 * @param partMatch when {@code true}, scanning resumes after a matched word
 *                  instead of continuing to extend/overlap it
 * @param content   text to scan; blank or single-character input is ignored
 * @param callback  receives each match; returning {@code true} stops the scan
 * @return {@code true} if the callback short-circuited the scan
 */
@Override
protected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {
    if (StringUtils.isBlank(content)) {
        return false;
    }
    content = StringUtils.trim(content);
    if (content.length() < 2) {
        return false;
    }
    for (int i = 0; i < content.length(); i++) {
        // Pre-filter: skip characters that occur in no sensitive word.
        if (!cacheNodes.getChars().contains(content.charAt(i))) {
            continue;
        }
        int j = i + 1;
        while (j < content.length()) {
            // Stop extending the candidate as soon as a non-dictionary char appears.
            if (!cacheNodes.getChars().contains(content.charAt(j))) {
                break;
            }
            String word = content.substring(i, j + 1);
            if (cacheNodes.getWords().contains(word)) {
                if (callback.call(word)) {
                    return true;
                }
                if (partMatch) {
                    // BUGFIX: the previous code did "i += word.length()" WITHOUT
                    // leaving the inner loop, so later substring(i, j + 1) calls
                    // used mismatched indices, and the outer i++ then skipped one
                    // extra character. Resume right after the matched word: the
                    // outer for-loop increment moves i to j + 1.
                    i = j;
                    break;
                }
            }
            j++;
        }
    }
    return false;
}
/** Demo entry point: loads a small dictionary and prints filtering results. */
public static void main(String[] args) {
    DatFilterExecutor executor = DatFilterExecutor.getInstance();
    executor.init();
    String[] dictionary = {
            "中国人", "中国男人", "中国人民", "人民", "中间", "女人",
            "一举", "一举成名", "一举成名走四方", "成名", "走四方"
    };
    for (String word : dictionary) {
        executor.put(word);
    }
    String content = "我们中国人都是好人,在他们中间有男人和女人。中国男人很惨,中国人民长期被压迫。";
    System.out.println(executor.getWords(true, content));
    System.out.println(executor.getWords(false, content));
    System.out.println(executor.filter(false, content, '*'));
    System.out.println(executor.highlight(false, content));
    content = "一举成名走四方的是什么";
    System.out.println(executor.getWords(true, content));
    System.out.println(executor.getWords(false, content));
    System.out.println(executor.filter(false, content, '*'));
    System.out.println(executor.highlight(false, content));
}
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dat/exectuor/DoubleArrayTrie2.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.dat.exectuor;
/**
* DoubleArrayTrie: Java implementation of Darts (Double-ARray Trie System)
*
*
* Copyright(C) 2001-2007 Taku Kudo <taku@chasen.org>
* Copyright(C) 2009 MURAWAKI Yugo <murawaki@nlp.kuee.kyoto-u.ac.jp>
* Copyright(C) 2012 KOMIYA Atsushi <komiya.atsushi@gmail.com>
*
*
*
* The contents of this file may be used under the terms of either of the GNU
* Lesser General Public License Version 2.1 or later (the "LGPL"), or the BSD
* License (the "BSD").
*
*/
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import com.google.common.collect.Lists;
public class DoubleArrayTrie2 {

    /** Buffer size used when reading a persisted trie from disk. */
    private static final int BUF_SIZE = 16384;
    /** Bytes per persisted unit: one int for base plus one int for check. */
    private static final int UNIT_SIZE = 8;

    /** Transient tree node used only while building the double array. */
    private static class Node {
        int code;   // char value + 1; 0 marks "end of key"
        int depth;  // node depth == length of the prefix it represents
        int left;   // first index (in the sorted key list) covered by this node
        int right;  // one past the last covered key index
    }

    private int[] check;
    private int[] base;
    /** Marks base offsets already claimed during construction; null outside build. */
    private boolean[] used;
    private int size;
    private int allocSize;
    private List<String> key;
    private int keySize;
    private int[] length;
    private int[] value;
    private int progress;
    private int nextCheckPos;

    /** 0 on success; -3 when keys were not sorted, -2 when a value clashed. */
    int error_;

    /** Grows base/check/used to {@code newSize}, preserving existing content. */
    private int resize(int newSize) {
        int[] newBase = new int[newSize];
        int[] newCheck = new int[newSize];
        boolean[] newUsed = new boolean[newSize];
        if (allocSize > 0) {
            System.arraycopy(base, 0, newBase, 0, allocSize);
            System.arraycopy(check, 0, newCheck, 0, allocSize);
            // BUGFIX: the original copied the NEW array onto itself
            // (arraycopy(used2, 0, used2, ...)), silently dropping every
            // "offset already claimed" flag on a mid-build resize. Copy from
            // the live array when it exists (build()/put() null it when done).
            if (used != null) {
                System.arraycopy(used, 0, newUsed, 0, allocSize);
            }
        }
        base = newBase;
        check = newCheck;
        used = newUsed;
        return allocSize = newSize;
    }

    /**
     * Collects the distinct child nodes of {@code parent} into {@code siblings}.
     *
     * @return number of siblings found; 0 on error (unsorted keys set error_ = -3)
     */
    private int fetch(Node parent, List<Node> siblings) {
        if (error_ < 0)
            return 0;
        int prev = 0;
        for (int i = parent.left; i < parent.right; i++) {
            if ((length != null ? length[i] : key.get(i).length()) < parent.depth)
                continue;
            String tmp = key.get(i);
            int cur = 0;
            if ((length != null ? length[i] : tmp.length()) != parent.depth)
                cur = (int) tmp.charAt(parent.depth) + 1;
            if (prev > cur) {
                // Keys must be lexicographically sorted for the build to work.
                error_ = -3;
                return 0;
            }
            if (cur != prev || siblings.size() == 0) {
                Node tmpNode = new Node();
                tmpNode.depth = parent.depth + 1;
                tmpNode.code = cur;
                tmpNode.left = i;
                if (siblings.size() != 0)
                    siblings.get(siblings.size() - 1).right = i;
                siblings.add(tmpNode);
            }
            prev = cur;
        }
        if (siblings.size() != 0)
            siblings.get(siblings.size() - 1).right = parent.right;
        return siblings.size();
    }

    /**
     * Finds a free base offset for {@code siblings}, writes their check/base
     * cells, and recurses into each child's sub-tree.
     *
     * @return the chosen base offset
     */
    private int insert(List<Node> siblings) {
        if (error_ < 0)
            return 0;
        int begin = 0;
        int pos = ((siblings.get(0).code + 1 > nextCheckPos) ? siblings.get(0).code + 1 : nextCheckPos) - 1;
        int nonzeroNum = 0;
        int first = 0;
        if (allocSize <= pos)
            resize(pos + 1);
        outer: while (true) {
            pos++;
            if (allocSize <= pos)
                resize(pos + 1);
            if (check[pos] != 0) {
                nonzeroNum++;
                continue;
            } else if (first == 0) {
                nextCheckPos = pos;
                first = 1;
            }
            begin = pos - siblings.get(0).code;
            if (allocSize <= (begin + siblings.get(siblings.size() - 1).code)) {
                // progress can be zero; grow by at least 5%.
                double l = (1.05 > 1.0 * keySize / (progress + 1)) ? 1.05 : 1.0 * keySize / (progress + 1);
                resize((int) (allocSize * l));
            }
            if (used[begin])
                continue;
            for (int i = 1; i < siblings.size(); i++)
                if (check[begin + siblings.get(i).code] != 0)
                    continue outer;
            break;
        }
        // Heuristic: if the scanned region between nextCheckPos and pos was
        // >= 95% occupied, start the next search at pos instead of re-scanning.
        if (1.0 * nonzeroNum / (pos - nextCheckPos + 1) >= 0.95)
            nextCheckPos = pos;
        used[begin] = true;
        size = (size > begin + siblings.get(siblings.size() - 1).code + 1) ? size
                : begin + siblings.get(siblings.size() - 1).code + 1;
        for (int i = 0; i < siblings.size(); i++)
            check[begin + siblings.get(i).code] = begin;
        for (int i = 0; i < siblings.size(); i++) {
            List<Node> newSiblings = new ArrayList<>();
            if (fetch(siblings.get(i), newSiblings) == 0) {
                // Leaf: store the value (or the key index) negated so searches
                // can recognize terminals via base < 0.
                base[begin + siblings.get(i).code] = (value != null) ? (-value[siblings.get(i).left] - 1)
                        : (-siblings.get(i).left - 1);
                if (value != null && (-value[siblings.get(i).left] - 1) >= 0) {
                    error_ = -2;
                    return 0;
                }
                progress++;
            } else {
                base[begin + siblings.get(i).code] = insert(newSiblings);
            }
        }
        return begin;
    }

    public DoubleArrayTrie2() {
        check = null;
        base = null;
        used = null;
        size = 0;
        allocSize = 0;
        error_ = 0;
    }

    /** Releases the trie arrays. */
    void clear() {
        check = null;
        base = null;
        used = null;
        allocSize = 0;
        size = 0;
    }

    public int getUnitSize() {
        return UNIT_SIZE;
    }

    public int getSize() {
        return size;
    }

    public int getTotalSize() {
        return size * UNIT_SIZE;
    }

    /** @return number of occupied (check != 0) cells, for diagnostics */
    public int getNonzeroSize() {
        int result = 0;
        for (int i = 0; i < size; i++)
            if (check[i] != 0)
                result++;
        return result;
    }

    /**
     * Builds the trie from a lexicographically sorted key list.
     *
     * @return 0 on success, a negative error code otherwise
     */
    public int build(List<String> key) {
        return build(key, null, null, key.size());
    }

    public int build(List<String> _key, int[] _length, int[] _value, int _keySize) {
        // BUGFIX: the null check must run before _key.size() dereferences _key.
        if (_key == null || _keySize > _key.size())
            return 0;
        key = _key;
        length = _length;
        keySize = _keySize;
        value = _value;
        progress = 0;
        resize(65536 * 32);
        base[0] = 1;
        nextCheckPos = 0;
        Node rootNode = new Node();
        rootNode.left = 0;
        rootNode.right = keySize;
        rootNode.depth = 0;
        List<Node> siblings = new ArrayList<>();
        fetch(rootNode, siblings);
        insert(siblings);
        used = null;
        key = null;
        return error_;
    }

    /**
     * Adds a single key and rebuilds.
     *
     * <p>NOTE(review): {@code key} is reset to null at the end, so consecutive
     * calls each rebuild from only the newly added key, writing over the
     * previous arrays. This matches the original behavior but looks suspect —
     * verify against callers before relying on incremental inserts.
     */
    public int put(String _key, int[] _length, int[] _value) {
        if (_key == null)
            return 0;
        if (key == null) {
            key = new ArrayList<>();
        }
        key.add(_key);
        keySize = key.size();
        length = _length;
        value = _value;
        progress = 0;
        resize(65536 * 32);
        base[0] = 1;
        nextCheckPos = 0;
        Node rootNode = new Node();
        rootNode.left = 0;
        rootNode.right = keySize;
        rootNode.depth = 0;
        List<Node> siblings = new ArrayList<>();
        fetch(rootNode, siblings);
        insert(siblings);
        used = null;
        key = null;
        return error_;
    }

    /** Loads a trie previously written by {@link #save(String)}. */
    public void open(String fileName) throws IOException {
        File file = new File(fileName);
        size = (int) file.length() / UNIT_SIZE;
        check = new int[size];
        base = new int[size];
        try (DataInputStream is = new DataInputStream(
                new BufferedInputStream(new FileInputStream(file), BUF_SIZE))) {
            for (int i = 0; i < size; i++) {
                base[i] = is.readInt();
                check[i] = is.readInt();
            }
        }
    }

    /** Persists base/check pairs so the trie can be reloaded without rebuilding. */
    public void save(String fileName) throws IOException {
        try (DataOutputStream out = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(fileName)))) {
            for (int i = 0; i < size; i++) {
                out.writeInt(base[i]);
                out.writeInt(check[i]);
            }
        }
    }

    /** @return index of {@code key} in the build list, or -1 when absent */
    public int exactMatchSearch(String key) {
        return exactMatchSearch(key, 0, 0, 0);
    }

    public int exactMatchSearch(String key, int pos, int len, int nodePos) {
        if (len <= 0)
            len = key.length();
        if (nodePos <= 0)
            nodePos = 0;
        int result = -1;
        char[] keyChars = key.toCharArray();
        int b = base[nodePos];
        int p;
        for (int i = pos; i < len; i++) {
            p = b + (int) (keyChars[i]) + 1;
            if (b == check[p])
                b = base[p];
            else
                return result;
        }
        // A terminal is encoded as a negative base at the end-of-key cell.
        p = b;
        int n = base[p];
        if (b == check[p] && n < 0) {
            result = -n - 1;
        }
        return result;
    }

    /** @return indices of every stored key that is a prefix of {@code key} */
    public List<Integer> commonPrefixSearch(String key) {
        return commonPrefixSearch(key, 0, 0, 0);
    }

    public List<Integer> commonPrefixSearch(String key, int pos, int len, int nodePos) {
        if (len <= 0)
            len = key.length();
        if (nodePos <= 0)
            nodePos = 0;
        List<Integer> result = new ArrayList<>();
        char[] keyChars = key.toCharArray();
        int b = base[nodePos];
        int n;
        int p;
        for (int i = pos; i < len; i++) {
            // Record a hit if the prefix consumed so far is itself a stored key.
            p = b;
            n = base[p];
            if (b == check[p] && n < 0) {
                result.add(-n - 1);
            }
            p = b + (int) (keyChars[i]) + 1;
            if (b == check[p])
                b = base[p];
            else
                return result;
        }
        p = b;
        n = base[p];
        if (b == check[p] && n < 0) {
            result.add(-n - 1);
        }
        return result;
    }

    /** Debug dump of every base/check pair to stderr. */
    public void dump() {
        for (int i = 0; i < size; i++) {
            System.err.println("i: " + i + " [" + base[i] + ", " + check[i] + "]");
        }
    }

    public static void main(String[] args) {
        DoubleArrayTrie2 dat = new DoubleArrayTrie2();
        List<String> list = new ArrayList<>();
        list.add("成名");
        list.add("走四方");
        Collections.sort(list);
        dat.build(list);
        dat.put("一举", null, null);
        dat.put("一举成名", null, null);
        String content = "一举成名走四方的是什么";
        List<Integer> rect = dat.commonPrefixSearch(content);
        System.out.println();
        for (int index : rect) {
            System.out.println("前缀 " + list.get(index) + " matched");
        }
        // Check whether the key exactly matches some term in the dictionary.
        int index = dat.exactMatchSearch("成");
        if (index >= 0) {
            System.out.println(content + " match " + list.get(index));
        } else {
            System.out.println(content + " not match any term");
        }
    }
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/DfaFilter.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.dfa;
import com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;
import com.cnblogs.hoojo.sensitivewords.filter.dfa.executor.DfaFilterExecutor;
/**
* DFA 算法实现敏感词脱敏过滤
*
* @author hoojo
* @createDate 2018年2月2日 下午4:23:20
* @file DFASWFilter.java
* @package com.cnblogs.hoojo.sensitivewords.support.dfa
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public class DfaFilter extends AbstractFilter {
// Wires the shared DFA executor singleton into the generic filter facade.
public DfaFilter() {
super(DfaFilterExecutor.getInstance());
}
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/executor/DfaFilterExecutor.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.dfa.executor;
import java.util.HashMap;
import org.apache.commons.lang.StringUtils;
import com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;
import com.google.common.collect.Maps;
/**
* DFA 脱敏算法实现支持类
*
* @author hoojo
* @createDate 2018年2月9日 上午10:34:42
* @file DfaFilterExecutor.java
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public final class DfaFilterExecutor extends AbstractFilterExecutor> {
private static class SingleFactory {
private static final DfaFilterExecutor INSTANCE = new DfaFilterExecutor();
}
public static final DfaFilterExecutor getInstance() {
return SingleFactory.INSTANCE;
}
private DfaFilterExecutor() {
super("DFA 脱敏算法实现支持类");
}
@Override
protected boolean put(String word) throws RuntimeException {
if (StringUtils.isBlank(word)) {
return false;
}
word = StringUtils.trim(word);
if (word.length() < 2) {
return false;
}
Character fisrtChar = word.charAt(0);
DfaNode node = cacheNodes.get(fisrtChar);
if (node == null) {
node = new DfaNode(fisrtChar);
cacheNodes.put(fisrtChar, node);
}
for (int i = 1; i < word.length(); i++) {
Character nextChar = word.charAt(i);
DfaNode nextNode = null;
if (!node.isLeaf()) {
nextNode = node.getChilds().get(nextChar);
}
if (nextNode == null) {
nextNode = new DfaNode(nextChar);
}
node.addChild(nextNode);
node = nextNode;
if (i == word.length() - 1) {
node.setWord(true);
}
}
return true;
}
@Override
protected boolean processor(boolean partMatch, String content, Callback callback) throws RuntimeException {
if (StringUtils.isBlank(content)) {
return false;
}
content = StringUtils.trim(content);
if (content.length() < 2) {
return false;
}
for (int index = 0; index < content.length(); index++) {
char fisrtChar = content.charAt(index);
DfaNode node = cacheNodes.get(fisrtChar);
if (node == null || node.isLeaf()) {
continue;
}
int charCount = 1;
for (int i = index + 1; i < content.length(); i++) {
char wordChar = content.charAt(i);
node = node.getChilds().get(wordChar);
if (node != null) {
charCount++;
} else {
break;
}
if (partMatch && node.isWord()) {
if (callback.call(StringUtils.substring(content, index, index + charCount))) {
return true;
}
break;
} else if (node.isWord()) {
if (callback.call(StringUtils.substring(content, index, index + charCount))) {
return true;
}
}
if (node.isLeaf()) {
break;
}
}
if (partMatch) {
index += charCount;
}
}
return false;
}
@Override
protected HashMap getCacheNodes() {
return Maps.newHashMap();
}
public static void main(String[] args) {
DfaFilterExecutor.getInstance().init();
DfaFilterExecutor.getInstance().put("中国人");
DfaFilterExecutor.getInstance().put("中国男人");
DfaFilterExecutor.getInstance().put("中国人民");
DfaFilterExecutor.getInstance().put("人民");
DfaFilterExecutor.getInstance().put("中间");
DfaFilterExecutor.getInstance().put("女人");
DfaFilterExecutor.getInstance().put("一举");
DfaFilterExecutor.getInstance().put("一举成名");
DfaFilterExecutor.getInstance().put("一举成名走四方");
DfaFilterExecutor.getInstance().put("成名");
DfaFilterExecutor.getInstance().put("走四方");
String content = "我们中国人都是好人,在他们中间有男人和女人。中国男人很惨,中国人民长期被压迫。";
System.out.println(DfaFilterExecutor.getInstance().contains(true, content));
System.out.println(DfaFilterExecutor.getInstance().getWords(true, content));
System.out.println(DfaFilterExecutor.getInstance().getWords(false, content));
System.out.println(DfaFilterExecutor.getInstance().filter(false, content, '*'));
System.out.println(DfaFilterExecutor.getInstance().highlight(true, content));
System.out.println(DfaFilterExecutor.getInstance().highlight(false, content));
content = "一举成名走四方的是什么";
System.out.println(DfaFilterExecutor.getInstance().getWords(true, content));
System.out.println(DfaFilterExecutor.getInstance().getWords(false, content));
System.out.println(DfaFilterExecutor.getInstance().filter(false, content, '*'));
System.out.println(DfaFilterExecutor.getInstance().highlight(true, content));
System.out.println(DfaFilterExecutor.getInstance().highlight(false, content));
}
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/dfa/executor/DfaNode.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.dfa.executor;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import com.google.common.collect.Maps;
/**
* dfa多叉树模型
* @author hoojo
* @createDate 2018年2月8日 下午8:23:27
* @file DfaNode.java
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public class DfaNode {

    /** The character this node represents. */
    private char _char;
    /** Optional back-reference to the parent node (not maintained by addChild). */
    private DfaNode parent;
    /** True when the path from the root to this node spells a complete word. */
    private boolean word;
    /** Children keyed by their character; null until the first child is added. */
    private Map<Character, DfaNode> childs;

    public DfaNode() {
    }

    public DfaNode(char _char) {
        this._char = _char;
    }

    public boolean isWord() {
        return word;
    }

    public void setWord(boolean word) {
        this.word = word;
    }

    public boolean isLeaf() {
        return childs == null || childs.isEmpty();
    }

    public char getChar() {
        return _char;
    }

    public void setChar(char _char) {
        this._char = _char;
    }

    /** Adds (or replaces) the child registered under {@code child.getChar()}. */
    public void addChild(DfaNode child) {
        if (this.childs == null) {
            // Created lazily so leaf nodes stay lightweight.
            this.childs = new HashMap<>();
        }
        this.childs.put(child.getChar(), child);
    }

    public void removeChild(DfaNode child) {
        if (this.childs != null) {
            this.childs.remove(child.getChar());
        }
    }

    public DfaNode getParent() {
        return parent;
    }

    public void setParent(DfaNode parent) {
        this.parent = parent;
    }

    /** @return the child map, or {@code null} when this node is a leaf */
    public Map<Character, DfaNode> getChilds() {
        return this.childs;
    }

    public void setChilds(Map<Character, DfaNode> childs) {
        this.childs = childs;
    }

    /** Depth-first dump of the subtree rooted at {@code node}, one char per line. */
    public void print(DfaNode node) {
        System.out.println(node.getChar());
        if (node.getChilds() != null) {
            // Iterate values directly instead of keySet() + get() round trips.
            for (DfaNode child : node.getChilds().values()) {
                print(child);
            }
        }
    }

    public static void main(String[] args) {
        DfaNode node = new DfaNode('中');
        DfaNode g = new DfaNode('国');
        g.addChild(new DfaNode('人'));
        DfaNode n = new DfaNode('男');
        n.addChild(new DfaNode('人'));
        g.addChild(n);
        node.addChild(g);
        node.addChild(new DfaNode('间'));
        node.print(node);
    }
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/SimHashFilter.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.simhash;
import com.cnblogs.hoojo.sensitivewords.filter.AbstractFilter;
import com.cnblogs.hoojo.sensitivewords.filter.simhash.executor.SimHashFilterExecutor;
/**
* simhash 算法
*
* @author hoojo
* @createDate 2018年3月23日 下午5:55:49
* @file SimHashFilter.java
* @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public class SimHashFilter extends AbstractFilter {
// Wires the shared simhash executor singleton into the generic filter facade.
public SimHashFilter() {
super(SimHashFilterExecutor.getInstance());
}
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/FNVHashUtils.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;
import java.math.BigInteger;
/**
* http://blog.csdn.net/hustfoxy/article/details/23687239
* https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
*
* @author hoojo
* @createDate 2018年3月22日 下午6:48:41
* @file FNVHashUtils.java
* @package com.cnblogs.hoojo.sensitivewords.filter.bucket.executor
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public abstract class FNVHashUtils {

    /** Width of the produced hash in bits. Made {@code final}: the mask below is
     * derived once at class initialization, so mutating this afterwards would
     * silently have no effect. */
    public static final int HASH_BITS = 64;

    /** FNV 64-bit offset basis (the initial hash value), 0xcbf29ce484222325. */
    public static final BigInteger FNV_64_OFFSET_BASIS = new BigInteger("14695981039346656037");

    /** The 64-bit FNV prime used for mixing, 0x100000001b3. */
    public static final BigInteger FNV_64_PRIME = new BigInteger("1099511628211");

    /** Mask of HASH_BITS one-bits (2^64 - 1) used to truncate the hash to 64 bits.
     * (The original comment described this as "one octet of data", which it is
     * not; the name is kept only for source compatibility.) */
    public static final BigInteger FNV_64_OCTET_OF_DATA = BigInteger.ONE.shiftLeft(HASH_BITS).subtract(BigInteger.ONE);

    /**
     * FNV-1 hash: for each char, multiply by the prime, then XOR in the char.
     *
     * @param text input text; each UTF-16 code unit is folded in as one value
     * @return the 64-bit FNV-1 hash as a non-negative BigInteger
     */
    public static BigInteger hash64(String text) {
        BigInteger hash = FNV_64_OFFSET_BASIS;
        int len = text.length();
        for (int i = 0; i < len; i++) {
            hash = hash.multiply(FNV_64_PRIME);
            hash = hash.xor(BigInteger.valueOf(text.charAt(i)));
        }
        // Truncate to 64 bits (BigInteger arithmetic does not wrap).
        hash = hash.and(FNV_64_OCTET_OF_DATA);
        return hash;
    }

    /**
     * FNV-1a hash: XOR first, then multiply — generally better dispersion than
     * FNV-1 for short inputs.
     *
     * @param text input text; each UTF-16 code unit is folded in as one value
     * @return the 64-bit FNV-1a hash as a non-negative BigInteger
     */
    public static BigInteger hash64_(String text) {
        BigInteger hash = FNV_64_OFFSET_BASIS;
        int len = text.length();
        for (int i = 0; i < len; i++) {
            hash = hash.xor(BigInteger.valueOf(text.charAt(i)));
            hash = hash.multiply(FNV_64_PRIME);
        }
        hash = hash.and(FNV_64_OCTET_OF_DATA);
        return hash;
    }
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/Murmur3.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;
/**
* Copyright 2014 Prasanth Jayachandran
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Murmur3 32 and 128 bit variants.
* 32-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94
* 128-bit Java port of https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
*/
public class Murmur3 {
    // Constants for 32 bit variant
    private static final int C1_32 = 0xcc9e2d51;
    private static final int C2_32 = 0x1b873593;
    private static final int R1_32 = 15;
    private static final int R2_32 = 13;
    private static final int M_32 = 5;
    private static final int N_32 = 0xe6546b64;

    // Constants for 128 bit variant
    private static final long C1 = 0x87c37b91114253d5L;
    private static final long C2 = 0x4cf5ad432745937fL;
    private static final int R1 = 31;
    private static final int R2 = 27;
    private static final int R3 = 33;
    private static final int M = 5;
    private static final int N1 = 0x52dce729;
    private static final int N2 = 0x38495ab5;

    private static final int DEFAULT_SEED = 0;

    /**
     * Murmur3 32-bit variant.
     *
     * @param data - input byte array
     * @return - hashcode
     */
    public static int hash32(byte[] data) {
        return hash32(data, data.length, DEFAULT_SEED);
    }

    /**
     * Murmur3 32-bit variant.
     *
     * @param data - input byte array
     * @param length - length of array
     * @param seed - seed. (default 0)
     * @return - hashcode
     */
    public static int hash32(byte[] data, int length, int seed) {
        int hash = seed;
        final int nblocks = length >> 2;

        // body: consume 4-byte little-endian blocks
        for (int i = 0; i < nblocks; i++) {
            int i_4 = i << 2;
            int k = (data[i_4] & 0xff)
                    | ((data[i_4 + 1] & 0xff) << 8)
                    | ((data[i_4 + 2] & 0xff) << 16)
                    | ((data[i_4 + 3] & 0xff) << 24);

            // mix functions
            k *= C1_32;
            k = Integer.rotateLeft(k, R1_32);
            k *= C2_32;
            hash ^= k;
            hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
        }

        // tail: remaining 1-3 bytes.
        // BUGFIX: the tail bytes must be masked with 0xff before shifting — the
        // canonical MurmurHash3 x86_32 treats them as unsigned. Without the
        // masks, sign extension of negative bytes produced results diverging
        // from the reference implementation.
        int idx = nblocks << 2;
        int k1 = 0;
        switch (length - idx) {
            case 3:
                k1 ^= (data[idx + 2] & 0xff) << 16;
                // fall through
            case 2:
                k1 ^= (data[idx + 1] & 0xff) << 8;
                // fall through
            case 1:
                k1 ^= (data[idx] & 0xff);

                // mix functions
                k1 *= C1_32;
                k1 = Integer.rotateLeft(k1, R1_32);
                k1 *= C2_32;
                hash ^= k1;
        }

        // finalization (avalanche)
        hash ^= length;
        hash ^= (hash >>> 16);
        hash *= 0x85ebca6b;
        hash ^= (hash >>> 13);
        hash *= 0xc2b2ae35;
        hash ^= (hash >>> 16);

        return hash;
    }

    /**
     * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant.
     *
     * @param data - input byte array
     * @return - hashcode
     */
    public static long hash64(byte[] data) {
        return hash64(data, data.length, DEFAULT_SEED);
    }

    /**
     * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit variant.
     *
     * @param data - input byte array
     * @param length - length of array
     * @param seed - seed. (default is 0)
     * @return - hashcode
     */
    public static long hash64(byte[] data, int length, int seed) {
        long hash = seed;
        final int nblocks = length >> 3;

        // body: consume 8-byte little-endian blocks
        for (int i = 0; i < nblocks; i++) {
            final int i8 = i << 3;
            long k = ((long) data[i8] & 0xff)
                    | (((long) data[i8 + 1] & 0xff) << 8)
                    | (((long) data[i8 + 2] & 0xff) << 16)
                    | (((long) data[i8 + 3] & 0xff) << 24)
                    | (((long) data[i8 + 4] & 0xff) << 32)
                    | (((long) data[i8 + 5] & 0xff) << 40)
                    | (((long) data[i8 + 6] & 0xff) << 48)
                    | (((long) data[i8 + 7] & 0xff) << 56);

            // mix functions
            k *= C1;
            k = Long.rotateLeft(k, R1);
            k *= C2;
            hash ^= k;
            hash = Long.rotateLeft(hash, R2) * M + N1;
        }

        // tail: remaining 1-7 bytes (already correctly masked here)
        long k1 = 0;
        int tailStart = nblocks << 3;
        switch (length - tailStart) {
            case 7:
                k1 ^= ((long) data[tailStart + 6] & 0xff) << 48;
                // fall through
            case 6:
                k1 ^= ((long) data[tailStart + 5] & 0xff) << 40;
                // fall through
            case 5:
                k1 ^= ((long) data[tailStart + 4] & 0xff) << 32;
                // fall through
            case 4:
                k1 ^= ((long) data[tailStart + 3] & 0xff) << 24;
                // fall through
            case 3:
                k1 ^= ((long) data[tailStart + 2] & 0xff) << 16;
                // fall through
            case 2:
                k1 ^= ((long) data[tailStart + 1] & 0xff) << 8;
                // fall through
            case 1:
                k1 ^= ((long) data[tailStart] & 0xff);
                k1 *= C1;
                k1 = Long.rotateLeft(k1, R1);
                k1 *= C2;
                hash ^= k1;
        }

        // finalization
        hash ^= length;
        hash = fmix64(hash);

        return hash;
    }

    /**
     * Murmur3 128-bit variant.
     *
     * @param data - input byte array
     * @return - hashcode (2 longs)
     */
    public static long[] hash128(byte[] data) {
        return hash128(data, data.length, DEFAULT_SEED);
    }

    /**
     * Murmur3 128-bit variant.
     *
     * @param data - input byte array
     * @param length - length of array
     * @param seed - seed. (default is 0)
     * @return - hashcode (2 longs)
     */
    public static long[] hash128(byte[] data, int length, int seed) {
        long h1 = seed;
        long h2 = seed;
        final int nblocks = length >> 4;

        // body: consume 16-byte blocks as two little-endian longs
        for (int i = 0; i < nblocks; i++) {
            final int i16 = i << 4;
            long k1 = ((long) data[i16] & 0xff)
                    | (((long) data[i16 + 1] & 0xff) << 8)
                    | (((long) data[i16 + 2] & 0xff) << 16)
                    | (((long) data[i16 + 3] & 0xff) << 24)
                    | (((long) data[i16 + 4] & 0xff) << 32)
                    | (((long) data[i16 + 5] & 0xff) << 40)
                    | (((long) data[i16 + 6] & 0xff) << 48)
                    | (((long) data[i16 + 7] & 0xff) << 56);

            long k2 = ((long) data[i16 + 8] & 0xff)
                    | (((long) data[i16 + 9] & 0xff) << 8)
                    | (((long) data[i16 + 10] & 0xff) << 16)
                    | (((long) data[i16 + 11] & 0xff) << 24)
                    | (((long) data[i16 + 12] & 0xff) << 32)
                    | (((long) data[i16 + 13] & 0xff) << 40)
                    | (((long) data[i16 + 14] & 0xff) << 48)
                    | (((long) data[i16 + 15] & 0xff) << 56);

            // mix functions for k1
            k1 *= C1;
            k1 = Long.rotateLeft(k1, R1);
            k1 *= C2;
            h1 ^= k1;
            h1 = Long.rotateLeft(h1, R2);
            h1 += h2;
            h1 = h1 * M + N1;

            // mix functions for k2
            k2 *= C2;
            k2 = Long.rotateLeft(k2, R3);
            k2 *= C1;
            h2 ^= k2;
            h2 = Long.rotateLeft(h2, R1);
            h2 += h1;
            h2 = h2 * M + N2;
        }

        // tail: remaining 1-15 bytes
        long k1 = 0;
        long k2 = 0;
        int tailStart = nblocks << 4;
        switch (length - tailStart) {
            case 15:
                k2 ^= (long) (data[tailStart + 14] & 0xff) << 48;
                // fall through
            case 14:
                k2 ^= (long) (data[tailStart + 13] & 0xff) << 40;
                // fall through
            case 13:
                k2 ^= (long) (data[tailStart + 12] & 0xff) << 32;
                // fall through
            case 12:
                k2 ^= (long) (data[tailStart + 11] & 0xff) << 24;
                // fall through
            case 11:
                k2 ^= (long) (data[tailStart + 10] & 0xff) << 16;
                // fall through
            case 10:
                k2 ^= (long) (data[tailStart + 9] & 0xff) << 8;
                // fall through
            case 9:
                k2 ^= (long) (data[tailStart + 8] & 0xff);
                k2 *= C2;
                k2 = Long.rotateLeft(k2, R3);
                k2 *= C1;
                h2 ^= k2;
                // fall through
            case 8:
                k1 ^= (long) (data[tailStart + 7] & 0xff) << 56;
                // fall through
            case 7:
                k1 ^= (long) (data[tailStart + 6] & 0xff) << 48;
                // fall through
            case 6:
                k1 ^= (long) (data[tailStart + 5] & 0xff) << 40;
                // fall through
            case 5:
                k1 ^= (long) (data[tailStart + 4] & 0xff) << 32;
                // fall through
            case 4:
                k1 ^= (long) (data[tailStart + 3] & 0xff) << 24;
                // fall through
            case 3:
                k1 ^= (long) (data[tailStart + 2] & 0xff) << 16;
                // fall through
            case 2:
                k1 ^= (long) (data[tailStart + 1] & 0xff) << 8;
                // fall through
            case 1:
                k1 ^= (long) (data[tailStart] & 0xff);
                k1 *= C1;
                k1 = Long.rotateLeft(k1, R1);
                k1 *= C2;
                h1 ^= k1;
        }

        // finalization
        h1 ^= length;
        h2 ^= length;

        h1 += h2;
        h2 += h1;

        h1 = fmix64(h1);
        h2 = fmix64(h2);

        h1 += h2;
        h2 += h1;

        return new long[]{h1, h2};
    }

    /** Final avalanche mix for 64-bit lanes. */
    private static long fmix64(long h) {
        h ^= (h >>> 33);
        h *= 0xff51afd7ed558ccdL;
        h ^= (h >>> 33);
        h *= 0xc4ceb9fe1a85ec53L;
        h ^= (h >>> 33);
        return h;
    }
}
================================================
FILE: src/main/java/com/cnblogs/hoojo/sensitivewords/filter/simhash/executor/SimHashFilterExecutor.java
================================================
package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import com.cnblogs.hoojo.sensitivewords.filter.AbstractFilterExecutor;
import com.google.common.collect.Maps;
/**
* google simhash 算法实现脱敏过滤
*
* 由于simhash是对大文本进行比较,并且比较的是在支持分词的基础上对分词对象进行比较,进而确定相识度。
* 故 在脱敏方面支持不是很友好,在大文本情况下,效率低下。
* 改变情况,需要分词库支持。
*
* @author hoojo
* @createDate 2018年3月22日 上午11:07:47
* @file SimHashFilterExecutor.java
* @package com.cnblogs.hoojo.sensitivewords.filter.simhash.executor
* @project fengkong-service-provider
* @blog http://hoojo.cnblogs.com
* @email hoojo_@126.com
* @version 1.0
*/
public final class SimHashFilterExecutor extends AbstractFilterExecutor