Repository: TwiN/go-away Branch: master Commit: 657cbb391e00 Files: 18 Total size: 57.0 KB Directory structure: gitextract_hcz_6jj4/ ├── .gitattributes ├── .github/ │ ├── codecov.yml │ ├── dependabot.yml │ └── workflows/ │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── falsenegatives.go ├── falsepositives.go ├── go.mod ├── go.sum ├── goaway.go ├── goaway_bench_test.go ├── goaway_test.go ├── profanities.go ├── replacements.go ├── writer.go └── writer_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ * text=lf ================================================ FILE: .github/codecov.yml ================================================ comment: false coverage: status: patch: off project: default: target: 75% threshold: null ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "github-actions" directory: "/" labels: ["dependencies"] schedule: interval: "daily" - package-ecosystem: "gomod" directory: "/" labels: ["dependencies"] schedule: interval: "daily" ================================================ FILE: .github/workflows/test.yml ================================================ name: test on: pull_request: paths-ignore: - '*.md' push: branches: - master paths-ignore: - '*.md' jobs: test: name: test runs-on: ubuntu-latest timeout-minutes: 5 steps: - uses: actions/setup-go@v6 with: go-version: 1.25.x - uses: actions/checkout@v5 - run: go test ./... 
-race -coverprofile=coverage.txt -covermode=atomic - name: Codecov uses: codecov/codecov-action@v6.0.0 with: files: ./coverage.txt token: ${{ secrets.CODECOV_TOKEN }} ================================================ FILE: .gitignore ================================================ .idea accuracy /vendor ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2022-2023 TwiN Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ ![go-away](/.github/assets/go-away.png) # go-away ![test](https://github.com/TwiN/go-away/workflows/test/badge.svg) [![Go Report Card](https://goreportcard.com/badge/github.com/TwiN/go-away)](https://goreportcard.com/report/github.com/TwiN/go-away) [![codecov](https://codecov.io/gh/TwiN/go-away/branch/master/graph/badge.svg)](https://codecov.io/gh/TwiN/go-away) [![Go Reference](https://pkg.go.dev/badge/github.com/TwiN/go-away.svg)](https://pkg.go.dev/github.com/TwiN/go-away) [![Follow TwiN](https://img.shields.io/github/followers/TwiN?label=Follow&style=social)](https://github.com/TwiN) go-away is a stand-alone, lightweight library for detecting and censoring profanities in Go. This library must remain **extremely** easy to use. Its original intent of not adding overhead will always remain. ## Installation ```console go get -u github.com/TwiN/go-away ``` ## Usage ```go package main import ( "github.com/TwiN/go-away" ) func main() { goaway.IsProfane("fuck this shit") // returns true goaway.ExtractProfanity("fuck this shit") // returns "fuck" goaway.Censor("fuck this shit") // returns "**** this ****" goaway.IsProfane("F u C k th1$ $h!t") // returns true goaway.ExtractProfanity("F u C k th1$ $h!t") // returns "fuck" goaway.Censor("F u C k th1$ $h!t") // returns "* * * * th1$ ****" goaway.IsProfane("@$$h073") // returns true goaway.ExtractProfanity("@$$h073") // returns "asshole" goaway.Censor("@$$h073") // returns "*******" goaway.IsProfane("hello, world!") // returns false goaway.ExtractProfanity("hello, world!") // returns "" goaway.Censor("hello, world!") // returns "hello, world!" 
buf := &bytes.Buffer{} detector := goaway.NewProfanityDetector() writer := goaway.NewWriter(buf, detector) writer.Write([]byte("fuck this shit")) writer.Flush() print(buf.String()) // returns "**** this ****" } ``` Calling `goaway.IsProfane(s)`, `goaway.ExtractProfanity(s)` or `goaway.Censor(s)` will use the default profanity detector, but if you'd like to disable leet speak, numerical character or special character sanitization, you have to create a ProfanityDetector instead: ```go profanityDetector := goaway.NewProfanityDetector().WithSanitizeLeetSpeak(false).WithSanitizeSpecialCharacters(false).WithSanitizeAccents(false) profanityDetector.IsProfane("b!tch") // returns false because we're not sanitizing special characters ``` You can also disable the default behavior of white space sanitization like so: ```go profanityDetector := goaway.NewProfanityDetector().WithSanitizeSpaces(false) profanityDetector.IsProfane("sh it") // returns false because we're not sanitizing white spaces ``` You can also require stricter matching by enabling `WithExactWord`: ```go profanityDetector := NewProfanityDetector().WithExactWord(true).WithSanitizeSpecialCharacters(true) profanityDetector.IsProfane("analyst") // returns false because we match the exact word profanityDetector.IsProfane("anal") // returns true ``` By default, the `NewProfanityDetector` constructor uses the default dictionaries for profanities, false positives and false negatives. These dictionaries are exposed as `goaway.DefaultProfanities`, `goaway.DefaultFalsePositives` and `goaway.DefaultFalseNegatives` respectively. 
If you need to load a different dictionary, you could create a new instance of `ProfanityDetector` on this way: ```go profanities := []string{"ass"} falsePositives := []string{"bass"} falseNegatives := []string{"dumbass"} profanityDetector := goaway.NewProfanityDetector().WithCustomDictionary(profanities, falsePositives, falseNegatives) ``` You may also specify custom character replacements using `WithCustomCharacterReplacements` on a `ProfanityDetector`. By default, this is set to `goaway.DefaultCharacterReplacements`. Note that all character replacements with a value of `' '` are considered as special characters while all characters with a value that is not `' '` are considered to be leetspeak characters. This means that using `profanityDetector.WithSanitizeSpecialCharacters(bool)` and `profanityDetector.WithSanitizeLeetSpeak(bool)` will let you toggle which character replacements are executed during the sanitization process. ## Limitations Currently, go-away does not support UTF-8. As such, if the strings you are feeding to this library come from unsanitized user input, you are advised to filter out all non-ASCII characters. If you'd like to add support for UTF-8, see [#43](https://github.com/TwiN/go-away/issues/43) and [#47](https://github.com/TwiN/go-away/issues/47). ## In the background While using a giant regex query to handle everything would be a way of doing it, as more words are added to the list of profanities, that would slow down the filtering considerably. Instead, the following steps are taken before checking for profanities in a string: - Numbers are replaced to their letter counterparts (e.g. 1 -> L, 4 -> A, etc) - Special characters are replaced to their letter equivalent (e.g. @ -> A, ! -> i) - The resulting string has all of its spaces removed to prevent `w ords lik e tha t` - The resulting string has all of its characters converted to lowercase - The resulting string has all words deemed as false positives (e.g. 
`assassin`) removed In the future, the following additional steps could also be considered: - All non-transformed special characters are removed to prevent `s~tring li~ke tha~~t` - All words that have the same character repeated more than twice in a row are removed (e.g. `poooop -> poop`) - NOTE: This is obviously not a perfect approach, as words like `fuuck` wouldn't be detected, but it's better than nothing. - The upside of this method is that we only need to add base bad words, and not all tenses of said bad word. (e.g. the `fuck` entry would support `fucker`, `fucking`, etc.) ================================================ FILE: falsenegatives.go ================================================ package goaway // DefaultFalseNegatives is a list of profanities that are checked for before the DefaultFalsePositives are removed // // This is reserved for words that may be incorrectly filtered as false positives. // // Alternatively, words that are long, or that should mark a string as profane no matter what the context is // or whether the word is part of another word can also be included. 
// // Note that there is a test that prevents words from being in both DefaultProfanities and DefaultFalseNegatives, var DefaultFalseNegatives = []string{ "asshole", "dumbass", // ass -> bASS (FP) -> dumBASS "nigger", } ================================================ FILE: falsepositives.go ================================================ package goaway // DefaultFalsePositives is a list of words that may wrongly trigger the DefaultProfanities var DefaultFalsePositives = []string{ "analy", // analysis, analytics "arsenal", "assassin", "assaying", // was saying "assert", "assign", "assimil", "assist", "associat", "assum", // assuming, assumption, assumed "assur", // assurance "banal", "basement", "bass", "cass", // cassie, cassandra, carcass "butter", // butter, butterfly "butthe", "button", "canvass", "circum", "clitheroe", "cockburn", "cocktail", "cumber", "cumbing", "cumulat", "dickvandyke", "document", "evaluate", "exclusive", "expensive", "explain", "expression", "grape", "grass", "harass", "hass", "horniman", "hotwater", "identit", "kassa", // kassandra "kassi", // kassie, kassidy "lass", // class "leafage", "libshitz", "magnacumlaude", "mass", "mocha", "pass", // compass, passion "penistone", "peacock", "phoebe", "phoenix", "pushit", "raccoon", "sassy", "saturday", "scrap", // scrap, scrape, scraping "serfage", "sexist", // systems exist, sexist "shoe", "scunthorpe", "shitake", "stitch", "sussex", "therapist", "therapeutic", "tysongay", "wass", "wharfage", } ================================================ FILE: go.mod ================================================ module github.com/TwiN/go-away go 1.25.0 require golang.org/x/text v0.36.0 ================================================ FILE: go.sum ================================================ golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= ================================================ FILE: 
goaway.go
================================================

package goaway

import (
	"strings"
	"unicode"

	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

const (
	// space is the separator used to split tokens in exact-word mode and the
	// character removed when space sanitization is enabled.
	space = " "
	// firstRuneSupported and lastRuneSupported delimit the printable ASCII
	// range; runes outside it may carry accents (see removeAccents).
	firstRuneSupported = ' '
	lastRuneSupported  = '~'
)

var (
	// defaultProfanityDetector is lazily initialized by the package-level
	// IsProfane, ExtractProfanity and Censor helpers.
	defaultProfanityDetector *ProfanityDetector
)

// ProfanityDetector contains the dictionaries as well as the configuration
// for determining how profanity detection is handled
type ProfanityDetector struct {
	sanitizeSpecialCharacters bool // Whether to replace characters with the value ' ' in characterReplacements
	sanitizeLeetSpeak         bool // Whether to replace characters with a non-' ' value in characterReplacements
	sanitizeAccents           bool // Whether to strip accents during sanitization (see removeAccents)
	sanitizeSpaces            bool // Whether to remove all spaces during sanitization
	exactWord                 bool // Whether only whole, space-delimited tokens may match a profanity

	profanities    []string // Words to detect as profane
	falseNegatives []string // Words checked for before false positives are removed
	falsePositives []string // Words removed from the input before checking profanities

	characterReplacements map[rune]rune // Special-character (value ' ') and leetspeak (value not ' ') replacements
}

// NewProfanityDetector creates a new ProfanityDetector
func NewProfanityDetector() *ProfanityDetector {
	return &ProfanityDetector{
		sanitizeSpecialCharacters: true,
		sanitizeLeetSpeak:         true,
		sanitizeAccents:           true,
		sanitizeSpaces:            true,
		exactWord:                 false,
		profanities:               DefaultProfanities,
		falsePositives:            DefaultFalsePositives,
		falseNegatives:            DefaultFalseNegatives,
		characterReplacements:     DefaultCharacterReplacements,
	}
}

// WithSanitizeLeetSpeak allows configuring whether the sanitization process should also take into account leetspeak
//
// Leetspeak characters are characters to be replaced by non-' ' values in the characterReplacements map.
// For instance, '4' is replaced by 'a' and '3' is replaced by 'e', which means that "4sshol3" would be
// sanitized to "asshole", which would be detected as a profanity.
//
// By default, this is set to true.
func (g *ProfanityDetector) WithSanitizeLeetSpeak(sanitize bool) *ProfanityDetector { g.sanitizeLeetSpeak = sanitize return g.buildCharacterReplacements() } // WithSanitizeSpecialCharacters allows configuring whether the sanitization process should also take into account // special characters. // // Special characters are characters that are part of the characterReplacements map (DefaultCharacterReplacements by // default) and are to be removed during the sanitization step. // // For instance, "fu_ck" would be sanitized to "fuck", which would be detected as a profanity. // // By default, this is set to true. func (g *ProfanityDetector) WithSanitizeSpecialCharacters(sanitize bool) *ProfanityDetector { g.sanitizeSpecialCharacters = sanitize return g.buildCharacterReplacements() } // WithSanitizeAccents allows configuring of whether the sanitization process should also take into account accents. // By default, this is set to true, but since this adds a bit of overhead, you may disable it if your use case // is time-sensitive or if the input doesn't involve accents (i.e. if the input can never contain special characters) func (g *ProfanityDetector) WithSanitizeAccents(sanitize bool) *ProfanityDetector { g.sanitizeAccents = sanitize return g } // WithSanitizeSpaces allows configuring whether the sanitization process should also take into account spaces func (g *ProfanityDetector) WithSanitizeSpaces(sanitize bool) *ProfanityDetector { g.sanitizeSpaces = sanitize return g } // WithCustomDictionary allows configuring whether the sanitization process should also take into account // custom profanities, false positives and false negatives dictionaries. // All dictionaries are expected to be lowercased. 
func (g *ProfanityDetector) WithCustomDictionary(profanities, falsePositives, falseNegatives []string) *ProfanityDetector { g.profanities = profanities g.falsePositives = falsePositives g.falseNegatives = falseNegatives return g } // WithCustomCharacterReplacements allows configuring characters that to be replaced by other characters. // // Note that all entries that have the value ' ' are considered as special characters while all entries with a value // that is not ' ' are considered as leet speak. // // Defaults to DefaultCharacterReplacements func (g *ProfanityDetector) WithCustomCharacterReplacements(characterReplacements map[rune]rune) *ProfanityDetector { g.characterReplacements = characterReplacements return g } // WithExactWord allows configuring whether the profanity check process should require exact matches or not. // Using this reduces false positives and winds up more permissive. // // Note: this entails also setting WithSanitizeSpaces(false), since without spaces present exact word matching // does not make sense. func (g *ProfanityDetector) WithExactWord(exactWord bool) *ProfanityDetector { g.exactWord = exactWord return g.WithSanitizeSpaces(false) } // IsProfane takes in a string (word or sentence) and look for profanities. // Returns a boolean func (g *ProfanityDetector) IsProfane(s string) bool { return len(g.ExtractProfanity(s)) > 0 } // ExtractProfanity takes in a string (word or sentence) and look for profanities. 
// Returns the first profanity found, or an empty string if none are found
func (g *ProfanityDetector) ExtractProfanity(s string) string {
	s, _ = g.sanitize(s, false)
	// Check for false negatives
	for _, word := range g.falseNegatives {
		if match := strings.Contains(s, word); match {
			return word
		}
	}
	// Remove false positives
	for _, word := range g.falsePositives {
		s = strings.Replace(s, word, "", -1)
	}
	if g.exactWord {
		// Exact-word mode: only a whole, space-delimited token may match.
		tokens := strings.Split(s, space)
		for _, token := range tokens {
			if sliceContains(g.profanities, token) {
				return token
			}
		}
	} else {
		// Check for profanities
		for _, word := range g.profanities {
			if match := strings.Contains(s, word); match {
				return word
			}
		}
	}
	return ""
}

// sliceContains reports whether s is equal to any entry of words, ignoring case.
func sliceContains(words []string, s string) bool {
	for _, word := range words {
		if strings.EqualFold(s, word) {
			return true
		}
	}
	return false
}

// indexToRune converts a byte index within s to the corresponding rune index,
// i.e. it counts how many runes start before the given byte offset.
func (g *ProfanityDetector) indexToRune(s string, index int) int {
	count := 0
	for i := range s {
		if i == index {
			break
		}
		if i < index {
			count++
		}
	}
	return count
}

// Censor takes in a string (word or sentence) and tries to censor all profanities found,
// replacing each rune of every matched profanity in the original input with '*'.
func (g *ProfanityDetector) Censor(s string) string {
	censored := []rune(s)
	var originalIndexes []int
	// originalIndexes maps each rune of the sanitized string back to its rune
	// position in the original input so the right runes get starred out.
	s, originalIndexes = g.sanitize(s, true)
	runeWordLength := 0
	// False negatives are starred first, then false positives are stripped so
	// they cannot be matched by the final pass over the profanity list.
	g.checkProfanity(&s, &originalIndexes, &censored, g.falseNegatives, &runeWordLength)
	g.removeFalsePositives(&s, &originalIndexes, &runeWordLength)
	g.checkProfanity(&s, &originalIndexes, &censored, g.profanities, &runeWordLength)
	return string(censored)
}

// checkProfanity stars out, in censored, every occurrence of every word of
// wordList found in the sanitized string s, using originalIndexes to map
// sanitized rune positions back to positions in the original input.
func (g *ProfanityDetector) checkProfanity(s *string, originalIndexes *[]int, censored *[]rune, wordList []string, runeWordLength *int) {
	for _, word := range wordList {
		currentIndex := 0
		*runeWordLength = len([]rune(word))
		for currentIndex != -1 {
			if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
				for i := 0; i < *runeWordLength; i++ {
					runeIndex := g.indexToRune(*s, currentIndex+foundIndex) + i
					if runeIndex < len(*originalIndexes) {
						(*censored)[(*originalIndexes)[runeIndex]] = '*'
					}
				}
				currentIndex += foundIndex + len([]byte(word))
			} else {
				break
			}
		}
	}
}

// removeFalsePositives strips every false-positive word from s and drops the
// corresponding entries from originalIndexes so that subsequent passes keep a
// correct mapping back to the original input.
func (g *ProfanityDetector) removeFalsePositives(s *string, originalIndexes *[]int, runeWordLength *int) {
	for _, word := range g.falsePositives {
		currentIndex := 0
		*runeWordLength = len([]rune(word))
		for currentIndex != -1 {
			if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
				// NOTE(review): foundIndex is relative to (*s)[currentIndex:] but is
				// passed to indexToRune against the whole string — this looks like it
				// could misalign indexes when a word occurs past the first match
				// (possibly related to the FIXME'd Censor test case) — confirm.
				foundRuneIndex := g.indexToRune(*s, foundIndex)
				*originalIndexes = append((*originalIndexes)[:foundRuneIndex], (*originalIndexes)[foundRuneIndex+*runeWordLength:]...)
				currentIndex += foundIndex + len([]byte(word))
			} else {
				break
			}
		}
		*s = strings.Replace(*s, word, "", -1)
	}
}

// sanitize lowercases s, applies the configured character replacements and,
// depending on the detector's configuration, strips accents and spaces.
// When rememberOriginalIndexes is true, it also returns, for each rune kept in
// the sanitized output, its rune index in the lowercased input (used by Censor).
func (g ProfanityDetector) sanitize(s string, rememberOriginalIndexes bool) (string, []int) {
	s = strings.ToLower(s)
	// "()" -> "o" is a two-rune replacement, which the index bookkeeping below
	// cannot represent, so it is only applied when indexes aren't remembered.
	if g.sanitizeLeetSpeak && !rememberOriginalIndexes && g.sanitizeSpecialCharacters {
		s = strings.ReplaceAll(s, "()", "o")
	}
	sb := strings.Builder{}
	for _, char := range s {
		if replacement, found := g.characterReplacements[char]; found {
			if g.sanitizeSpecialCharacters && replacement == ' ' {
				// If the replacement is a space, and we're sanitizing special characters, we replace.
				sb.WriteRune(replacement)
				continue
			} else if g.sanitizeLeetSpeak && replacement != ' ' {
				// If the replacement isn't a space, and we're sanitizing leet speak, we replace.
				sb.WriteRune(replacement)
				continue
			}
		}
		sb.WriteRune(char)
	}
	s = sb.String()
	if g.sanitizeAccents {
		s = removeAccents(s)
	}
	var originalIndexes []int
	if rememberOriginalIndexes {
		for i, c := range []rune(s) {
			// If spaces aren't being sanitized, appending to the original indices prevents off-by-one errors later on.
			if c != ' ' || !g.sanitizeSpaces {
				originalIndexes = append(originalIndexes, i)
			}
		}
	}
	if g.sanitizeSpaces {
		s = strings.Replace(s, space, "", -1)
	}
	return s, originalIndexes
}

// removeAccents strips all accents from characters.
// Only called if ProfanityDetector.removeAccents is set to true func removeAccents(s string) string { removeAccentsTransformer := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) for _, character := range s { // If there's a character outside the range of supported runes, there might be some accented words if character < firstRuneSupported || character > lastRuneSupported { s, _, _ = transform.String(removeAccentsTransformer, s) break } } return s } // buildCharacterReplacements builds characterReplacements if WithSanitizeLeetSpeak or WithSanitizeSpecialCharacters is // called. // // If this is not called, DefaultCharacterReplacements func (g *ProfanityDetector) buildCharacterReplacements() *ProfanityDetector { g.characterReplacements = make(map[rune]rune) if g.sanitizeSpecialCharacters { g.characterReplacements['-'] = ' ' g.characterReplacements['_'] = ' ' g.characterReplacements['|'] = ' ' g.characterReplacements['.'] = ' ' g.characterReplacements[','] = ' ' g.characterReplacements['('] = ' ' g.characterReplacements[')'] = ' ' g.characterReplacements['<'] = ' ' g.characterReplacements['>'] = ' ' g.characterReplacements['"'] = ' ' g.characterReplacements['`'] = ' ' g.characterReplacements['~'] = ' ' g.characterReplacements['*'] = ' ' g.characterReplacements['&'] = ' ' g.characterReplacements['%'] = ' ' g.characterReplacements['$'] = ' ' g.characterReplacements['#'] = ' ' g.characterReplacements['@'] = ' ' g.characterReplacements['!'] = ' ' g.characterReplacements['?'] = ' ' g.characterReplacements['+'] = ' ' } if g.sanitizeLeetSpeak { g.characterReplacements['4'] = 'a' g.characterReplacements['$'] = 's' g.characterReplacements['!'] = 'i' g.characterReplacements['+'] = 't' g.characterReplacements['#'] = 'h' g.characterReplacements['@'] = 'a' g.characterReplacements['0'] = 'o' g.characterReplacements['1'] = 'i' g.characterReplacements['7'] = 'l' g.characterReplacements['3'] = 'e' g.characterReplacements['5'] = 's' g.characterReplacements['<'] 
= 'c' } return g } // IsProfane checks whether there are any profanities in a given string (word or sentence). // // Uses the default ProfanityDetector func IsProfane(s string) bool { if defaultProfanityDetector == nil { defaultProfanityDetector = NewProfanityDetector() } return defaultProfanityDetector.IsProfane(s) } // ExtractProfanity takes in a string (word or sentence) and look for profanities. // Returns the first profanity found, or an empty string if none are found // // Uses the default ProfanityDetector func ExtractProfanity(s string) string { if defaultProfanityDetector == nil { defaultProfanityDetector = NewProfanityDetector() } return defaultProfanityDetector.ExtractProfanity(s) } // Censor takes in a string (word or sentence) and tries to censor all profanities found. // // Uses the default ProfanityDetector func Censor(s string) string { if defaultProfanityDetector == nil { defaultProfanityDetector = NewProfanityDetector() } return defaultProfanityDetector.Censor(s) } ================================================ FILE: goaway_bench_test.go ================================================ package goaway import ( "testing" ) func BenchmarkIsProfaneWhenShortStringHasNoProfanity(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("aaaaaaaaaaaaaa") } b.ReportAllocs() } func BenchmarkIsProfaneWhenShortStringHasProfanityAtTheStart(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("fuckaaaaaaaaaa") } b.ReportAllocs() } func BenchmarkIsProfaneWhenShortStringHasProfanityInTheMiddle(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("aaaaafuckaaaaa") } b.ReportAllocs() } func BenchmarkIsProfaneWhenShortStringHasProfanityAtTheEnd(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("aaaaaaaaaafuck") } b.ReportAllocs() } func BenchmarkIsProfaneWhenMediumStringHasNoProfanity(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("How are you doing today?") } b.ReportAllocs() } func BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheStart(b *testing.B) { for 
n := 0; n < b.N; n++ { IsProfane("Shit, you're cute today.") } b.ReportAllocs() } func BenchmarkIsProfaneWhenMediumStringHasProfanityInTheMiddle(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("How are you fu ck doing?") } b.ReportAllocs() } func BenchmarkIsProfaneWhenMediumStringHasProfanityAtTheEnd(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("you're cute today. Fuck.") } b.ReportAllocs() } func BenchmarkIsProfaneWhenLongStringHasNoProfanity(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("Hello John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie brand") } b.ReportAllocs() } func BenchmarkIsProfaneWhenLongStringHasProfanityAtTheStart(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("Fuck John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie brand") } b.ReportAllocs() } func BenchmarkIsProfaneWhenLongStringHasProfanityInTheMiddle(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("Hello John Doe, I hope you're feeling well, as I come today bearing shitty news regarding your favorite chocolate chip cookie brand") } b.ReportAllocs() } func BenchmarkIsProfaneWhenLongStringHasProfanityAtTheEnd(b *testing.B) { for n := 0; n < b.N; n++ { IsProfane("Hello John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie bitch") } b.ReportAllocs() } func BenchmarkProfanityDetector_WithSanitizeAccentsSetToFalseWhenLongStringHasProfanityAtTheStart(b *testing.B) { profanityDetector := NewProfanityDetector().WithSanitizeAccents(false) for n := 0; n < b.N; n++ { profanityDetector.IsProfane("Fuck John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie brand") } b.ReportAllocs() } func BenchmarkProfanityDetector_WithSanitizeAccentsSetToFalseWhenLongStringHasProfanityInTheMiddle(b *testing.B) { 
profanityDetector := NewProfanityDetector().WithSanitizeAccents(false) for n := 0; n < b.N; n++ { profanityDetector.IsProfane("Hello John Doe, I hope you're feeling well, as I come today bearing shitty news regarding your favorite chocolate chip cookie brand") } b.ReportAllocs() } func BenchmarkProfanityDetector_WithSanitizeAccentsSetToFalseWhenLongStringHasProfanityAtTheEnd(b *testing.B) { profanityDetector := NewProfanityDetector().WithSanitizeAccents(false) for n := 0; n < b.N; n++ { profanityDetector.IsProfane("Hello John Doe, I hope you're feeling well, as I come today bearing terrible news regarding your favorite chocolate chip cookie bitch") } b.ReportAllocs() } func BenchmarkProfanityDetector_Sanitize(b *testing.B) { profanityDetector := NewProfanityDetector().WithSanitizeAccents(true).WithSanitizeSpecialCharacters(true).WithSanitizeLeetSpeak(true) for n := 0; n < b.N; n++ { profanityDetector.IsProfane("H3ll0 J0hn D0e, 1 h0p3 y0u'r3 f3eling w3ll, as 1 c0me t0d4y b34r1ng sh1tty n3w5 r3g4rd1ng y0ur fav0rite ch0c0l4t3 chip c00kie br4nd") } b.ReportAllocs() } func BenchmarkCensor(b *testing.B) { for n := 0; n < b.N; n++ { Censor("Thundercunt c()ck") } b.ReportAllocs() } func BenchmarkIsProfaneConcurrently(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { IsProfane("aaaaafuckaaaaa") } }) b.ReportAllocs() } func BenchmarkIsProfaneConcurrently_WithAccents(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { IsProfane("ÄšŚ") } }) b.ReportAllocs() } ================================================ FILE: goaway_test.go ================================================ package goaway import ( "testing" ) func TestExtractProfanity(t *testing.T) { defaultProfanityDetector = nil tests := []struct { input string expectedProfanity string }{ { input: "fuck this shit", expectedProfanity: "fuck", }, { input: "F u C k th1$ $h!t", expectedProfanity: "fuck", }, { input: "@$$h073", expectedProfanity: "asshole", }, { input: "hello, world!", 
expectedProfanity: "", }, } for _, tt := range tests { t.Run(tt.input, func(t *testing.T) { profanity := ExtractProfanity(tt.input) if profanity != tt.expectedProfanity { t.Errorf("expected '%s', got '%s'", tt.expectedProfanity, profanity) } }) } } func TestProfanityDetector_Censor(t *testing.T) { defaultProfanityDetector = nil profanityDetectorWithSanitizeSpaceDisabled := NewProfanityDetector().WithSanitizeSpaces(false) tests := []struct { input string expectedOutput string expectedOutputWithoutSpaceSanitization string }{ { input: "what the fuck", expectedOutput: "what the ****", expectedOutputWithoutSpaceSanitization: "what the ****", }, { input: "fuck this", expectedOutput: "**** this", expectedOutputWithoutSpaceSanitization: "**** this", }, { input: "one penis, two vaginas, three dicks, four sluts, five whores and a flower", expectedOutput: "one *****, two ******s, three ****s, four ****s, five *****s and a flower", expectedOutputWithoutSpaceSanitization: "one *****, two ******s, three ****s, four ****s, five *****s and a flower", }, { input: "Censor doesn't support sanitizing '()' into 'o', because it's two characters. Proof: c()ck. Maybe one day I'll have time to fix it.", expectedOutput: "Censor doesn't support sanitizing '()' into 'o', because it's two characters. Proof: c()ck. Maybe one day I'll have time to fix it.", expectedOutputWithoutSpaceSanitization: "Censor doesn't support sanitizing '()' into 'o', because it's two characters. Proof: c()ck. 
Maybe one day I'll have time to fix it.", }, { input: "fuck shit fuck", expectedOutput: "**** **** ****", expectedOutputWithoutSpaceSanitization: "**** **** ****", }, { input: "fuckfuck", expectedOutput: "********", expectedOutputWithoutSpaceSanitization: "********", }, { input: "fuck this shit", expectedOutput: "**** this ****", expectedOutputWithoutSpaceSanitization: "**** this ****", }, { input: "F u C k th1$ $h!t", expectedOutput: "* * * * th1$ ****", expectedOutputWithoutSpaceSanitization: "F u C k th1$ ****", }, { input: "@$$h073", expectedOutput: "*******", expectedOutputWithoutSpaceSanitization: "*******", }, { input: "hello, world!", expectedOutput: "hello, world!", expectedOutputWithoutSpaceSanitization: "hello, world!", }, { input: "Hey asshole, are y()u an assassin? If not, fuck off.", expectedOutput: "Hey *******, are y()u an assassin? If not, **** off.", expectedOutputWithoutSpaceSanitization: "Hey *******, are y()u an assassin? If not, **** off.", }, { input: "I am from Scunthorpe, north Lincolnshire", expectedOutput: "I am from Scunthorpe, north Lincolnshire", expectedOutputWithoutSpaceSanitization: "I am from Scunthorpe, north Lincolnshire", }, { input: "He is an associate of mine", expectedOutput: "He is an associate of mine", expectedOutputWithoutSpaceSanitization: "He is an associate of mine", }, { input: "But the table is on fucking fire", expectedOutput: "But the table is on ****ing fire", expectedOutputWithoutSpaceSanitization: "But the table is on ****ing fire", }, { input: "““““““““““““But the table is on fucking fire“", expectedOutput: "““““““““““““But the table is on ****ing fire“", expectedOutputWithoutSpaceSanitization: "““““““““““““But the table is on ****ing fire“", }, { input: "f.u_ck this s.h-i~t", expectedOutput: "*.*_** this *.*-*~*", expectedOutputWithoutSpaceSanitization: "f.u_ck this s.h-i~t", // This is because special characters get replaced with a space, and because we're not sanitizing spaces... 
}, { input: "glass", expectedOutput: "glass", expectedOutputWithoutSpaceSanitization: "glass", }, { input: "ы", expectedOutput: "ы", expectedOutputWithoutSpaceSanitization: "ы", }, { input: "documentdocument", // false positives (https://github.com/TwiN/go-away/issues/30) expectedOutput: "documentdocument", expectedOutputWithoutSpaceSanitization: "documentdocument", }, { input: "dumbassdumbass", // false negatives (https://github.com/TwiN/go-away/issues/30) expectedOutput: "**************", expectedOutputWithoutSpaceSanitization: "**************", }, { input: "document fuck document fuck", // FIXME: This is not censored properly expectedOutput: "document **** document ****", expectedOutputWithoutSpaceSanitization: "document **** document ****", }, { input: "Everyone was staring, and someone muttered ‘gyat’ under their breath.", expectedOutput: "Everyone was staring, and someone muttered ‘****’ under their breath.", expectedOutputWithoutSpaceSanitization: "Everyone was staring, and someone muttered ‘****’ under their breath.", }, } for _, tt := range tests { t.Run("default_"+tt.input, func(t *testing.T) { censored := Censor(tt.input) if censored != tt.expectedOutput { t.Errorf("expected '%s', got '%s'", tt.expectedOutput, censored) } }) t.Run("no-space-sanitization_"+tt.input, func(t *testing.T) { censored := profanityDetectorWithSanitizeSpaceDisabled.Censor(tt.input) if censored != tt.expectedOutputWithoutSpaceSanitization { t.Errorf("expected '%s', got '%s'", tt.expectedOutputWithoutSpaceSanitization, censored) } }) } } func TestNoDuplicatesBetweenProfanitiesAndFalseNegatives(t *testing.T) { for _, profanity := range DefaultProfanities { for _, falseNegative := range DefaultFalseNegatives { if profanity == falseNegative { t.Errorf("'%s' is already in 'falseNegatives', there's no need to have it in 'profanities' too", profanity) } } } } func TestBadWords(t *testing.T) { words := []string{"fuck", "ass", "poop", "penis", "bitch"} tests := []struct { name string 
profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary([]string{"fuck", "ass", "poop", "penis", "bitch"}, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, w := range words { if !tt.profanityDetector.IsProfane(w) { t.Error("Expected true, got false from word", w) } if word := tt.profanityDetector.ExtractProfanity(w); len(word) == 0 { t.Error("Expected true, got false from word", w) } else if word != w { t.Errorf("Expected %s, got %s", w, word) } } }) } } func TestBadWordsWithSpaces(t *testing.T) { profanities := []string{"fuck", "ass", "poop", "penis", "bitch"} words := []string{"fu ck", "as s", "po op", "pe ni s", "bit ch"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, w := range words { if !tt.profanityDetector.WithSanitizeSpaces(true).IsProfane(w) { t.Error("Expected true because sanitizeSpaces is set to true, got false from word", w) } if tt.profanityDetector.WithSanitizeSpaces(false).IsProfane(w) { t.Error("Expected false because sanitizeSpaces is set to false, got true from word", w) } } }) } } func TestBadWordsWithAccentedLetters(t *testing.T) { profanities := []string{"fuck", "ass", "poop", "penis", "bitch"} words := []string{"fučk", "ÄšŚ", "pÓöp", "pÉnìŚ", "bitčh"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: 
NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, w := range words { if !tt.profanityDetector.WithSanitizeAccents(true).IsProfane(w) { t.Error("Expected true because sanitizeAccents is set to true, got false from word", w) } if tt.profanityDetector.WithSanitizeAccents(false).IsProfane(w) { t.Error("Expected false because sanitizeAccents is set to false, got true from word", w) } } }) } } func TestCensorWithVerySpecialCharacters(t *testing.T) { profanities := []string{"крывавыa"} words := []string{"крывавыa"} expectedOutputs := []string{"********"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for index, w := range words { if output := tt.profanityDetector.Censor(w); output != expectedOutputs[index] { t.Errorf("Expected %s to return %s, got %s", w, expectedOutputs[index], output) } } }) } } func TestSentencesWithBadWords(t *testing.T) { profanities := []string{"fuck", "ass", "poop", "penis", "bitch"} sentences := []string{"What the fuck is your problem", "Go away, asshole!"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, s := range sentences { if !tt.profanityDetector.IsProfane(s) { t.Error("Expected true, got false from sentence", s) } } }) } } func TestProfanityDetector_WithCustomCharacterReplacements(t *testing.T) { tests := []struct { name string 
profanityDetector *ProfanityDetector sentence string result bool }{ { name: "With default profanity detector", profanityDetector: NewProfanityDetector(), sentence: "5#1+", result: true, // shit is a profanity }, { name: "With custom character replacements that has leet speak characters", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'(': 'c'}), sentence: "fu(k", result: true, // fuck is a profanity }, { name: "With custom character replacements that has leet speak characters with sanitizeLeetSpeak disabled", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'(': 'c'}).WithSanitizeLeetSpeak(false), sentence: "fu(k", result: false, // fuk isn't a profanity }, { name: "With custom character replacements that has leet speak characters with sanitizeSpecialCharacters disabled", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'(': 'c'}).WithSanitizeSpecialCharacters(false), sentence: "fu(k", result: false, // fu(k isn't a profanity }, { name: "With custom character replacements that has special characters", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'.': ' '}), sentence: "f.u.c.k", result: true, }, { name: "With custom character replacements that has special characters with sanitizeLeetSpeak disabled", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'.': ' '}).WithSanitizeLeetSpeak(false), sentence: "f.u.c.k", result: true, // fuck is a profanity }, { name: "With custom character replacements that has special characters with sanitizeSpecialCharacters disabled", profanityDetector: NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{'.': ' '}).WithSanitizeSpecialCharacters(false), sentence: "f.u.c.k", result: false, // f.u.c.k isn't a profanity }, { name: "With empty character replacement mapping", profanityDetector: 
NewProfanityDetector().WithCustomCharacterReplacements(map[rune]rune{}), sentence: "5#1+", result: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := tt.profanityDetector.IsProfane(tt.sentence) if got != tt.result { t.Errorf("Expected %v, got %v from sentence %s", tt.result, got, tt.sentence) } }) } } func TestSneakyBadWords(t *testing.T) { profanities := []string{"fuck", "ass", "poop", "penis", "bitch", "arse", "shit", "btch"} words := []string{"A$$", "4ss", "4s$", "a S s", "a $ s", "@$$h073", "f u c k", "4r5e", "5h1t", "5hit", "a55", "ar5e", "a_s_s", "b!tch", "b!+ch"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, w := range words { if !tt.profanityDetector.IsProfane(w) { t.Error("Expected true, got false from word", w) } } }) } } func TestSentencesWithSneakyBadWords(t *testing.T) { profanities := []string{"poop", "asshole"} sentences := []string{ "You smell p00p", "Go away, a$$h0l3!", } tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(profanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, s := range sentences { if !tt.profanityDetector.IsProfane(s) { t.Error("Expected true, got false from sentence", s) } } }) } } func TestNormalWords(t *testing.T) { words := []string{"hello", "world", "whats", "up"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: 
NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(DefaultProfanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, w := range words { if tt.profanityDetector.IsProfane(w) { t.Error("Expected false, got true from word", w) } } }) } } func TestSentencesWithNoProfanities(t *testing.T) { sentences := []string{ "hello, my friend", "what's up?", "do you want to play bingo?", "who are you?", "Better late than never", "Bite the bullet", "Break a leg", "Call it a day", "Be careful when you're driving", "How are you?", "Hurry up!", "I don't like her", "If you need my help, please let me know", "Leave a message after the beep", "Thank you", "Yes, really", "Call me at 9, ok?", } for _, s := range sentences { if IsProfane(s) { t.Error("Expected false, got false from sentence", s) } } } func TestFalsePositives(t *testing.T) { sentences := []string{ "I am from Scunthorpe, north Lincolnshire", "He is an associate of mine", "Are you an assassin?", "But the table is on fire", "glass", "grass", "classic", "classification", "passion", "carcass", "cassandra", "just push it down the ledge", // puSH IT "has steph", // hAS Steph "was steph", // wAS Steph "hot water", // hoT WATer "Phoenix", // pHOEnix "systems exist", // systemS EXist "saturday", // saTURDay "therapeutic", "press the button", } tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(DefaultProfanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, s := range sentences { if tt.profanityDetector.IsProfane(s) { t.Error("Expected false, got true from:", s) } } }) } } func TestExactWord(t *testing.T) { 
acceptSentences := []string{ "I'm an analyst", } rejectSentences := []string{"Go away, ass."} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Empty FalsePositives", profanityDetector: NewProfanityDetector().WithExactWord(true).WithSanitizeSpecialCharacters(true).WithCustomDictionary(DefaultProfanities, nil, nil), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, s := range acceptSentences { if tt.profanityDetector.IsProfane(s) { t.Error("Expected false, got true from:", s) } } for _, s := range rejectSentences { if !tt.profanityDetector.IsProfane(s) { t.Error("Expected true, got false from:", s) } } }) } } func TestFalseNegatives(t *testing.T) { sentences := []string{ "dumb ass", // ass -> bASS (FP) -> dumBASS (FFP) } tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(DefaultProfanities, DefaultFalsePositives, DefaultFalseNegatives), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithExactWord(true).WithCustomDictionary(DefaultProfanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, s := range sentences { if !tt.profanityDetector.IsProfane(s) { t.Error("Expected false, got true from:", s) } } }) } } func TestSentencesWithFalsePositivesAndProfanities(t *testing.T) { sentences := []string{"You are a shitty associate", "Go away, asshole!"} tests := []struct { name string profanityDetector *ProfanityDetector }{ { name: "With Default Dictionary", profanityDetector: NewProfanityDetector(), }, { name: "With Custom Dictionary", profanityDetector: NewProfanityDetector().WithCustomDictionary(DefaultProfanities, DefaultFalsePositives, DefaultFalseNegatives), }, } for _, tt := range tests { t.Run(tt.name, func(t 
*testing.T) { for _, s := range sentences { if !tt.profanityDetector.IsProfane(s) { t.Error("Expected true, got false from sentence", s) } } }) } } // "The Adventures of Sherlock Holmes" by Arthur Conan Doyle is in the public domain, // which makes it a perfect source to use as reference. func TestSentencesFromTheAdventuresOfSherlockHolmes(t *testing.T) { defaultProfanityDetector = nil sentences := []string{ "I had called upon my friend, Mr. Sherlock Holmes, one day in the autumn of last year and found him in deep conversation with a very stout, florid-faced, elderly gentleman with fiery red hair.", "With an apology for my intrusion, I was about to withdraw when Holmes pulled me abruptly into the room and closed the door behind me.", "You could not possibly have come at a better time, my dear Watson, he said cordially", "I was afraid that you were engaged.", "So I am. Very much so.", "Then I can wait in the next room.", "Not at all. This gentleman, Mr. Wilson, has been my partner and helper in many of my most successful cases, and I have no doubt that he will be of the utmost use to me in yours also.", "The stout gentleman half rose from his chair and gave a bob of greeting, with a quick little questioning glance from his small fat-encircled eyes", "Try the settee, said Holmes, relapsing into his armchair and putting his fingertips together, as was his custom when in judicial moods.", "I know, my dear Watson, that you share my love of all that is bizarre and outside the conventions and humdrum routine of everyday life.", "You have shown your relish for it by the enthusiasm which has prompted you to chronicle, and, if you will excuse my saying so, somewhat to embellish so many of my own little adventures.", "You did, Doctor, but none the less you must come round to my view, for otherwise I shall keep on piling fact upon fact on you until your reason breaks down under them and acknowledges me to be right.", "Now, Mr. 
Jabez Wilson here has been good enough to call upon me this morning, and to begin a narrative which promises to be one of the most singular which I have listened to for some time.", "You have heard me remark that the strangest and most unique things are very often connected not with the larger but with the smaller crimes, and occasionally", "indeed, where there is room for doubt whether any positive crime has been committed.", "As far as I have heard it is impossible for me to say whether the present case is an instance of crime or not, but the course of events is certainly among the most singular that I have ever listened to.", "Perhaps, Mr. Wilson, you would have the great kindness to recommence your narrative.", "I ask you not merely because my friend Dr. Watson has not heard the opening part but also because the peculiar nature of the story makes me anxious to have every possible detail from your lips.", "As a rule, when I have heard some slight indication of the course of events, I am able to guide myself by the thousands of other similar cases which occur to my memory.", "In the present instance I am forced to admit that the facts are, to the best of my belief, unique.", "We had reached the same crowded thoroughfare in which we had found ourselves in the morning.", "Our cabs were dismissed, and, following the guidance of Mr. Merryweather, we passed down a narrow passage and through a side door, which he opened for us", "Within there was a small corridor, which ended in a very massive iron gate.", "We were seated at breakfast one morning, my wife and I, when the maid brought in a telegram. 
It was from Sherlock Holmes and ran in this way", } for _, s := range sentences { if IsProfane(s) { t.Error("Expected false, got false from sentence", s) } } } func TestSanitize(t *testing.T) { expectedString := "whatthefuckisyourproblem" sanitizedString, _ := NewProfanityDetector().sanitize("What the fu_ck is y()ur pr0bl3m?", false) if sanitizedString != expectedString { t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString) } } func TestSanitizeWithoutSanitizingSpecialCharacters(t *testing.T) { expectedString := "whatthefu_ckisy()urproblem?" sanitizedString, _ := NewProfanityDetector().WithSanitizeSpecialCharacters(false).sanitize("What the fu_ck is y()ur pr0bl3m?", false) if sanitizedString != expectedString { t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString) } } func TestSanitizeWithoutSanitizingLeetSpeak(t *testing.T) { expectedString := "whatthefuckisyurpr0bl3m" sanitizedString, _ := NewProfanityDetector().WithSanitizeLeetSpeak(false).sanitize("What the fu_ck is y()ur pr0bl3m?", false) if sanitizedString != expectedString { t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString) } } func TestDefaultDriver_UTF8(t *testing.T) { detector := NewProfanityDetector().WithCustomDictionary( []string{"anal", "あほ"}, // profanities []string{"あほほ"}, // falsePositives []string{"あほほし"}, // falseNegatives ) unsanitizedString := "いい加減にしろ あほほし あほほ あほ anal ほ" expectedString := "いい加減にしろ **** あほほ ** **** ほ" isProfane := detector.IsProfane(unsanitizedString) if !isProfane { t.Error("Expected false, got false from sentence", unsanitizedString) } sanitizedString := detector.Censor(unsanitizedString) if sanitizedString != expectedString { t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString) } } ================================================ FILE: profanities.go ================================================ package goaway // DefaultProfanities is a list of profanities that are checked after the DefaultFalsePositives 
are removed // // Note that some words that would normally be in this list may be in DefaultFalseNegatives var DefaultProfanities = []string{ "anal", "anus", "arse", "ass", "ballsack", "balls", "bastard", "bitch", "btch", "biatch", "blowjob", "bollock", "bollok", "boner", "boob", "bugger", "butt", "choad", "clitoris", "cock", "coon", "crap", "cum", "cunt", "dick", "dildo", "douchebag", "dyke", "fag", "feck", "fellate", "fellatio", "felching", "fuck", "fudgepacker", "flange", "gtfo", "gyat", "hoe", // while that's also a tool, I doubt somebody would be checking for profanities if that tool was relevant "horny", "incest", "jerk", "jizz", "labia", "masturbat", "muff", "naked", "nazi", "nigga", "niggu", "nipple", "nips", "nude", "pedophile", "penis", "piss", "poop", "porn", "prick", "prostitut", "pube", "pussie", "pussy", "queer", "rape", "rapist", "retard", "rimjob", "scrotum", "sex", "shit", "slut", "spunk", "stfu", "suckmy", "tits", "tittie", "titty", "turd", "twat", "vagina", "wank", "whore", } ================================================ FILE: replacements.go ================================================ package goaway // DefaultCharacterReplacements is the mapping of all characters that are replaced by other characters before // attempting to find a profanity. 
var DefaultCharacterReplacements = map[rune]rune{ // Leetspeak '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '7': 'l', '$': 's', '!': 'i', '+': 't', '#': 'h', '@': 'a', '<': 'c', // Special characters '-': ' ', '_': ' ', '|': ' ', '.': ' ', ',': ' ', '(': ' ', ')': ' ', '>': ' ', '"': ' ', '`': ' ', '~': ' ', '*': ' ', '&': ' ', '%': ' ', '?': ' ', } ================================================ FILE: writer.go ================================================ package goaway import "io" func NewWriter(base io.Writer, detector *ProfanityDetector) *Writer { return &Writer{ base: base, detector: detector, } } type Writer struct { base io.Writer detector *ProfanityDetector buf []byte } func (w *Writer) Write(payload []byte) (int, error) { last := 0 for i, char := range payload { if char != byte('\n') { continue } result := append(w.buf, payload[last:i+1]...) _, err := w.base.Write([]byte(w.detector.Censor(string(result)))) if err != nil { return 0, err } w.buf = w.buf[:0] last = i + 1 } w.buf = payload[last:] return len(payload), nil } func (w *Writer) Flush() error { if len(w.buf) == 0 { return nil } _, err := w.base.Write([]byte(w.detector.Censor(string(w.buf)))) w.buf = w.buf[:0] return err } ================================================ FILE: writer_test.go ================================================ package goaway_test import ( "bytes" "testing" goaway "github.com/TwiN/go-away" ) func TestWriter(t *testing.T) { tests := map[string]struct { input [][]byte detector *goaway.ProfanityDetector expectedOutput string }{ "no writing, empty output": { input: [][]byte{}, detector: goaway.NewProfanityDetector(), expectedOutput: "", }, "single uncensored write": { input: [][]byte{ []byte("I'm just a normal line"), }, detector: goaway.NewProfanityDetector(), expectedOutput: "I'm just a normal line", }, "single censored write": { input: [][]byte{ []byte("I'm just a shitty line"), }, detector: goaway.NewProfanityDetector(), expectedOutput: "I'm just a ****ty line", 
}, "multi-line single write": { input: [][]byte{ []byte("I'm just a shitty line\nAnd I'm another line"), }, detector: goaway.NewProfanityDetector(), expectedOutput: "I'm just a ****ty line\nAnd I'm another line", }, "single-line multi writes": { input: [][]byte{ []byte("I'm just a shitty line\n"), []byte("And I'm another line"), []byte("\nAnd I'm the final fucking line"), }, detector: goaway.NewProfanityDetector(), expectedOutput: "I'm just a ****ty line\nAnd I'm another line\nAnd I'm the final ****ing line", }, } for name, tc := range tests { t.Run(name, func(t *testing.T) { buf := &bytes.Buffer{} writer := goaway.NewWriter(buf, tc.detector) for _, write := range tc.input { n, err := writer.Write(write) if n != len(write) { t.Errorf("unexpected write count %d != %d", n, len(write)) } if err != nil { t.Errorf("unexpected writing error %v", err) } } err := writer.Flush() if err != nil { t.Errorf("unexpected error flushing writer %v", err) } result := buf.String() if tc.expectedOutput != result { t.Errorf("expected %q but recieved %q", tc.expectedOutput, result) } }) } }