Repository: kdjones/fuzzystring
Branch: master
Commit: 1828d564bf9e
Files: 28
Total size: 73.9 KB
Directory structure:
gitextract_f_31__2f/
├── .gitattributes
├── .gitignore
├── FuzzyString/
│ ├── ApproximatelyEquals.cs
│ ├── FuzzyString.csproj
│ ├── FuzzyStringComparisonOptions.cs
│ ├── FuzzyStringComparisonTolerance.cs
│ ├── HammingDistance.cs
│ ├── JaccardDistance.cs
│ ├── JaroDistance.cs
│ ├── JaroWinklerDistance.cs
│ ├── LevenshteinDistance.cs
│ ├── LongestCommonSubsequence.cs
│ ├── LongestCommonSubstring.cs
│ ├── Operations.cs
│ ├── OverlapCoefficient.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ ├── RatcliffObershelpSimilarity.cs
│ ├── Resources/
│ │ └── License.txt
│ ├── SorensenDiceDistance.cs
│ └── TanimotoCoefficient.cs
├── FuzzyString.sln
├── FuzzyStringConsole/
│ ├── App.config
│ ├── FuzzyStringConsole.csproj
│ ├── Program.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ └── Resources/
│ └── License.txt
├── LICENSE
└── README.md
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp
###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
================================================
FILE: .gitignore
================================================
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
# User-specific files
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
[Xx]64/
[Xx]86/
[Bb]uild/
bld/
[Bb]in/
[Oo]bj/
# Visual Studio 2015 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUNIT
*.VisualState.xml
TestResult.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# DNX
project.lock.json
artifacts/
*_i.c
*_p.c
*_i.h
*.ilk
*.meta
*.obj
*.pch
*.pdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# JustCode is a .NET coding add-in
.JustCode
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# TODO: Un-comment the next line if you do not want to check in
# your web deploy settings because they may include unencrypted
# passwords
#*.pubxml
*.publishproj
# NuGet Packages
*.nupkg
# The packages folder can be ignored because of Package Restore
**/packages/*
# except build/, which is used as an MSBuild target.
!**/packages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/packages/repositories.config
# NuGet v3's project.json files produce more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Microsoft Azure ApplicationInsights config file
ApplicationInsights.config
# Windows Store app package directory
AppPackages/
BundleArtifacts/
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!*.[Cc]ache/
# Others
ClientBin/
[Ss]tyle[Cc]op.*
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.pfx
*.publishsettings
node_modules/
orleans.codegen.cs
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
# SQL Server files
*.mdf
*.ldf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# LightSwitch generated files
GeneratedArtifacts/
ModelManifest.xml
# Paket dependency manager
.paket/paket.exe
# FAKE - F# Make
.fake/
================================================
FILE: FuzzyString/ApproximatelyEquals.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Decides whether two strings are approximately equal. Every metric selected in
    /// <paramref name="options"/> is converted to a distance in which 0 means "identical"
    /// and 1 means "completely different"; the distances are averaged and the average is
    /// compared with the cut-off that belongs to <paramref name="tolerance"/>.
    /// </summary>
    /// <param name="source">The first string to compare.</param>
    /// <param name="target">The second string to compare.</param>
    /// <param name="tolerance">
    /// Selects the cut-off for the averaged distance: Strong &lt; 0.25, Normal &lt; 0.5,
    /// Weak &lt; 0.75, Manual &lt; 0.6.
    /// </param>
    /// <param name="options">
    /// The metrics to average, plus the optional CaseSensitive switch. Unless CaseSensitive
    /// is supplied both strings are upper-cased first. UseTanimotoCoefficient is not
    /// evaluated by this method.
    /// </param>
    /// <returns>
    /// True when the averaged distance is below the cut-off; false otherwise, and also false
    /// when no selected metric produced a usable value.
    /// </returns>
    public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
    {
        List<double> comparisonResults = new List<double>();

        if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
        {
            source = source.Capitalize();
            target = target.Capitalize();
        }

        // Hamming distance is only defined for equal-length strings; it is normalized by that
        // length using floating-point division (integer division collapsed it to 0 or 1).
        if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance) && source.Length == target.Length)
        {
            AddIfDefined(comparisonResults, RatioOrZero(source.HammingDistance(target), target.Length));
        }

        // Min: 0    Max: 1
        if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
        {
            AddIfDefined(comparisonResults, source.JaccardDistance(target));
        }

        // JaroDistance and JaroWinklerDistance return similarities (1 for identical strings,
        // 0 when nothing matches), so they are inverted before joining the distance average.
        if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
        {
            AddIfDefined(comparisonResults, 1 - source.JaroDistance(target));
        }

        if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
        {
            AddIfDefined(comparisonResults, 1 - source.JaroWinklerDistance(target));
        }

        // The normalized variant takes precedence when both Levenshtein options are supplied.
        if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
        {
            // Range of the normalized distance: 0 .. (longer length - lower bound).
            double range = Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target);
            AddIfDefined(comparisonResults, RatioOrZero(source.NormalizedLevenshteinDistance(target), range));
        }
        else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
        {
            // The upper bound is 0 for identical strings; RatioOrZero turns that 0/0 into 0.
            AddIfDefined(comparisonResults, RatioOrZero(source.LevenshteinDistance(target), source.LevenshteinDistanceUpperBounds(target)));
        }

        if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
        {
            string subsequence = source.LongestCommonSubsequence(target);
            AddIfDefined(comparisonResults, SharedLengthDistance(subsequence == null ? 0 : subsequence.Length, source, target));
        }

        if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
        {
            // LongestCommonSubstring returns null when either string is empty.
            string substring = source.LongestCommonSubstring(target);
            AddIfDefined(comparisonResults, SharedLengthDistance(substring == null ? 0 : substring.Length, source, target));
        }

        // Min: 0    Max: 1
        if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
        {
            AddIfDefined(comparisonResults, source.SorensenDiceDistance(target));
        }

        // The remaining two metrics are similarities, hence the inversion.
        if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
        {
            AddIfDefined(comparisonResults, 1 - source.OverlapCoefficient(target));
        }

        if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
        {
            AddIfDefined(comparisonResults, 1 - source.RatcliffObershelpSimilarity(target));
        }

        if (comparisonResults.Count == 0)
        {
            return false;
        }

        double averageDistance = comparisonResults.Average();

        switch (tolerance)
        {
            case FuzzyStringComparisonTolerance.Strong:
                return averageDistance < 0.25;
            case FuzzyStringComparisonTolerance.Normal:
                return averageDistance < 0.5;
            case FuzzyStringComparisonTolerance.Weak:
                return averageDistance < 0.75;
            case FuzzyStringComparisonTolerance.Manual:
                // Was "> 0.6", which accepted dissimilar strings and rejected similar ones.
                return averageDistance < 0.6;
            default:
                return false;
        }
    }

    /// <summary>
    /// Adds a distance to the running list unless it is NaN, so that a metric which is
    /// undefined for the given inputs does not turn the whole average into NaN.
    /// </summary>
    private static void AddIfDefined(List<double> results, double distance)
    {
        if (!double.IsNaN(distance))
        {
            results.Add(distance);
        }
    }

    /// <summary>
    /// Floating-point division that yields 0 when the denominator is 0. In every place it
    /// is used above, a zero denominator means the two strings cannot differ under that metric.
    /// </summary>
    private static double RatioOrZero(double numerator, double denominator)
    {
        return denominator == 0 ? 0 : numerator / denominator;
    }

    /// <summary>
    /// Converts the length of a shared subsequence/substring into a distance relative to the
    /// shorter string. When the shorter string is empty the distance is 0 if both are empty
    /// and 1 otherwise.
    /// </summary>
    private static double SharedLengthDistance(int sharedLength, string source, string target)
    {
        int shorterLength = Math.Min(source.Length, target.Length);
        if (shorterLength == 0)
        {
            return source.Length == target.Length ? 0 : 1;
        }
        return 1 - (Convert.ToDouble(sharedLength) / Convert.ToDouble(shorterLength));
    }
}
}
================================================
FILE: FuzzyString/FuzzyString.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">
<!-- SDK-style project for the FuzzyString library. -->
<PropertyGroup>
<!-- Target framework of the library. -->
<TargetFramework>netstandard2.0</TargetFramework>
<!-- Produce the NuGet package as part of every build. -->
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<PackageId>FuzzyString</PackageId>
<!-- NOTE(review): the value below is a relative file path, not a URL; newer NuGet tooling
     prefers PackageLicenseExpression or PackageLicenseFile. Confirm and migrate if needed. -->
<PackageLicenseUrl>./Resources/License.txt</PackageLicenseUrl>
<PackageProjectUrl>https://github.com/kdjones/fuzzystring</PackageProjectUrl>
<RepositoryUrl>https://github.com/kdjones/fuzzystring</RepositoryUrl>
</PropertyGroup>
</Project>
================================================
FILE: FuzzyString/FuzzyStringComparisonOptions.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
/// <summary>
/// Switches accepted by <c>ApproximatelyEquals</c>: one per metric that may take part in
/// the comparison, plus <see cref="CaseSensitive"/>. The numeric values are spelled out
/// so that they stay stable if members are ever reordered.
/// </summary>
public enum FuzzyStringComparisonOptions
{
    /// <summary>Include the Hamming distance (only used when both strings have equal length).</summary>
    UseHammingDistance = 0,

    /// <summary>Include the Jaccard distance.</summary>
    UseJaccardDistance = 1,

    /// <summary>Include the Jaro metric.</summary>
    UseJaroDistance = 2,

    /// <summary>Include the Jaro-Winkler metric.</summary>
    UseJaroWinklerDistance = 3,

    /// <summary>
    /// Include the Levenshtein distance. Ignored by <c>ApproximatelyEquals</c> when
    /// <see cref="UseNormalizedLevenshteinDistance"/> is supplied as well.
    /// </summary>
    UseLevenshteinDistance = 4,

    /// <summary>Include the longest common subsequence metric.</summary>
    UseLongestCommonSubsequence = 5,

    /// <summary>Include the longest common substring metric.</summary>
    UseLongestCommonSubstring = 6,

    /// <summary>Include the Levenshtein distance with its lower bound subtracted.</summary>
    UseNormalizedLevenshteinDistance = 7,

    /// <summary>Include the overlap coefficient.</summary>
    UseOverlapCoefficient = 8,

    /// <summary>Include the Ratcliff/Obershelp similarity.</summary>
    UseRatcliffObershelpSimilarity = 9,

    /// <summary>Include the Sorensen-Dice distance.</summary>
    UseSorensenDiceDistance = 10,

    /// <summary>Tanimoto coefficient switch; <c>ApproximatelyEquals</c> does not consult it.</summary>
    UseTanimotoCoefficient = 11,

    /// <summary>Compare the strings as given instead of upper-casing both first.</summary>
    CaseSensitive = 12
}
}
================================================
FILE: FuzzyString/FuzzyStringComparisonTolerance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
/// <summary>
/// Selects which cut-off <c>ApproximatelyEquals</c> applies to the averaged comparison
/// result. The numeric values are spelled out so that they stay stable if members are
/// ever reordered.
/// </summary>
public enum FuzzyStringComparisonTolerance
{
    /// <summary>Strictest setting: the average must be below 0.25.</summary>
    Strong = 0,

    /// <summary>Middle setting: the average must be below 0.5.</summary>
    Normal = 1,

    /// <summary>Most lenient setting: the average must be below 0.75.</summary>
    Weak = 2,

    /// <summary>The average is compared against a fixed 0.6 cut-off.</summary>
    Manual = 3
}
}
================================================
FILE: FuzzyString/HammingDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Counts the positions at which two equal-length strings hold different characters.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// The number of differing positions, or the sentinel value 99999 when the two strings
    /// do not have the same length.
    /// </returns>
    public static int HammingDistance(this string source, string target)
    {
        // Strings of unequal length are reported through a sentinel rather than an exception.
        if (source.Length != target.Length)
        {
            return 99999;
        }

        return Enumerable.Range(0, source.Length)
                         .Count(position => source[position] != target[position]);
    }
}
}
================================================
FILE: FuzzyString/JaccardDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Jaccard distance between the character sets of two strings: 1 minus the
    /// <see cref="JaccardIndex"/>.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>A value from 0 (same character set) to 1 (no shared characters).</returns>
    public static double JaccardDistance(this string source, string target)
    {
        return 1 - source.JaccardIndex(target);
    }

    /// <summary>
    /// Jaccard index of the character sets of two strings: the number of distinct
    /// characters present in both, divided by the number of distinct characters present
    /// in either.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// A value from 0 to 1. Two empty strings yield 1 (their character sets are equal)
    /// instead of the NaN that a 0/0 division would produce.
    /// </returns>
    public static double JaccardIndex(this string source, string target)
    {
        int unionSize = source.Union(target).Count();

        // Only possible when both strings are empty.
        if (unionSize == 0)
        {
            return 1;
        }

        return Convert.ToDouble(source.Intersect(target).Count()) / Convert.ToDouble(unionSize);
    }
}
}
================================================
FILE: FuzzyString/JaroDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Computes the Jaro similarity of two strings. Despite the method name the result is a
    /// similarity: 1 for identical (non-empty) strings and 0 when no characters match.
    /// <br/><br/>
    /// The previous implementation used integer division and matched on the set of distinct
    /// characters, so identical strings containing repeated characters never scored 1. This
    /// version follows the standard definition: characters match when they are equal and no
    /// further apart than the match window, and half of the matched characters that appear
    /// in a different order count as transpositions.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// A value from 0 to 1; 0 when there are no matching characters, which includes the case
    /// where either string is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either string is null.</exception>
    public static double JaroDistance(this string source, string target)
    {
        if (source == null) { throw new ArgumentNullException(nameof(source)); }
        if (target == null) { throw new ArgumentNullException(nameof(target)); }
        if (source.Length == 0 || target.Length == 0) { return 0; }

        // Integer division is intended: the window is floor(longer / 2) - 1, never negative.
        int matchWindow = Math.Max(0, (Math.Max(source.Length, target.Length) / 2) - 1);

        bool[] sourceMatched = new bool[source.Length];
        bool[] targetMatched = new bool[target.Length];
        int matches = 0;

        for (int i = 0; i < source.Length; i++)
        {
            int windowStart = Math.Max(0, i - matchWindow);
            int windowEnd = Math.Min(target.Length - 1, i + matchWindow);

            for (int j = windowStart; j <= windowEnd; j++)
            {
                // Each target character may be matched at most once.
                if (!targetMatched[j] && source[i] == target[j])
                {
                    sourceMatched[i] = true;
                    targetMatched[j] = true;
                    matches++;
                    break;
                }
            }
        }

        if (matches == 0) { return 0; }

        // Walk both strings' matched characters in order; every position where they differ
        // is half of a transposition.
        int outOfOrder = 0;
        int targetIndex = 0;
        for (int i = 0; i < source.Length; i++)
        {
            if (!sourceMatched[i]) { continue; }
            while (!targetMatched[targetIndex]) { targetIndex++; }
            if (source[i] != target[targetIndex]) { outOfOrder++; }
            targetIndex++;
        }

        double m = matches;
        double t = outOfOrder / 2.0;
        return ((m / source.Length) + (m / target.Length) + ((m - t) / m)) / 3.0;
    }
}
}
================================================
FILE: FuzzyString/JaroWinklerDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Jaro-Winkler metric with a prefix scale of 0.1: the Jaro result is raised in
    /// proportion to the length of the prefix the two strings share.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>jaro + (prefixLength * 0.1 * (1 - jaro)).</returns>
    public static double JaroWinklerDistance(this string source, string target)
    {
        return JaroWinklerDistanceWithPrefixScale(source, target, 0.1);
    }

    /// <summary>
    /// Jaro-Winkler metric with a caller-supplied prefix scale.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <param name="p">
    /// Requested prefix scale. Values above 0.25 are treated as 0.25 (the maximum value for
    /// the result not to exceed 1); values below 0 are treated as 0, which yields the plain
    /// Jaro result.
    /// </param>
    /// <returns>jaro + (prefixLength * scale * (1 - jaro)).</returns>
    public static double JaroWinklerDistanceWithPrefixScale(string source, string target, double p)
    {
        double scale = p > 0.25 ? 0.25
                     : p < 0 ? 0
                     : p;

        double jaro = source.JaroDistance(target);
        double sharedPrefix = CommonPrefixLength(source, target);

        return jaro + (sharedPrefix * scale * (1 - jaro));
    }

    /// <summary>
    /// Counts how many leading characters the two strings have in common, looking at no more
    /// than the first four characters.
    /// </summary>
    private static double CommonPrefixLength(string source, string target)
    {
        // When either string has four characters or fewer, the shorter length is the limit.
        int limit = (source.Length <= 4 || target.Length <= 4)
            ? Math.Min(source.Length, target.Length)
            : 4;

        int shared = 0;
        while (shared < limit && source[shared] == target[shared])
        {
            shared++;
        }
        return shared;
    }
}
}
================================================
FILE: FuzzyString/LevenshteinDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Calculate the minimum number of single-character edits needed to change the source into the target,
    /// allowing insertions, deletions, and substitutions.
    /// <br/><br/>
    /// Computed with a two-row dynamic-programming table: time is proportional to
    /// source.Length * target.Length and extra memory to target.Length. The former recursive
    /// version was exponential and, because of a misplaced parenthesis, charged 2 for a
    /// deletion (so "ab" to "a" came out as 2 instead of 1).
    /// </summary>
    /// <param name="source">The string to transform.</param>
    /// <param name="target">The string to transform the source into.</param>
    /// <returns>The number of edits required to transform the source into the target. This is at most the length of the longest string, and at least the difference in length between the two strings</returns>
    public static int LevenshteinDistance(this string source, string target)
    {
        if (source.Length == 0) { return target.Length; }
        if (target.Length == 0) { return source.Length; }

        // previousRow[j] = distance between the first (i - 1) source characters and the
        // first j target characters; currentRow is the same for the first i source characters.
        int[] previousRow = new int[target.Length + 1];
        int[] currentRow = new int[target.Length + 1];

        for (int j = 0; j <= target.Length; j++) { previousRow[j] = j; }

        for (int i = 1; i <= source.Length; i++)
        {
            currentRow[0] = i;

            for (int j = 1; j <= target.Length; j++)
            {
                int substitutionCost = source[i - 1] == target[j - 1] ? 0 : 1;

                int deletion = previousRow[j] + 1;
                int insertion = currentRow[j - 1] + 1;
                int substitution = previousRow[j - 1] + substitutionCost;

                currentRow[j] = Math.Min(Math.Min(deletion, insertion), substitution);
            }

            // Reuse the two buffers instead of allocating a new row per source character.
            int[] finishedRow = currentRow;
            currentRow = previousRow;
            previousRow = finishedRow;
        }

        return previousRow[target.Length];
    }

    /// <summary>
    /// Calculate the Levenshtein distance and subtract its lower bound (the difference in
    /// length between the two strings), so that the smallest possible result is always zero.
    /// </summary>
    /// <param name="source">The string to transform.</param>
    /// <param name="target">The string to transform the source into.</param>
    /// <returns>The Levenshtein distance, normalized so that the lower bound is always zero, rather than the difference in length between the two strings</returns>
    public static double NormalizedLevenshteinDistance(this string source, string target)
    {
        int unnormalizedLevenshteinDistance = source.LevenshteinDistance(target);
        return unnormalizedLevenshteinDistance - source.LevenshteinDistanceLowerBounds(target);
    }

    /// <summary>
    /// The upper bounds is either the length of the longer string, or the Hamming distance.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// For strings of different length, the length of the longer one; for strings of equal
    /// length, the number of positions at which they differ.
    /// </returns>
    public static int LevenshteinDistanceUpperBounds(this string source, string target)
    {
        // Different lengths: the upper bound is the length of the longer string.
        if (source.Length != target.Length)
        {
            return Math.Max(source.Length, target.Length);
        }

        // Equal lengths: substituting every mismatched position is always enough, so the
        // number of mismatched positions (the Hamming distance) is the upper bound. It is
        // counted here directly, which keeps this method free of other helpers.
        int mismatches = 0;
        for (int i = 0; i < source.Length; i++)
        {
            if (source[i] != target[i]) { mismatches++; }
        }
        return mismatches;
    }

    /// <summary>
    /// The lower bounds is the difference in length between the two strings
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>The absolute difference between the two string lengths.</returns>
    public static int LevenshteinDistanceLowerBounds(this string source, string target)
    {
        // If the two strings are different lengths then the lower bounds is the difference in length.
        return Math.Abs(source.Length - target.Length);
    }
}
}
================================================
FILE: FuzzyString/LongestCommonSubsequence.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Finds a longest sequence of characters that occurs, in order but not necessarily
    /// contiguously, in both strings.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>A longest common subsequence; the empty string when there is none.</returns>
    public static string LongestCommonSubsequence(this string source, string target)
    {
        int[,] lengths = LongestCommonSubsequenceLengthTable(source, target);
        return Backtrack(lengths, source, target, source.Length, target.Length);
    }

    /// <summary>
    /// Builds the table whose cell [i, j] holds the length of the longest common subsequence
    /// of the first i characters of the source and the first j characters of the target.
    /// </summary>
    private static int[,] LongestCommonSubsequenceLengthTable(string source, string target)
    {
        int rowCount = source.Length + 1;
        int columnCount = target.Length + 1;

        // A new int array is zero-filled, so row 0 and column 0 already hold their value.
        int[,] lengths = new int[rowCount, columnCount];

        for (int row = 1; row < rowCount; row++)
        {
            for (int column = 1; column < columnCount; column++)
            {
                lengths[row, column] = source[row - 1] == target[column - 1]
                    ? lengths[row - 1, column - 1] + 1
                    : Math.Max(lengths[row, column - 1], lengths[row - 1, column]);
            }
        }

        return lengths;
    }

    /// <summary>
    /// Reads the subsequence back out of the length table, starting from cell [i, j] and
    /// walking towards the origin.
    /// </summary>
    private static string Backtrack(int[,] C, string source, string target, int i, int j)
    {
        // Characters are found last-to-first; a stack hands them back first-to-last.
        Stack<char> collected = new Stack<char>();

        while (i != 0 && j != 0)
        {
            if (source[i - 1] == target[j - 1])
            {
                collected.Push(source[i - 1]);
                i--;
                j--;
            }
            else if (C[i, j - 1] > C[i - 1, j])
            {
                j--;
            }
            else
            {
                i--;
            }
        }

        return new string(collected.ToArray());
    }
}
}
================================================
FILE: FuzzyString/LongestCommonSubstring.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Finds the longest run of consecutive characters that appears in both strings. When
    /// several runs share the maximum length, the one reached first while scanning the
    /// source from left to right is returned.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// The longest common substring, the empty string when the strings share no character,
    /// or null when either string is null or empty.
    /// </returns>
    public static string LongestCommonSubstring(this string source, string target)
    {
        if (String.IsNullOrEmpty(source) || String.IsNullOrEmpty(target)) { return null; }

        // Only the previous row of run lengths is needed to extend a run diagonally.
        int[] previousRow = new int[target.Length];
        int[] currentRow = new int[target.Length];

        int bestLength = 0;
        int bestStart = 0;

        for (int row = 0; row < source.Length; row++)
        {
            for (int column = 0; column < target.Length; column++)
            {
                int runLength = 0;

                if (source[row] == target[column])
                {
                    runLength = (row == 0 || column == 0) ? 1 : previousRow[column - 1] + 1;

                    // Strictly greater: an equally long later run does not replace the first.
                    if (runLength > bestLength)
                    {
                        bestLength = runLength;
                        bestStart = row - runLength + 1;
                    }
                }

                currentRow[column] = runLength;
            }

            int[] finishedRow = currentRow;
            currentRow = previousRow;
            previousRow = finishedRow;
        }

        return source.Substring(bestStart, bestLength);
    }
}
}
================================================
FILE: FuzzyString/Operations.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class Operations
{
    /// <summary>
    /// Upper-cases the whole string. The invariant culture is used so that the result does
    /// not depend on the current thread culture.
    /// </summary>
    /// <param name="source">The string to convert.</param>
    /// <returns>The upper-cased string.</returns>
    public static string Capitalize(this string source)
    {
        return source.ToUpperInvariant();
    }

    /// <summary>
    /// Splits a string into one single-character string per character.
    /// </summary>
    /// <param name="source">The string to split.</param>
    /// <returns>An array with as many elements as the string has characters.</returns>
    public static string[] SplitIntoIndividualElements(string source)
    {
        string[] stringCollection = new string[source.Length];
        for (int i = 0; i < stringCollection.Length; i++)
        {
            stringCollection[i] = source[i].ToString();
        }
        return stringCollection;
    }

    /// <summary>
    /// Concatenates the given strings in order.
    /// </summary>
    /// <param name="source">The strings to join.</param>
    /// <returns>The concatenation; the empty string for an empty sequence.</returns>
    public static string MergeIndividualElementsIntoString(IEnumerable<string> source)
    {
        // A single pass; the former Count()/ElementAt() loop re-enumerated the sequence
        // on every iteration.
        return string.Concat(source);
    }

    /// <summary>
    /// Lists the prefixes of a string that are shorter than the string itself, starting
    /// with the empty prefix.
    /// </summary>
    /// <param name="source">The string whose prefixes are listed.</param>
    /// <returns>For "abc": "", "a", "ab". An empty list for an empty string.</returns>
    public static List<string> ListPrefixes(this string source)
    {
        List<string> prefixes = new List<string>();
        for (int i = 0; i < source.Length; i++)
        {
            prefixes.Add(source.Substring(0, i));
        }
        return prefixes;
    }

    /// <summary>Lists all substrings of length 2; see <see cref="ListNGrams"/>.</summary>
    /// <param name="source">The string to split into bigrams.</param>
    /// <returns>The bigrams in order, or null when the string is shorter than 2.</returns>
    public static List<string> ListBiGrams(this string source)
    {
        return ListNGrams(source, 2);
    }

    /// <summary>Lists all substrings of length 3; see <see cref="ListNGrams"/>.</summary>
    /// <param name="source">The string to split into trigrams.</param>
    /// <returns>The trigrams in order, or null when the string is shorter than 3.</returns>
    public static List<string> ListTriGrams(this string source)
    {
        return ListNGrams(source, 3);
    }

    /// <summary>
    /// Lists every contiguous substring of length <paramref name="n"/>, from left to right.
    /// </summary>
    /// <param name="source">The string to split.</param>
    /// <param name="n">The length of each substring.</param>
    /// <returns>
    /// The n-grams in order of their start position (for "abcd" and n = 2: "ab", "bc", "cd"),
    /// or null when <paramref name="n"/> exceeds the length of the string.
    /// </returns>
    public static List<string> ListNGrams(this string source, int n)
    {
        if (n > source.Length)
        {
            return null;
        }

        List<string> nGrams = new List<string>();

        // The last n-gram starts at (Length - n), so that index must be included; the
        // former "<" bound dropped it. This loop also covers n == Length.
        for (int i = 0; i <= source.Length - n; i++)
        {
            nGrams.Add(source.Substring(i, n));
        }
        return nGrams;
    }
}
}
================================================
FILE: FuzzyString/OverlapCoefficient.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Overlap coefficient of the character sets of two strings: the number of distinct
    /// characters they share, divided by the size of the smaller character set.
    /// <br/><br/>
    /// The divisor used to be the shorter string length, which mixed a set-based numerator
    /// with a length-based denominator and kept identical strings with repeated characters
    /// (for example "aab") from scoring 1.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// A value from 0 to 1. When a string is empty the result is 1 if both are empty and 0
    /// otherwise, instead of the NaN that a 0/0 division would produce.
    /// </returns>
    public static double OverlapCoefficient(this string source, string target)
    {
        int sourceSetSize = source.Distinct().Count();
        int targetSetSize = target.Distinct().Count();
        int smallerSetSize = Math.Min(sourceSetSize, targetSetSize);

        if (smallerSetSize == 0)
        {
            // Both sizes are 0 only when both strings are empty.
            return sourceSetSize == targetSetSize ? 1 : 0;
        }

        return Convert.ToDouble(source.Intersect(target).Count()) / Convert.ToDouble(smallerSetSize);
    }
}
}
================================================
FILE: FuzzyString/Properties/AssemblyInfo.cs
================================================
// Namespace that provides the Guid attribute applied to the assembly below.
using System.Runtime.InteropServices;
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("ccb1b9db-80eb-42bf-b7d1-2d6f8886d98b")]
================================================
FILE: FuzzyString/RatcliffObershelpSimilarity.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
public static partial class ComparisonMetrics
{
    /// <summary>
    /// Ratcliff/Obershelp ("gestalt pattern matching") similarity: twice the number of
    /// matching characters divided by the combined length of both strings. Matching
    /// characters are found by taking the longest common substring and then repeating the
    /// search on the text to its left and to its right.
    /// <br/><br/>
    /// The previous implementation counted distinct shared characters instead, so identical
    /// strings with repeated characters scored below 1.
    /// </summary>
    /// <param name="source">The first string.</param>
    /// <param name="target">The second string.</param>
    /// <returns>
    /// A value from 0 (nothing in common) to 1 (identical). Two empty strings yield 1
    /// instead of the NaN that a 0/0 division would produce.
    /// </returns>
    public static double RatcliffObershelpSimilarity(this string source, string target)
    {
        int combinedLength = source.Length + target.Length;
        if (combinedLength == 0)
        {
            return 1;
        }

        int matchingCharacters = CountGestaltMatches(source, 0, source.Length, target, 0, target.Length);
        return (2 * Convert.ToDouble(matchingCharacters)) / Convert.ToDouble(combinedLength);
    }

    /// <summary>
    /// Counts the matching characters between source[sourceStart, sourceEnd) and
    /// target[targetStart, targetEnd): the length of their longest common substring plus,
    /// recursively, the matches on either side of it.
    /// </summary>
    private static int CountGestaltMatches(string source, int sourceStart, int sourceEnd, string target, int targetStart, int targetEnd)
    {
        if (sourceStart >= sourceEnd || targetStart >= targetEnd)
        {
            return 0;
        }

        int width = targetEnd - targetStart;

        // Run lengths ending at the previous / current source character; index 0 is a
        // permanent zero so the first target column needs no special case.
        int[] previousRow = new int[width + 1];
        int[] currentRow = new int[width + 1];

        int bestLength = 0;
        int bestSourceStart = sourceStart;
        int bestTargetStart = targetStart;

        for (int i = sourceStart; i < sourceEnd; i++)
        {
            for (int j = targetStart; j < targetEnd; j++)
            {
                int column = j - targetStart + 1;

                if (source[i] == target[j])
                {
                    currentRow[column] = previousRow[column - 1] + 1;

                    if (currentRow[column] > bestLength)
                    {
                        bestLength = currentRow[column];
                        bestSourceStart = i - bestLength + 1;
                        bestTargetStart = j - bestLength + 1;
                    }
                }
                else
                {
                    currentRow[column] = 0;
                }
            }

            int[] finishedRow = currentRow;
            currentRow = previousRow;
            previousRow = finishedRow;
        }

        if (bestLength == 0)
        {
            return 0;
        }

        return bestLength
            + CountGestaltMatches(source, sourceStart, bestSourceStart, target, targetStart, bestTargetStart)
            + CountGestaltMatches(source, bestSourceStart + bestLength, sourceEnd, target, bestTargetStart + bestLength, targetEnd);
    }
}
}
================================================
FILE: FuzzyString/Resources/License.txt
================================================
Eclipse Public License -v 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.
"Program" means the Contributions distributed in accordance with this Agreement.
"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.
d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained within the Program.
Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.
For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.
================================================
FILE: FuzzyString/SorensenDiceDistance.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
    public static partial class ComparisonMetrics
    {
        /// <summary>
        /// Computes the Sorensen-Dice distance between the character sets of two strings,
        /// defined as <c>1 - SorensenDiceIndex</c>.
        /// </summary>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <returns>A value in [0, 1]; 0 means both strings use exactly the same set of characters.</returns>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        public static double SorensenDiceDistance(this string source, string target)
        {
            return 1 - source.SorensenDiceIndex(target);
        }

        /// <summary>
        /// Computes the Sorensen-Dice index <c>2 * |A ∩ B| / (|A| + |B|)</c>, where A and B are the
        /// sets of distinct characters in <paramref name="source"/> and <paramref name="target"/>.
        /// </summary>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <returns>
        /// A value in [0, 1]; 1 means both strings use exactly the same set of characters.
        /// Two empty strings are treated as identical and yield 1.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        public static double SorensenDiceIndex(this string source, string target)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            // Intersect() yields distinct characters, so the denominator must also count
            // distinct characters; using raw string lengths would score identical strings
            // with repeated letters (e.g. "hello" vs "hello") below 1.
            int sourceSetSize = source.Distinct().Count();
            int targetSetSize = target.Distinct().Count();
            int combinedSize = sourceSetSize + targetSetSize;

            // Both strings empty: avoid 0/0 (NaN) and report a perfect match.
            if (combinedSize == 0)
            {
                return 1.0;
            }

            int sharedCount = source.Intersect(target).Count();
            return (2.0 * sharedCount) / combinedSize;
        }
    }
}
================================================
FILE: FuzzyString/TanimotoCoefficient.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace FuzzyString
{
    public static partial class ComparisonMetrics
    {
        /// <summary>
        /// Computes the Tanimoto coefficient <c>Nc / (Na + Nb - Nc)</c> over the sets of distinct
        /// characters in the two strings, where Na and Nb are the set sizes and Nc is the number
        /// of characters common to both.
        /// </summary>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <returns>
        /// A value in [0, 1]; 1 means both strings use exactly the same set of characters.
        /// Two empty strings are treated as identical and yield 1.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        public static double TanimotoCoefficient(this string source, string target)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }

            if (target == null)
            {
                throw new ArgumentNullException("target");
            }

            // Intersect() counts distinct shared characters, so Na and Nb must be distinct
            // counts as well; raw lengths would make "hello" vs "hello" score 4/6 instead of 1.
            int sourceSetSize = source.Distinct().Count();
            int targetSetSize = target.Distinct().Count();
            int sharedCount = source.Intersect(target).Count();
            int unionSize = sourceSetSize + targetSetSize - sharedCount;

            // Both strings empty: avoid 0/0 (NaN) and report a perfect match.
            if (unionSize == 0)
            {
                return 1.0;
            }

            return (double)sharedCount / unionSize;
        }
    }
}
================================================
FILE: FuzzyString.sln
================================================
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzyStringConsole", "FuzzyStringConsole\FuzzyStringConsole.csproj", "{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzyString", "FuzzyString\FuzzyString.csproj", "{2AAA900B-64FB-4874-9444-64AE33ACA970}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{8CFDC47C-8A8B-4163-A61C-2535CD872A64}"
ProjectSection(SolutionItems) = preProject
.gitattributes = .gitattributes
.gitignore = .gitignore
README.md = README.md
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}.Debug|Any CPU.ActiveCfg = Release|Any CPU
{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}.Debug|Any CPU.Build.0 = Release|Any CPU
{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}.Release|Any CPU.Build.0 = Release|Any CPU
{2AAA900B-64FB-4874-9444-64AE33ACA970}.Debug|Any CPU.ActiveCfg = Release|Any CPU
{2AAA900B-64FB-4874-9444-64AE33ACA970}.Debug|Any CPU.Build.0 = Release|Any CPU
{2AAA900B-64FB-4874-9444-64AE33ACA970}.Release|Any CPU.ActiveCfg = Release|Any CPU
{2AAA900B-64FB-4874-9444-64AE33ACA970}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
================================================
FILE: FuzzyStringConsole/App.config
================================================
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.1"/>
</startup>
</configuration>
================================================
FILE: FuzzyStringConsole/FuzzyStringConsole.csproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{FFE75CA6-E76E-4977-85D3-B6F256ACA5E6}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>FuzzyStringConsole</RootNamespace>
<AssemblyName>FuzzyStringConsole</AssemblyName>
<TargetFrameworkVersion>v4.6.1</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<SccProjectName>SAK</SccProjectName>
<SccLocalPath>SAK</SccLocalPath>
<SccAuxPath>SAK</SccAuxPath>
<SccProvider>SAK</SccProvider>
<IsWebBootstrapper>false</IsWebBootstrapper>
<PublishUrl>publish\</PublishUrl>
<Install>true</Install>
<InstallFrom>Disk</InstallFrom>
<UpdateEnabled>false</UpdateEnabled>
<UpdateMode>Foreground</UpdateMode>
<UpdateInterval>7</UpdateInterval>
<UpdateIntervalUnits>Days</UpdateIntervalUnits>
<UpdatePeriodically>false</UpdatePeriodically>
<UpdateRequired>false</UpdateRequired>
<MapFileExtensions>true</MapFileExtensions>
<ApplicationRevision>0</ApplicationRevision>
<ApplicationVersion>1.0.0.%2a</ApplicationVersion>
<UseApplicationTrust>false</UseApplicationTrust>
<BootstrapperEnabled>true</BootstrapperEnabled>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<BootstrapperPackage Include=".NETFramework,Version=v4.5">
<Visible>False</Visible>
<ProductName>Microsoft .NET Framework 4.5 %28x86 and x64%29</ProductName>
<Install>true</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Client.3.5">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1 Client Profile</ProductName>
<Install>false</Install>
</BootstrapperPackage>
<BootstrapperPackage Include="Microsoft.Net.Framework.3.5.SP1">
<Visible>False</Visible>
<ProductName>.NET Framework 3.5 SP1</ProductName>
<Install>false</Install>
</BootstrapperPackage>
</ItemGroup>
<ItemGroup>
<Content Include="Resources\License.txt" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\FuzzyString\FuzzyString.csproj">
<Project>{2aaa900b-64fb-4874-9444-64ae33aca970}</Project>
<Name>FuzzyString</Name>
</ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
================================================
FILE: FuzzyStringConsole/Program.cs
================================================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using FuzzyString;
namespace FuzzyStringConsole
{
    class Program
    {
        /// <summary>
        /// Demo entry point: compares "kevin" with "kevyn" using a fixed set of comparison
        /// options at each tolerance level (Weak, Normal, Strong), writes each result to the
        /// console on its own line, then waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string firstSpelling = "kevin";
            string secondSpelling = "kevyn";

            // Options applied to every comparison below.
            FuzzyStringComparisonOptions[] comparisonOptions =
            {
                FuzzyStringComparisonOptions.UseJaccardDistance,
                FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance,
                FuzzyStringComparisonOptions.UseOverlapCoefficient,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.CaseSensitive
            };

            // Tolerances are evaluated in this order, one output line each.
            FuzzyStringComparisonTolerance[] tolerances =
            {
                FuzzyStringComparisonTolerance.Weak,
                FuzzyStringComparisonTolerance.Normal,
                FuzzyStringComparisonTolerance.Strong
            };

            foreach (FuzzyStringComparisonTolerance tolerance in tolerances)
            {
                Console.WriteLine(firstSpelling.ApproximatelyEquals(secondSpelling, tolerance, comparisonOptions));
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}
================================================
FILE: FuzzyStringConsole/Properties/AssemblyInfo.cs
================================================
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General information about the FuzzyStringConsole assembly is controlled through
// the following set of attributes. Change these attribute values to modify the
// information associated with the assembly. Description, configuration, company,
// trademark and culture are intentionally left as empty strings here.
[assembly: AssemblyTitle("FuzzyStringConsole")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("FuzzyStringConsole")]
[assembly: AssemblyCopyright("Copyright © 2013")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is the ID of the typelib if this project is exposed to COM.
[assembly: Guid("a23fa530-8f7b-4072-826a-d39e88763f22")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values, or you can default the Build and Revision numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// Both the assembly version and the file version are currently pinned to 1.0.0.0.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
================================================
FILE: FuzzyStringConsole/Resources/License.txt
================================================
Eclipse Public License -v 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and documentation distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are distributed by that particular Contributor. A Contribution 'originates' from a Contributor if it was added to the Program by such Contributor itself or anyone acting on such Contributor's behalf. Contributions do not include additions to the Program which: (i) are separate modules of software distributed in conjunction with the Program under their own license agreement, and (ii) are not derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents " mean patent claims licensable by a Contributor which are necessarily infringed by the use or sale of its Contribution alone or when combined with the Program.
"Program" means the Contributions distributed in accordance with this Agreement.
"Recipient" means anyone who receives the Program under this Agreement, including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, distribute and sublicense the Contribution of such Contributor, if any, and such derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants Recipient a non-exclusive, worldwide, royalty-free patent license under Licensed Patents to make, use, sell, offer to sell, import and otherwise transfer the Contribution of such Contributor, if any, in source code and object code form. This patent license shall apply to the combination of the Contribution and the Program if, at the time the Contribution is added by the Contributor, such addition of the Contribution causes such combination to be covered by the Licensed Patents. The patent license shall not apply to any other combinations which include the Contribution. No hardware per se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses to its Contributions set forth herein, no assurances are provided by any Contributor that the Program does not infringe the patent or other intellectual property rights of any other entity. Each Contributor disclaims any liability to Recipient for claims brought by any other entity based on infringement of intellectual property rights or otherwise. As a condition to exercising the rights and licenses granted hereunder, each Recipient hereby assumes sole responsibility to secure any other intellectual property rights needed, if any. For example, if a third party patent license is required to allow Recipient to distribute the Program, it is Recipient's responsibility to acquire that license before distributing the Program.
d) Each Contributor represents that to its knowledge it has sufficient copyright rights in its Contribution, if any, to grant the copyright license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under its own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties and conditions, express and implied, including warranties or conditions of title and non-infringement, and implied warranties or conditions of merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for damages, including direct, indirect, special, incidental and consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are offered by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such Contributor, and informs licensees how to obtain it in a reasonable manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained within the Program.
Each Contributor must identify itself as the originator of its Contribution, if any, in a manner that reasonably allows subsequent Recipients to identify the originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with respect to end users, business partners and the like. While this license is intended to facilitate the commercial use of the Program, the Contributor who includes the Program in a commercial product offering should do so in a manner which does not create potential liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not apply to any claims or Losses relating to any actual or alleged intellectual property infringement. In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, and cooperate with the Commercial Contributor in, the defense and any related settlement negotiations. The Indemnified Contributor may participate in any such claim at its own expense.
For example, a Contributor might include the Program in a commercial product offering, Product X. That Contributor is then a Commercial Contributor. If that Commercial Contributor then makes performance claims, or offers warranties related to Product X, those performance claims and warranties are such Commercial Contributor's responsibility alone. Under this section, the Commercial Contributor would have to defend claims against the other Contributors related to those performance claims and warranties, and if a court requires any other Contributor to pay any damages as a result, the Commercial Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the appropriateness of using and distributing the Program and assumes all risks associated with its exercise of rights under this Agreement , including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this Agreement, and without further action by the parties hereto, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable.
If Recipient institutes patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Program itself (excluding combinations of the Program with other software or hardware) infringes such Recipient's patent(s), then such Recipient's rights granted under Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to comply with any of the material terms or conditions of this Agreement and does not cure such failure in a reasonable period of time after becoming aware of such noncompliance. If all Recipient's rights under this Agreement terminate, Recipient agrees to cease use and distribution of the Program as soon as reasonably practicable. However, Recipient's obligations under this Agreement and any licenses granted by Recipient relating to the Program shall continue and survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in order to avoid inconsistency the Agreement is copyrighted and may only be modified in the following manner. The Agreement Steward reserves the right to publish new versions (including revisions) of this Agreement from time to time. No one other than the Agreement Steward has the right to modify this Agreement. The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation may assign the responsibility to serve as the Agreement Steward to a suitable separate entity. Each new version of the Agreement will be given a distinguishing version number. The Program (including Contributions) may always be distributed subject to the version of the Agreement under which it was received. In addition, after a new version of the Agreement is published, Contributor may elect to distribute the Program (including its Contributions) under the new version. Except as expressly stated in Sections 2(a) and 2(b) above, Recipient receives no rights or licenses to the intellectual property of any Contributor under this Agreement, whether expressly, by implication, estoppel or otherwise. All rights in the Program not expressly granted under this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the intellectual property laws of the United States of America. No party to this Agreement will bring a legal action under this Agreement more than one year after the cause of action arose. Each party waives its rights to a jury trial in any resulting litigation.
================================================
FILE: LICENSE
================================================
Eclipse Public License - v 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and documentation
distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are
distributed by that particular Contributor. A Contribution 'originates'
from a Contributor if it was added to the Program by such Contributor
itself or anyone acting on such Contributor's behalf. Contributions do not
include additions to the Program which: (i) are separate modules of
software distributed in conjunction with the Program under their own
license agreement, and (ii) are not derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents" mean patent claims licensable by a Contributor which are
necessarily infringed by the use or sale of its Contribution alone or when
combined with the Program.
"Program" means the Contributions distributed in accordance with this
Agreement.
"Recipient" means anyone who receives the Program under this Agreement,
including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free copyright license to
reproduce, prepare derivative works of, publicly display, publicly
perform, distribute and sublicense the Contribution of such Contributor,
if any, and such derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free patent license under
Licensed Patents to make, use, sell, offer to sell, import and otherwise
transfer the Contribution of such Contributor, if any, in source code and
object code form. This patent license shall apply to the combination of
the Contribution and the Program if, at the time the Contribution is
added by the Contributor, such addition of the Contribution causes such
combination to be covered by the Licensed Patents. The patent license
shall not apply to any other combinations which include the Contribution.
No hardware per se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses
to its Contributions set forth herein, no assurances are provided by any
Contributor that the Program does not infringe the patent or other
intellectual property rights of any other entity. Each Contributor
disclaims any liability to Recipient for claims brought by any other
entity based on infringement of intellectual property rights or
otherwise. As a condition to exercising the rights and licenses granted
hereunder, each Recipient hereby assumes sole responsibility to secure
any other intellectual property rights needed, if any. For example, if a
third party patent license is required to allow Recipient to distribute
the Program, it is Recipient's responsibility to acquire that license
before distributing the Program.
d) Each Contributor represents that to its knowledge it has sufficient
copyright rights in its Contribution, if any, to grant the copyright
license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under
its own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties
and conditions, express and implied, including warranties or
conditions of title and non-infringement, and implied warranties or
conditions of merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for
damages, including direct, indirect, special, incidental and
consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are
offered by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such
Contributor, and informs licensees how to obtain it in a reasonable
manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained
within the Program.
Each Contributor must identify itself as the originator of its Contribution,
if
any, in a manner that reasonably allows subsequent Recipients to identify the
originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with
respect to end users, business partners and the like. While this license is
intended to facilitate the commercial use of the Program, the Contributor who
includes the Program in a commercial product offering should do so in a manner
which does not create potential liability for other Contributors. Therefore,
if a Contributor includes the Program in a commercial product offering, such
Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
every other Contributor ("Indemnified Contributor") against any losses,
damages and costs (collectively "Losses") arising from claims, lawsuits and
other legal actions brought by a third party against the Indemnified
Contributor to the extent caused by the acts or omissions of such Commercial
Contributor in connection with its distribution of the Program in a commercial
product offering. The obligations in this section do not apply to any claims
or Losses relating to any actual or alleged intellectual property
infringement. In order to qualify, an Indemnified Contributor must:
a) promptly notify the Commercial Contributor in writing of such claim, and
b) allow the Commercial Contributor to control, and cooperate with the
Commercial Contributor in, the defense and any related settlement
negotiations. The Indemnified Contributor may participate in any such claim at
its own expense.
For example, a Contributor might include the Program in a commercial product
offering, Product X. That Contributor is then a Commercial Contributor. If
that Commercial Contributor then makes performance claims, or offers
warranties related to Product X, those performance claims and warranties are
such Commercial Contributor's responsibility alone. Under this section, the
Commercial Contributor would have to defend claims against the other
Contributors related to those performance claims and warranties, and if a
court requires any other Contributor to pay any damages as a result, the
Commercial Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
Recipient is solely responsible for determining the appropriateness of using
and distributing the Program and assumes all risks associated with its
exercise of rights under this Agreement , including but not limited to the
risks and costs of program errors, compliance with applicable laws, damage to
or loss of data, programs or equipment, and unavailability or interruption of
operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under
applicable law, it shall not affect the validity or enforceability of the
remainder of the terms of this Agreement, and without further action by the
parties hereto, such provision shall be reformed to the minimum extent
necessary to make such provision valid and enforceable.
If Recipient institutes patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Program itself
(excluding combinations of the Program with other software or hardware)
infringes such Recipient's patent(s), then such Recipient's rights granted
under Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to
comply with any of the material terms or conditions of this Agreement and does
not cure such failure in a reasonable period of time after becoming aware of
such noncompliance. If all Recipient's rights under this Agreement terminate,
Recipient agrees to cease use and distribution of the Program as soon as
reasonably practicable. However, Recipient's obligations under this Agreement
and any licenses granted by Recipient relating to the Program shall continue
and survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in
order to avoid inconsistency the Agreement is copyrighted and may only be
modified in the following manner. The Agreement Steward reserves the right to
publish new versions (including revisions) of this Agreement from time to
time. No one other than the Agreement Steward has the right to modify this
Agreement. The Eclipse Foundation is the initial Agreement Steward. The
Eclipse Foundation may assign the responsibility to serve as the Agreement
Steward to a suitable separate entity. Each new version of the Agreement will
be given a distinguishing version number. The Program (including
Contributions) may always be distributed subject to the version of the
Agreement under which it was received. In addition, after a new version of the
Agreement is published, Contributor may elect to distribute the Program
(including its Contributions) under the new version. Except as expressly
stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
licenses to the intellectual property of any Contributor under this Agreement,
whether expressly, by implication, estoppel or otherwise. All rights in the
Program not expressly granted under this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the
intellectual property laws of the United States of America. No party to this
Agreement will bring a legal action under this Agreement more than one year
after the cause of action arose. Each party waives its rights to a jury trial in
any resulting litigation.
================================================
FILE: README.md
================================================
# fuzzystring
Approximate String Comparison in C#
*Originally Hosted on CodePlex*
http://fuzzystring.codeplex.com
## Project Description
FuzzyString is a library developed for use in my day job for reconciling naming conventions between different models of the electric grid. I have stripped off the power system specific code and put together what can effectively be used as a string extension for determining approximate equality between two strings. All of the algorithms used here have been pulled from online resources, translated into C#, and compiled into this library. I found several other similar open-source implementations around but nothing for .NET/C#. Adding the *.dll to your project will give you access to this extension and the individual extensions under the hood of the ApproximatelyEquals() extension.
## Algorithms included in this project
* [Hamming Distance](http://en.wikipedia.org/wiki/Hamming_distance)
* [Jaccard Distance](http://en.wikipedia.org/wiki/Jaccard_index)
* [Jaro Distance](http://en.wikipedia.org/wiki/Jaro_distance)
* [Jaro-Winkler Distance](http://en.wikipedia.org/wiki/Jaro_distance)
* [Levenshtein Distance](http://en.wikipedia.org/wiki/Levenshtein_distance)
* [Longest Common Subsequence](http://en.wikipedia.org/wiki/Longest_common_subsequence_problem)
* [Longest Common Substring](http://en.wikipedia.org/wiki/Longest_common_substring)
* [Overlap Coefficient](http://en.wikipedia.org/wiki/Overlap_coefficient)
* [Ratcliff-Obershelp Similarity](http://www.morfoedro.it/doc.php?n=223&lang=en)
* [Sorensen-Dice Distance](http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient)
* [Tanimoto Coefficient](http://en.wikipedia.org/wiki/Tanimoto_coefficient#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29)
## Approximate String Comparison
> Note: This sample is taken from the [legacy documentation] on CodePlex.
While all of the algorithms are exposed and can be used individually to obtain their raw results, they have also been combined so that they can be selectively used to judge the approximate equality of two strings. This is done through the `ApproximatelyEquals` extension and by setting the desired `FuzzyStringComparisonOptions` and `FuzzyStringComparisonTolerance`.
For two strings that are desired to be compared approximately, a boolean response of equality can be garnered in the following way:
```csharp
string source = "kevin";
string target = "kevyn";
List<FuzzyStringComparisonOptions> options = new List<FuzzyStringComparisonOptions>();
// Choose which algorithms should weigh in for the comparison
options.Add(FuzzyStringComparisonOptions.UseOverlapCoefficient);
options.Add(FuzzyStringComparisonOptions.UseLongestCommonSubsequence);
options.Add(FuzzyStringComparisonOptions.UseLongestCommonSubstring);
// Choose the relative strength of the comparison - is it almost exactly equal? or is it just close?
FuzzyStringComparisonTolerance tolerance = FuzzyStringComparisonTolerance.Strong;
// Get a boolean determination of approximate equality
bool result = source.ApproximatelyEquals(target, options, tolerance);
```
[legacy documentation]: http://fuzzystring.codeplex.com/wikipage?title=Using%20the%20ApproximatelyEquals%28%29%20Extension&referringTitle=Documentation
gitextract_f_31__2f/
├── .gitattributes
├── .gitignore
├── FuzzyString/
│   ├── ApproximatelyEquals.cs
│   ├── FuzzyString.csproj
│   ├── FuzzyStringComparisonOptions.cs
│   ├── FuzzyStringComparisonTolerance.cs
│   ├── HammingDistance.cs
│   ├── JaccardDistance.cs
│   ├── JaroDistance.cs
│   ├── JaroWinklerDistance.cs
│   ├── LevenshteinDistance.cs
│   ├── LongestCommonSubsequence.cs
│   ├── LongestCommonSubstring.cs
│   ├── Operations.cs
│   ├── OverlapCoefficient.cs
│   ├── Properties/
│   │   └── AssemblyInfo.cs
│   ├── RatcliffObershelpSimilarity.cs
│   ├── Resources/
│   │   └── License.txt
│   ├── SorensenDiceDistance.cs
│   └── TanimotoCoefficient.cs
├── FuzzyString.sln
├── FuzzyStringConsole/
│   ├── App.config
│   ├── FuzzyStringConsole.csproj
│   ├── Program.cs
│   ├── Properties/
│   │   └── AssemblyInfo.cs
│   └── Resources/
│       └── License.txt
├── LICENSE
└── README.md
SYMBOL INDEX (45 symbols across 16 files)
FILE: FuzzyString/ApproximatelyEquals.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method ApproximatelyEquals (line 11) | public static bool ApproximatelyEquals(this string source, string targ...
FILE: FuzzyString/FuzzyStringComparisonOptions.cs
type FuzzyStringComparisonOptions (line 9) | public enum FuzzyStringComparisonOptions
FILE: FuzzyString/FuzzyStringComparisonTolerance.cs
type FuzzyStringComparisonTolerance (line 9) | public enum FuzzyStringComparisonTolerance
FILE: FuzzyString/HammingDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method HammingDistance (line 11) | public static int HammingDistance(this string source, string target)
FILE: FuzzyString/JaccardDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method JaccardDistance (line 11) | public static double JaccardDistance(this string source, string target)
method JaccardIndex (line 16) | public static double JaccardIndex(this string source, string target)
FILE: FuzzyString/JaroDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method JaroDistance (line 11) | public static double JaroDistance(this string source, string target)
FILE: FuzzyString/JaroWinklerDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method JaroWinklerDistance (line 11) | public static double JaroWinklerDistance(this string source, string ta...
method JaroWinklerDistanceWithPrefixScale (line 19) | public static double JaroWinklerDistanceWithPrefixScale(string source,...
method CommonPrefixLength (line 33) | private static double CommonPrefixLength(string source, string target)
FILE: FuzzyString/LevenshteinDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method LevenshteinDistance (line 21) | public static int LevenshteinDistance(this string source, string target)
method NormalizedLevenshteinDistance (line 48) | public static double NormalizedLevenshteinDistance(this string source,...
method LevenshteinDistanceUpperBounds (line 61) | public static int LevenshteinDistanceUpperBounds(this string source, s...
method LevenshteinDistanceLowerBounds (line 79) | public static int LevenshteinDistanceLowerBounds(this string source, s...
FILE: FuzzyString/LongestCommonSubsequence.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method LongestCommonSubsequence (line 11) | public static string LongestCommonSubsequence(this string source, stri...
method LongestCommonSubsequenceLengthTable (line 18) | private static int[,] LongestCommonSubsequenceLengthTable(string sourc...
method Backtrack (line 43) | private static string Backtrack(int[,] C, string source, string target...
FILE: FuzzyString/LongestCommonSubstring.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method LongestCommonSubstring (line 11) | public static string LongestCommonSubstring(this string source, string...
FILE: FuzzyString/Operations.cs
class Operations (line 9) | public static partial class Operations
method Capitalize (line 11) | public static string Capitalize(this string source)
method SplitIntoIndividualElements (line 16) | public static string[] SplitIntoIndividualElements(string source)
method MergeIndividualElementsIntoString (line 28) | public static string MergeIndividualElementsIntoString(IEnumerable<str...
method ListPrefixes (line 39) | public static List<string> ListPrefixes(this string source)
method ListBiGrams (line 51) | public static List<string> ListBiGrams(this string source)
method ListTriGrams (line 56) | public static List<string> ListTriGrams(this string source)
method ListNGrams (line 61) | public static List<string> ListNGrams(this string source, int n)
FILE: FuzzyString/OverlapCoefficient.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method OverlapCoefficient (line 11) | public static double OverlapCoefficient(this string source, string tar...
FILE: FuzzyString/RatcliffObershelpSimilarity.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method RatcliffObershelpSimilarity (line 11) | public static double RatcliffObershelpSimilarity(this string source, s...
FILE: FuzzyString/SorensenDiceDistance.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method SorensenDiceDistance (line 11) | public static double SorensenDiceDistance(this string source, string t...
method SorensenDiceIndex (line 16) | public static double SorensenDiceIndex(this string source, string target)
FILE: FuzzyString/TanimotoCoefficient.cs
class ComparisonMetrics (line 9) | public static partial class ComparisonMetrics
method TanimotoCoefficient (line 11) | public static double TanimotoCoefficient(this string source, string ta...
FILE: FuzzyStringConsole/Program.cs
class Program (line 10) | class Program
method Main (line 12) | static void Main(string[] args)
Condensed preview — 28 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (79K chars).
[
{
"path": ".gitattributes",
"chars": 2518,
"preview": "###############################################################################\n# Set default behavior to automatically "
},
{
"path": ".gitignore",
"chars": 3833,
"preview": "## Ignore Visual Studio temporary files, build results, and\n## files generated by popular Visual Studio add-ons.\n\n# User"
},
{
"path": "FuzzyString/ApproximatelyEquals.cs",
"chars": 5174,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/FuzzyString.csproj",
"chars": 456,
"preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n <PropertyGroup>\n <TargetFramework>netstandard2.0</TargetFramework>\n <Generate"
},
{
"path": "FuzzyString/FuzzyStringComparisonOptions.cs",
"chars": 630,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/FuzzyStringComparisonTolerance.cs",
"chars": 267,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/HammingDistance.cs",
"chars": 704,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/JaccardDistance.cs",
"chars": 580,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/JaroDistance.cs",
"chars": 1150,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/JaroWinklerDistance.cs",
"chars": 1695,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/LevenshteinDistance.cs",
"chars": 4094,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/LongestCommonSubsequence.cs",
"chars": 2000,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/LongestCommonSubstring.cs",
"chars": 2082,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/Operations.cs",
"chars": 2170,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/OverlapCoefficient.cs",
"chars": 441,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/Properties/AssemblyInfo.cs",
"chars": 183,
"preview": "// The following GUID is for the ID of the typelib if this project is exposed to COM\n\nusing System.Runtime.InteropServi"
},
{
"path": "FuzzyString/RatcliffObershelpSimilarity.cs",
"chars": 447,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/Resources/License.txt",
"chars": 11252,
"preview": "Eclipse Public License -v 1.0\n\nTHE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE (\"AG"
},
{
"path": "FuzzyString/SorensenDiceDistance.cs",
"chars": 600,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString/TanimotoCoefficient.cs",
"chars": 487,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\n\nna"
},
{
"path": "FuzzyString.sln",
"chars": 1773,
"preview": "\nMicrosoft Visual Studio Solution File, Format Version 12.00\n# Visual Studio 14\nVisualStudioVersion = 14.0.25420.1\nMini"
},
{
"path": "FuzzyStringConsole/App.config",
"chars": 180,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<configuration>\n <startup> \n <supportedRuntime version=\"v4.0\" sku=\".NET"
},
{
"path": "FuzzyStringConsole/FuzzyStringConsole.csproj",
"chars": 4358,
"preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"12.0\" DefaultTargets=\"Build\" xmlns=\"http://schemas.micros"
},
{
"path": "FuzzyStringConsole/Program.cs",
"chars": 1225,
"preview": "using System;\nusing System.Collections.Generic;\nusing System.Linq;\nusing System.Text;\nusing System.Threading.Tasks;\nusi"
},
{
"path": "FuzzyStringConsole/Properties/AssemblyInfo.cs",
"chars": 1409,
"preview": "using System.Reflection;\nusing System.Runtime.CompilerServices;\nusing System.Runtime.InteropServices;\n\n// General Infor"
},
{
"path": "FuzzyStringConsole/Resources/License.txt",
"chars": 11252,
"preview": "Eclipse Public License -v 1.0\n\nTHE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC LICENSE (\"AG"
},
{
"path": "LICENSE",
"chars": 11514,
"preview": "Eclipse Public License - v 1.0\n\nTHE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC\nLICENSE (\"AG"
},
{
"path": "README.md",
"chars": 3232,
"preview": "# fuzzystring\nApproximate String Comparision in C#\n\n*Originally Hosted on Codplex*\nhttp://fuzzystring.codeplex.com\n\n## P"
}
]
About this extraction
This page contains the full source code of the kdjones/fuzzystring GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 28 files (73.9 KB), approximately 16.6k tokens, and a symbol index with 45 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.