J'ai une table Personnes avec données personnelles et ainsi de suite. Il y a beaucoup de colonnes, mais les unes des autres qui vous intéressent ici sont: addressindex
, lastname
et firstname
où addressindex
est une adresse unique renseignée jusqu'à la porte de l'appartement .. Donc, si j'ai 'comme ci-dessous' deux personnes avec le lastname
et une le firstnames
sont les mêmes, elles sont probablement des doublons.
J'ai besoin d'un moyen de lister ces doublons.
tabledata:
personid 1
firstname "Carl"
lastname "Anderson"
addressindex 1
personid 2
firstname "Carl Peter"
lastname "Anderson"
addressindex 1
Je sais comment faire cela si je fais correspondre exactement toutes les colonnes, mais il me faut une correspondance approximative pour faire le tour avec (d'après l'exemple ci-dessus) un résultat comme:
Row personid addressindex lastname firstname
1 2 1 Anderson Carl Peter
2 1 1 Anderson Carl
.....
Des astuces sur la façon de résoudre ce problème?
J'ai constaté que le travail que SQL Server vous donne pour effectuer une correspondance approximative est assez maladroit. J'ai eu beaucoup de chance avec mes propres fonctions CLR utilisant l'algorithme de distance de Levenshtein et une certaine pondération. À l'aide de cet algorithme, j'ai ensuite créé une fonction utilisateur appelée GetSimilarityScore qui prend deux chaînes et renvoie un score compris entre 0.0 et 1.0. Plus le match est proche de 1,0, mieux c'est. Ensuite, interrogez avec un seuil> = 0.8 ou plus pour obtenir les correspondances les plus probables. Quelque chose comme ça:
if object_id('tempdb..#similar') is not null drop table #similar
select a.id, (
select top 1 x.id
from MyTable x
where x.id <> a.id
order by dbo.GetSimilarityScore(a.MyField, x.MyField) desc
) as MostSimilarId
into #similar
from MyTable a
select *, dbo.GetSimilarityScore(a.MyField, c.MyField)
from MyTable a
join #similar b on a.id = b.id
join MyTable c on b.MostSimilarId = c.id
Ne le faites pas avec de très grandes tables. C'est un processus lent.
Voici les UDF CLR:
''' <summary>
''' Compute the distance between two strings.
''' </summary>
''' <param name="s1">The first of the two strings.</param>
''' <param name="s2">The second of the two strings.</param>
''' <returns>The Levenshtein cost.</returns>
<Microsoft.SqlServer.Server.SqlFunction()> _
Public Shared Function ComputeLevenstheinDistance(ByVal string1 As SqlString, ByVal string2 As SqlString) As SqlInt32
If string1.IsNull OrElse string2.IsNull Then Return SqlInt32.Null
Dim s1 As String = string1.Value
Dim s2 As String = string2.Value
Dim n As Integer = s1.Length
Dim m As Integer = s2.Length
Dim d As Integer(,) = New Integer(n, m) {}
' Step 1
If n = 0 Then Return m
If m = 0 Then Return n
' Step 2
For i As Integer = 0 To n
d(i, 0) = i
Next
For j As Integer = 0 To m
d(0, j) = j
Next
' Step 3
For i As Integer = 1 To n
'Step 4
For j As Integer = 1 To m
' Step 5
Dim cost As Integer = If((s2(j - 1) = s1(i - 1)), 0, 1)
' Step 6
d(i, j) = Math.Min(Math.Min(d(i - 1, j) + 1, d(i, j - 1) + 1), d(i - 1, j - 1) + cost)
Next
Next
' Step 7
Return d(n, m)
End Function
''' <summary>
''' Returns a score between 0.0-1.0 indicating how closely two strings match. 1.0 is a 100%
''' T-SQL equality match, and the score goes down from there towards 0.0 for less similar strings.
''' </summary>
<Microsoft.SqlServer.Server.SqlFunction()> _
Public Shared Function GetSimilarityScore(string1 As SqlString, string2 As SqlString) As SqlDouble
If string1.IsNull OrElse string2.IsNull Then Return SqlInt32.Null
Dim s1 As String = string1.Value.ToUpper().TrimEnd(" "c)
Dim s2 As String = string2.Value.ToUpper().TrimEnd(" "c)
If s1 = s2 Then Return 1.0F ' At this point, T-SQL would consider them the same, so I will too
Dim flatLevScore As Double = InternalGetSimilarityScore(s1, s2)
Dim letterS1 As String = GetLetterSimilarityString(s1)
Dim letterS2 As String = GetLetterSimilarityString(s2)
Dim letterScore As Double = InternalGetSimilarityScore(letterS1, letterS2)
'Dim wordS1 As String = GetWordSimilarityString(s1)
'Dim wordS2 As String = GetWordSimilarityString(s2)
'Dim wordScore As Double = InternalGetSimilarityScore(wordS1, wordS2)
If flatLevScore = 1.0F AndAlso letterScore = 1.0F Then Return 1.0F
If flatLevScore = 0.0F AndAlso letterScore = 0.0F Then Return 0.0F
' Return weighted result
Return (flatLevScore * 0.2F) + (letterScore * 0.8F)
End Function
Private Shared Function InternalGetSimilarityScore(s1 As String, s2 As String) As Double
Dim dist As SqlInt32 = ComputeLevenstheinDistance(s1, s2)
Dim maxLen As Integer = If(s1.Length > s2.Length, s1.Length, s2.Length)
If maxLen = 0 Then Return 1.0F
Return 1.0F - Convert.ToDouble(dist.Value) / Convert.ToDouble(maxLen)
End Function
''' <summary>
''' Sorts all the alpha numeric characters in the string in alphabetical order
''' and removes everything else.
''' </summary>
Private Shared Function GetLetterSimilarityString(s1 As String) As String
Dim allChars = If(s1, "").ToUpper().ToCharArray()
Array.Sort(allChars)
Dim result As New StringBuilder()
For Each ch As Char In allChars
If Char.IsLetterOrDigit(ch) Then
result.Append(ch)
End If
Next
Return result.ToString()
End Function
''' <summary>
''' Removes all non-alpha numeric characters and then sorts
''' the words in alphabetical order.
''' </summary>
Private Shared Function GetWordSimilarityString(s1 As String) As String
Dim words As New List(Of String)()
Dim curWord As StringBuilder = Nothing
For Each ch As Char In If(s1, "").ToUpper()
If Char.IsLetterOrDigit(ch) Then
If curWord Is Nothing Then
curWord = New StringBuilder()
End If
curWord.Append(ch)
Else
If curWord IsNot Nothing Then
words.Add(curWord.ToString())
curWord = Nothing
End If
End If
Next
If curWord IsNot Nothing Then
words.Add(curWord.ToString())
End If
words.Sort(StringComparer.OrdinalIgnoreCase)
Return String.Join(" ", words.ToArray())
End Function
En plus des autres bonnes informations ici, vous pouvez envisager d’utiliser l’algorithme phonétique Double Metaphone phonetic qui est généralement considéré comme meilleur que SOUNDEX .
Tim Pfeiffer détaille une implémentation en SQL dans son article Double Metaphone Sounds Great Convertit l'algorithme C++ Double Metaphone en T-SQL (à l'origine dans SQL Mag & puis dans SQL Server Pro ).
Cela vous aidera à faire correspondre les noms avec de légères erreurs d’orthographe, par exemple, Carl contre Karl.
Update: le code téléchargeable actuel semble avoir disparu, mais voici une implémentation trouvée sur un dépôt github qui semble avoir cloné le code original
J'utiliserais l'indexation de texte intégral de SQL Server, qui vous permettra de faire des recherches et de renvoyer des éléments contenant non seulement le mot mais pouvant également comporter une faute d'orthographe.
Depuis la première version de Master Data Services, vous avez accès à des algorithmes plus avancés de logique floue que ce que SOUNDEX implémente. Donc, à condition que MDS soit installé, vous pourrez trouver une fonction appelée Similarity () dans le schéma mdq (base de données MDS).
Plus d'informations sur son fonctionnement: http://blog.hoegaerden.be/2011/02/05/finding-similar-strings-with-fuzzy-logic-functions-built-into-mds/
Personnellement, j'utilise une implémentation CLR de Jaro-Winkler algorithm qui semble fonctionner assez bien - elle se débat un peu avec des chaînes d'une longueur supérieure à 15 caractères et n'aime pas les adresses e-mail correspondantes, mais est plutôt bonne - implémentation complète guide peut être trouvé ici
Si vous ne pouvez pas utiliser les fonctions CLR pour quelque raison que ce soit, vous pouvez peut-être essayer d'exécuter les données via un package SSIS (à l'aide de la recherche de transformation fuzzy) - détaillé ici
En ce qui concerne le dédoublement des choses, la division et l’accord des cordes constituent une excellente première coupe. S'il existe des éléments connus sur les données qui peuvent être exploités pour réduire la charge de travail et/ou produire de meilleurs résultats, il est toujours bon de les exploiter. N'oubliez pas que, souvent, il est impossible d'éliminer entièrement le travail manuel lors de la duplication, bien que vous puissiez le simplifier beaucoup plus facilement en capturant le plus possible automatiquement et en générant ensuite des rapports sur vos "cas d'incertitude".
Concernant l'appariement des noms: SOUNDEX est horrible pour la qualité de l'appariement et particulièrement pour le type de travail que vous essayez d'effectuer, car il correspond à des éléments trop éloignés de la cible. Il est préférable d’utiliser une combinaison de résultats de métaphone double et de distance de Levenshtein pour effectuer la correspondance de nom. Avec un biais approprié, cela fonctionne vraiment bien et pourrait probablement être utilisé pour un second passage après un nettoyage de vos objets connus.
Vous pouvez également envisager d'utiliser un package SSIS et d'examiner les transformations Fuzzy Lookup and Grouping (http://msdn.Microsoft.com/en-us/library/ms345128(SQL.90).aspx).
L'utilisation de la recherche de texte intégral SQL (http://msdn.Microsoft.com/en-us/library/cc879300.aspx) est également une possibilité, mais ne convient probablement pas à votre domaine de problème spécifique.
le faire de cette façon
create table person(
personid int identity(1,1) primary key,
firstname varchar(20),
lastname varchar(20),
addressindex int,
sound varchar(10)
)
et plus tard créer un déclencheur
create trigger trigoninsert for dbo.person
on insert
as
declare @personid int;
select @personid=personid from inserted;
update person
set sound=soundex(firstname) where personid=@personid;
maintenant, ce que je peux faire est que je peux créer une procédure qui ressemble à ceci
create procedure getfuzzi(@personid int)
as
declare @sound varchar(10);
set @sound=(select sound from person where personid=@personid;
select personid,firstname,lastname,addressindex from person
where sound=@sound
cela vous retournera tous les noms qui correspondent presque aux noms fournis par un personid particulier
Vous pouvez utiliser la fonction SOUNDEX et la fonction DIFFERENCE associée dans SQL Server pour rechercher des noms similaires. La référence sur MSDN est ici .
Coller le code RedFilter en deux parties, afin d’éviter la pourriture des liens
Références:
https://github.com/mb16/geocoderNet/blob/master/build/sql/doubleMetaphone.sqlPart1:
--WEB LISTING 1: Double Metaphone Script
-------------------------------------
IF OBJECT_ID('fnIsVowel') IS NOT NULL BEGIN DROP FUNCTION fnIsVowel END
GO;
CREATE FUNCTION fnIsVowel( @c char(1) )
RETURNS bit
AS
BEGIN
IF (@c = 'A') OR (@c = 'E') OR (@c = 'I') OR (@c = 'O') OR (@c = 'U') OR (@c = 'Y')
BEGIN
RETURN 1
END
--'ELSE' would worry SQL Server, it wants RETURN last in a scalar function
RETURN 0
END
GO;
-----------------------------------------------
IF OBJECT_ID('fnSlavoGermanic') IS NOT NULL BEGIN DROP FUNCTION fnSlavoGermanic
END
GO;
CREATE FUNCTION fnSlavoGermanic( @Word char(50) )
RETURNS bit
AS
BEGIN
--Catch NULL also...
IF (CHARINDEX('W',@Word) > 0) OR (CHARINDEX('K',@Word) > 0) OR
(CHARINDEX('CZ',@Word) > 0)
--'WITZ' test is in original Lawrence Philips C++ code, but appears to be a subset of the first test for 'W'
-- OR (CHARINDEX('WITZ',@Word) > 0)
BEGIN
RETURN 1
END
--ELSE
RETURN 0
END
GO;
---------------------------------------------------------------------------------------------------------------------------------
----------------------
--Lawrence Philips calls for a length argument, but this has two drawbacks:
--1. All target strings must be of the same length
--2. It presents an opportunity for subtle bugs, ie fnStringAt( 1, 7, 'Search me please', 'Search' ) returns 0 (no matter what is in the searched string)
--So I've eliminated the argument and fnStringAt checks the length of each target as it executes
--DEFAULTS suck with UDFs. Have to specify DEFAULT in caller - why bother?
IF OBJECT_ID('fnStringAtDef') IS NOT NULL BEGIN DROP FUNCTION fnStringAtDef END
GO;
CREATE FUNCTION fnStringAtDef( @Start int, @StringToSearch varchar(50),
@Target1 varchar(50),
@Target2 varchar(50) = NULL,
@Target3 varchar(50) = NULL,
@Target4 varchar(50) = NULL,
@Target5 varchar(50) = NULL,
@Target6 varchar(50) = NULL )
RETURNS bit
AS
BEGIN
IF CHARINDEX(@Target1,@StringToSearch,@Start) > 0 RETURN 1
--2 Styles, test each optional argument for NULL, nesting further tests
--or just take advantage of CHARINDEX behavior with a NULL arg (unless 65 compatibility - code check before CREATE FUNCTION?
--Style 1:
--IF @Target2 IS NOT NULL
--BEGIN
-- IF CHARINDEX(@Target2,@StringToSearch,@Start) > 0 RETURN 1
-- (etc.)
--END
--Style 2:
IF CHARINDEX(@Target2,@StringToSearch,@Start) > 0 RETURN 1
IF CHARINDEX(@Target3,@StringToSearch,@Start) > 0 RETURN 1
IF CHARINDEX(@Target4,@StringToSearch,@Start) > 0 RETURN 1
IF CHARINDEX(@Target5,@StringToSearch,@Start) > 0 RETURN 1
IF CHARINDEX(@Target6,@StringToSearch,@Start) > 0 RETURN 1
RETURN 0
END
GO;
-------------------------------------------------------------------------------------------------
IF OBJECT_ID('fnStringAt') IS NOT NULL BEGIN DROP FUNCTION fnStringAt END
GO;
CREATE FUNCTION fnStringAt( @Start int, @StringToSearch varchar(50), @TargetStrings
varchar(2000) )
RETURNS bit
AS
BEGIN
DECLARE @SingleTarget varchar(50)
DECLARE @CurrentStart int
DECLARE @CurrentLength int
--Eliminate special cases
--Trailing space is needed to check for end of Word in some cases, so always append comma
--loop tests should fairly quickly ignore ',,' termination
SET @TargetStrings = @TargetStrings + ','
SET @CurrentStart = 1
--Include terminating comma so spaces don't get truncated
SET @CurrentLength = (CHARINDEX(',',@TargetStrings,@CurrentStart) -
@CurrentStart) + 1
SET @SingleTarget = SUBSTRING(@TargetStrings,@CurrentStart,@CurrentLength)
WHILE LEN(@SingleTarget) > 1
BEGIN
IF SUBSTRING(@StringToSearch,@Start,LEN(@SingleTarget)-1) =
LEFT(@SingleTarget,LEN(@SingleTarget)-1)
BEGIN
RETURN 1
END
SET @CurrentStart = (@CurrentStart + @CurrentLength)
SET @CurrentLength = (CHARINDEX(',',@TargetStrings,@CurrentStart) -
@CurrentStart) + 1
IF NOT @CurrentLength > 1 --getting trailing comma
BEGIN
BREAK
END
SET @SingleTarget =
SUBSTRING(@TargetStrings,@CurrentStart,@CurrentLength)
END
RETURN 0
END
GO;
------------------------------------------------------------------------
IF OBJECT_ID('fnDoubleMetaphoneTable') IS NOT NULL BEGIN DROP FUNCTION
fnDoubleMetaphoneTable END
GO;
CREATE FUNCTION fnDoubleMetaphoneTable( @Word varchar(50) )
RETURNS @DMP TABLE ( Metaphone1 char(4), Metaphone2 char(4) )
AS
BEGIN
DECLARE @MP1 varchar(4), @MP2 varchar(4)
SET @MP1 = ''
SET @MP2 = ''
DECLARE @CurrentPosition int, @WordLength int, @CurrentChar char(1)
SET @CurrentPosition = 1
SET @WordLength = LEN(@Word)
IF @WordLength < 1
BEGIN
RETURN
END
--ensure case insensitivity
SET @Word = UPPER(@Word)
IF dbo.fnStringAt(1, @Word, 'GN,KN,PN,WR,PS') = 1
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
IF 'X' = LEFT(@Word,1)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
SET @CurrentPosition = @CurrentPosition + 1
END
WHILE (4 > LEN(RTRIM(@MP1))) OR (4 > LEN(RTRIM(@MP2)))
BEGIN
IF @CurrentPosition > @WordLength
BEGIN
BREAK
END
SET @CurrentChar = SUBSTRING(@Word,@CurrentPosition,1)
IF @CurrentChar IN('A','E','I','O','U','Y')
BEGIN
IF @CurrentPosition = 1
BEGIN
SET @MP1 = @MP1 + 'A'
SET @MP2 = @MP2 + 'A'
END
SET @CurrentPosition = @CurrentPosition + 1
END
ELSE IF @CurrentChar = 'B'
BEGIN
SET @MP1 = @MP1 + 'P'
SET @MP2 = @MP2 + 'P'
IF 'B' = SUBSTRING(@Word,@CurrentPosition + 1,1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'Ç'
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
SET @CurrentPosition = @CurrentPosition + 1
END
ELSE IF @CurrentChar = 'C'
BEGIN
--various germanic
IF (@CurrentPosition > 2)
AND (dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition-2,1))=0)
AND (dbo.fnStringAt(@CurrentPosition-1,@Word,'ACH') = 1)
AND ((SUBSTRING(@Word,@CurrentPosition+2,1) <> 'I')
AND ((SUBSTRING(@Word,@CurrentPosition+2,1) <> 'E') OR
(dbo.fnStringAt(@CurrentPosition-2,@Word,'BACHER,MACHER')=1)))
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
-- 'caesar'
ELSE IF (@CurrentPosition = 1) AND
(dbo.fnStringAt(@CurrentPosition,@Word,'CAESAR') = 1)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
SET @CurrentPosition = @CurrentPosition + 2
END
-- 'chianti'
ELSE IF dbo.fnStringAt(@CurrentPosition,@Word,'CHIA') = 1
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE IF dbo.fnStringAt(@CurrentPosition,@Word,'CH') = 1
BEGIN
-- Find 'michael'
IF (@CurrentPosition > 1) AND
(dbo.fnStringAt(@CurrentPosition,@Word,'CHAE') = 1)
BEGIN
--First instance of alternate encoding
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'X'
SET @CurrentPosition = @CurrentPosition + 2
END
--greek roots e.g. 'chemistry', 'chorus'
ELSE IF (@CurrentPosition = 1) AND (dbo.fnStringAt(2, @Word,
'HARAC,HARIS,HOR,HYM,HIA,HEM') = 1) AND (dbo.fnStringAt(1,@Word,'CHORE') = 0)
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
--germanic, greek, or otherwise 'ch' for 'kh' sound
ELSE IF ((dbo.fnStringAt(1,@Word,'VAN ,VON ,SCH')=1) OR
(dbo.fnStringAt(@CurrentPosition-
2,@Word,'ORCHES,ARCHIT,ORCHID')=1) OR
(dbo.fnStringAt(@CurrentPosition+2,@Word,'T,S')=1) OR
(((dbo.fnStringAt(@CurrentPosition-1,@Word,'A,O,U,E')=1)
OR
(@CurrentPosition = 1))
AND
(dbo.fnStringAt(@CurrentPosition+2,@Word,'L,R,N,M,B,H,F,V,W, ')=1)))
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
--is this a given?
IF (@CurrentPosition > 1)
BEGIN
IF (dbo.fnStringAt(1,@Word,'MC') = 1)
BEGIN
--eg McHugh
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
ELSE
BEGIN
--Alternate encoding
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'K'
END
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
END
SET @CurrentPosition = @CurrentPosition + 2
END
END
--e.g, 'czerny'
ELSE IF (dbo.fnStringAt(@CurrentPosition,@Word,'CZ')=1) AND
(dbo.fnStringAt((@CurrentPosition - 2),@Word,'WICZ')=0)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'X'
SET @CurrentPosition = @CurrentPosition + 2
END
--e.g., 'focaccia'
ELSE IF(dbo.fnStringAt((@CurrentPosition + 1),@Word,'CIA')=1)
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
SET @CurrentPosition = @CurrentPosition + 3
END
--double 'C', but not if e.g. 'McClellan'
ELSE IF(dbo.fnStringAt(@CurrentPosition,@Word,'CC')=1) AND NOT
((@CurrentPosition = 2) AND (LEFT(@Word,1) = 'M'))
--'bellocchio' but not 'bacchus'
IF (dbo.fnStringAt((@CurrentPosition + 2),@Word,'I,E,H')=1) AND
(dbo.fnStringAt((@CurrentPosition + 2),@Word,'HU')=0)
BEGIN
--'accident', 'accede' 'succeed'
IF (((@CurrentPosition = 2) AND
(SUBSTRING(@Word,@CurrentPosition - 1,1) = 'A'))
OR (dbo.fnStringAt((@CurrentPosition -
1),@Word,'UCCEE,UCCES')=1))
BEGIN
SET @MP1 = @MP1 + 'KS'
SET @MP2 = @MP2 + 'KS'
END
--'bacci', 'bertucci', other italian
ELSE
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
END
SET @CurrentPosition = @CurrentPosition + 3
END
--Pierce's rule
ELSE
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE IF (dbo.fnStringAt(@CurrentPosition,@Word,'CK,CG,CQ')=1)
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE IF (dbo.fnStringAt(@CurrentPosition,@Word,'CI,CE,CY')=1)
BEGIN
--italian vs. english
IF (dbo.fnStringAt(@CurrentPosition,@Word,'CIO,CIE,CIA')=1)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'X'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
END
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
--name sent in 'mac caffrey', 'mac gregor
IF (dbo.fnStringAt((@CurrentPosition + 1),@Word,' C, Q, G')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 3
END
ELSE
BEGIN
IF (dbo.fnStringAt((@CurrentPosition + 1),@Word,'C,K,Q')=1)
AND (dbo.fnStringAt((@CurrentPosition + 1), 2, 'CE,CI')=0)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
END
END
ELSE IF @CurrentChar = 'D'
BEGIN
IF (dbo.fnStringAt(@CurrentPosition, @Word, 'DG')=1)
BEGIN
IF (dbo.fnStringAt((@CurrentPosition + 2),@Word,'I,E,Y')=1)
BEGIN
--e.g. 'Edge'
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'J'
SET @CurrentPosition = @CurrentPosition + 3
END
ELSE
BEGIN
--e.g. 'edgar'
SET @MP1 = @MP1 + 'TK'
SET @MP2 = @MP2 + 'TK'
SET @CurrentPosition = @CurrentPosition + 2
END
END
ELSE IF (dbo.fnStringAt(@CurrentPosition,@Word,'DT,DD')=1)
BEGIN
SET @MP1 = @MP1 + 'T'
SET @MP2 = @MP2 + 'T'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'T'
SET @MP2 = @MP2 + 'T'
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'F'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'F')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'F'
SET @MP2 = @MP2 + 'F'
END
ELSE IF @CurrentChar = 'G'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'H')
BEGIN
IF (@CurrentPosition > 1) AND
(dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition - 1,1)) = 0)
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
SET @CurrentPosition = @CurrentPosition + 2
END
--'ghislane', ghiradelli
ELSE IF (@CurrentPosition = 1)
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 2,1) = 'I')
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'J'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
SET @CurrentPosition = @CurrentPosition + 2
END
--Parker's rule (with some further refinements) - e.g., 'hugh'
ELSE IF (((@CurrentPosition > 2) AND (dbo.fnStringAt((@CurrentPosition
- 2),@Word,'B,H,D')=1) )
--e.g., 'bough'
OR ((@CurrentPosition > 3) AND (dbo.fnStringAt((@CurrentPosition
- 3),@Word,'B,H,D')=1) )
--e.g., 'broughton'
OR ((@CurrentPosition > 4) AND (dbo.fnStringAt((@CurrentPosition
- 4),@Word,'B,H')=1) ) )
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
--e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
IF ((@CurrentPosition > 3)
AND (SUBSTRING(@Word,@CurrentPosition - 1,1) = 'U')
AND (dbo.fnStringAt((@CurrentPosition -
3),@Word,'C,G,L,R,T')=1) )
BEGIN
SET @MP1 = @MP1 + 'F'
SET @MP2 = @MP2 + 'F'
END
ELSE
BEGIN
IF ((@CurrentPosition > 1) AND
SUBSTRING(@Word,@CurrentPosition - 1,1) <> 'I')
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
END
SET @CurrentPosition = @CurrentPosition + 2
END
END
ELSE IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'N')
BEGIN
IF ((@CurrentPosition = 2) AND (dbo.fnIsVowel(LEFT(@Word,1))=1) AND
(dbo.fnSlavoGermanic(@Word)=0))
BEGIN
SET @MP1 = @MP1 + 'KN'
SET @MP2 = @MP2 + 'N'
END
ELSE
BEGIN
--not e.g. 'cagney'
IF ((dbo.fnStringAt((@CurrentPosition + 2),@Word,'EY')=0)
AND (SUBSTRING(@Word,@CurrentPosition + 1,1) <>
'Y') AND (dbo.fnSlavoGermanic(@Word)=0))
Partie 2 du code Pour le lien dans Redfilter Réponse:
Références:
https://github.com/mb16/geocoderNet/blob/master/build/sql/doubleMetaphone.sql
BEGIN
SET @MP1 = @MP1 + 'N'
SET @MP2 = @MP2 + 'KN'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'KN'
SET @MP2 = @MP2 + 'KN'
END
END
SET @CurrentPosition = @CurrentPosition + 2
END
--'tagliaro'
ELSE IF (dbo.fnStringAt((@CurrentPosition + 1),@Word,'LI')=1) AND
(dbo.fnSlavoGermanic(@Word)=0)
BEGIN
SET @MP1 = @MP1 + 'KL'
SET @MP2 = @MP2 + 'L'
SET @CurrentPosition = @CurrentPosition + 2
END
-- -ges-,-gep-,-gel-, -gie- at beginning
-- This call to fnStringAt() is the 'worst case' in number of values passed. A UDF that used DEFAULT values instead of
-- a multi-valued argument would require ten DEFAULT arguments for EP, EB, EL, etc. (assuming the first was not defined with a DEFAULT).
ELSE IF ((@CurrentPosition = 1)
AND ((SUBSTRING(@Word,@CurrentPosition + 1,1) = 'Y')
OR (dbo.fnStringAt((@CurrentPosition +
1),@Word,'ES,EP,EB,EL,EY,IB,IL,IN,IE,EI,ER')=1)) )
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'J'
SET @CurrentPosition = @CurrentPosition + 2
END
-- -ger-, -gy-
ELSE IF (((dbo.fnStringAt((@CurrentPosition + 1), @Word, 'ER')=1) OR
(SUBSTRING(@Word,@CurrentPosition + 1,1) = 'Y'))
AND (dbo.fnStringAt(1, @Word, 'DANGER,RANGER,MANGER')=0)
AND (dbo.fnStringAt((@CurrentPosition - 1), @Word,
'E,I,RGY,OGY')=0) )
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'J'
SET @CurrentPosition = @CurrentPosition + 2
END
-- italian e.g, 'biaggi'
ELSE IF (dbo.fnStringAt((@CurrentPosition + 1),@Word,'E,I,Y')=1) OR
(dbo.fnStringAt((@CurrentPosition - 1),@Word,'AGGI,OGGI')=1)
BEGIN
--obvious germanic
IF ((dbo.fnStringAt(1,@Word,'VAN ,VON ,SCH')=1)
OR (dbo.fnStringAt((@CurrentPosition + 1),@Word,'ET')=1))
BEGIN
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
ELSE
BEGIN
--always soft if french ending
IF (dbo.fnStringAt((@CurrentPosition + 1),@Word,'IER ')=1)
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'J'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'K'
END
END
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'G')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
END
ELSE IF @CurrentChar = 'H'
BEGIN
--only keep if first & before vowel or btw. 2 vowels
IF (((@CurrentPosition = 1) OR
(dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition - 1,1))=1))
AND (dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition + 1,1))=1))
BEGIN
SET @MP1 = @MP1 + 'H'
SET @MP2 = @MP2 + 'H'
SET @CurrentPosition = @CurrentPosition + 2
END
--also takes care of 'HH'
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'J'
BEGIN
--obvious spanish, 'jose', 'san jacinto'
IF (dbo.fnStringAt(@CurrentPosition,@Word,'JOSE')=1) OR
(dbo.fnStringAt(1,@Word,'SAN ')=1)
BEGIN
IF (((@CurrentPosition = 1) AND (SUBSTRING(@Word,@CurrentPosition
+ 4,1) = ' ')) OR (dbo.fnStringAt(1,@Word,'SAN ')=1) )
BEGIN
SET @MP1 = @MP1 + 'H'
SET @MP2 = @MP2 + 'H'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'H'
END
SET @CurrentPosition = @CurrentPosition + 1
END
ELSE IF ((@CurrentPosition = 1) AND
(dbo.fnStringAt(@CurrentPosition,@Word,'JOSE')=0))
BEGIN
SET @MP1 = @MP1 + 'J'
--Yankelovich/Jankelowicz
SET @MP2 = @MP2 + 'A'
--it could happen!
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'J')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE
BEGIN
--spanish pron. of e.g. 'bajador'
IF( (dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition - 1,1))=1)
AND (dbo.fnSlavoGermanic(@Word)=0)
AND ((SUBSTRING(@Word,@CurrentPosition + 1,1) = 'A') OR
(SUBSTRING(@Word,@CurrentPosition + 1,1) = 'O')))
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'H'
END
ELSE
BEGIN
IF (@CurrentPosition = @WordLength)
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + ''
END
ELSE
BEGIN
IF ((dbo.fnStringAt((@CurrentPosition + 1), @Word,
'L,T,K,S,N,M,B,Z')=0)
AND (dbo.fnStringAt((@CurrentPosition - 1), @Word,
'S,K,L')=0))
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'J'
END
END
END
--it could happen!
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'J')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
END
ELSE IF @CurrentChar = 'K'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'K')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
ELSE IF @CurrentChar = 'L'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'L')
BEGIN
--spanish e.g. 'cabrillo', 'gallegos'
IF (((@CurrentPosition = (@WordLength - 3))
AND (dbo.fnStringAt((@CurrentPosition -
1),@Word,'ILLO,ILLA,ALLE')=1))
OR (((dbo.fnStringAt((@WordLength - 1),@Word,'AS,OS')=1)
OR (dbo.fnStringAt(@WordLength,@Word,'A,O')=1))
AND (dbo.fnStringAt((@CurrentPosition -
1),@Word,'ALLE')=1)) )
BEGIN
SET @MP1 = @MP1 + 'L'
SET @MP2 = @MP2 + ''
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
SET @MP1 = @MP1 + 'L'
SET @MP2 = @MP2 + 'L'
END
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
SET @MP1 = @MP1 + 'L'
SET @MP2 = @MP2 + 'L'
END
END
ELSE IF @CurrentChar = 'M'
BEGIN
--'dumb','thumb'
IF (((dbo.fnStringAt((@CurrentPosition - 1), @Word,'UMB')=1)
AND (((@CurrentPosition + 1) = @WordLength) OR
(dbo.fnStringAt((@CurrentPosition + 2),@Word,'ER')=1)))
OR (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'M') )
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'M'
SET @MP2 = @MP2 + 'M'
END
ELSE IF @CurrentChar = 'N'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'N')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'N'
SET @MP2 = @MP2 + 'N'
END
ELSE IF @CurrentChar = 'Ñ'
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
SET @MP1 = @MP1 + 'N'
SET @MP2 = @MP2 + 'N'
END
ELSE IF @CurrentChar = 'P'
BEGIN
--What about Michelle Pfeiffer, star of Grease 2? Price-Pfister?, Pfizer?
--Don't just look for an 'F' next, what about 'topflight', helpful, campfire, leapfrog, stepfather
--Sorry, Mark Knopfler, I don't know how to help you
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'H')
OR ((@CurrentPosition = 1) AND
(SUBSTRING(@Word,@CurrentPosition + 1,1) = 'F') AND
(dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition+2,1))=1))
BEGIN
SET @MP1 = @MP1 + 'F'
SET @MP2 = @MP2 + 'F'
SET @CurrentPosition = @CurrentPosition + 2
END
--also account for "campbell", "raspberry"
ELSE
BEGIN
IF (dbo.fnStringAt((@CurrentPosition + 1),@Word, 'P,B')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'P'
SET @MP2 = @MP2 + 'P'
END
END
ELSE IF @CurrentChar = 'Q'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'Q')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'K'
SET @MP2 = @MP2 + 'K'
END
ELSE IF @CurrentChar = 'R'
BEGIN
--QQ: Will SQL short circuit eval? Otherwise, I could try to read before string begins here...
--french e.g. 'rogier', but exclude 'hochmeier'
IF ((@CurrentPosition = @WordLength)
AND (dbo.fnSlavoGermanic(@Word)=0)
AND (dbo.fnStringAt((@CurrentPosition - 2), @Word, 'IE')=1)
AND (dbo.fnStringAt((@CurrentPosition - 4), @Word,
'ME,MA')=0))
BEGIN
SET @MP1 = @MP1 + ''
SET @MP2 = @MP2 + 'R'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'R'
SET @MP2 = @MP2 + 'R'
END
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'R')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'S'
BEGIN
--special cases 'island', 'isle', 'carlisle', 'carlysle'
IF (dbo.fnStringAt((@CurrentPosition - 1), @Word, 'ISL,YSL')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
--special case 'sugar-'
ELSE IF ((@CurrentPosition = 1) AND (dbo.fnStringAt(@CurrentPosition,
@Word, 'SUGAR')=1))
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'S'
SET @CurrentPosition = @CurrentPosition + 1
END
ELSE IF (dbo.fnStringAt(@CurrentPosition, @Word, 'SH')=1)
BEGIN
--germanic
IF (dbo.fnStringAt((@CurrentPosition + 1), @Word,
'HEIM,HOEK,HOLM,HOLZ')=1)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
END
SET @CurrentPosition = @CurrentPosition + 2
END
--italian & armenian
ELSE IF (dbo.fnStringAt(@CurrentPosition, @Word, 'SIO,SIA')=1) OR
(dbo.fnStringAt(@CurrentPosition, @Word, 'SIAN')=1)
BEGIN
IF (dbo.fnSlavoGermanic(@Word)=0)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'X'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
END
SET @CurrentPosition = @CurrentPosition + 3
END
--german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
--also, -sz- in slavic language altho in hungarian it is pronounced 's'
ELSE IF (((@CurrentPosition = 1)
AND (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'M,N,L,W')=1))
OR (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'Z')=1))
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'X'
IF (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'Z')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF (dbo.fnStringAt(@CurrentPosition, @Word, 'SC')=1)
BEGIN
--Schlesinger's rule
IF (SUBSTRING(@Word,@CurrentPosition + 2,1) = 'H')
BEGIN
--dutch Origin, e.g. 'school', 'schooner'
IF (dbo.fnStringAt((@CurrentPosition + 3), @Word,
'OO,ER,EN,UY,ED,EM')=1)
BEGIN
--'schermerhorn', 'schenker'
IF (dbo.fnStringAt((@CurrentPosition + 3), @Word, 'ER,EN')=1)
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'SK'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'SK'
SET @MP2 = @MP2 + 'SK'
END
SET @CurrentPosition = @CurrentPosition + 3
END
ELSE
BEGIN
IF ((@CurrentPosition = 1) AND
(dbo.fnIsVowel(SUBSTRING(@Word,3,1))=0) AND (SUBSTRING(@Word,3,1) <> 'W'))
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'S'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
END
SET @CurrentPosition = @CurrentPosition + 3
END
END
ELSE IF (dbo.fnStringAt((@CurrentPosition + 2), @Word, 'I,E,Y')=1)
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
SET @CurrentPosition = @CurrentPosition + 3
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'SK'
SET @MP2 = @MP2 + 'SK'
SET @CurrentPosition = @CurrentPosition + 3
END
END
ELSE
BEGIN
--french e.g. 'resnais', 'artois'
IF ((@CurrentPosition = @WordLength) AND
(dbo.fnStringAt((@CurrentPosition - 2), @Word, 'AI,OI')=1))
BEGIN
SET @MP1 = @MP1 + ''
SET @MP2 = @MP2 + 'S'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
END
IF (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'S,Z')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
END
ELSE IF @CurrentChar = 'T'
BEGIN
IF (dbo.fnStringAt(@CurrentPosition, @Word, 'TION,TIA,TCH')=1)
BEGIN
SET @MP1 = @MP1 + 'X'
SET @MP2 = @MP2 + 'X'
SET @CurrentPosition = @CurrentPosition + 3
END
ELSE IF (dbo.fnStringAt(@CurrentPosition, @Word, 'TH,TTH')=1)
BEGIN
--special case 'thomas', 'thames' or germanic
IF (dbo.fnStringAt((@CurrentPosition + 2), @Word, 'OM,AM')=1)
OR (dbo.fnStringAt(1, @Word, 'VAN ,VON ,SCH')=1)
BEGIN
SET @MP1 = @MP1 + 'T'
SET @MP2 = @MP2 + 'T'
END
ELSE
BEGIN
SET @MP1 = @MP1 + '0'
SET @MP2 = @MP2 + 'T'
END
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
IF (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'T,D')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'T'
SET @MP2 = @MP2 + 'T'
END
END
ELSE IF @CurrentChar = 'V'
BEGIN
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'V')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
SET @MP1 = @MP1 + 'F'
SET @MP2 = @MP2 + 'F'
END
ELSE IF @CurrentChar = 'W'
BEGIN
--can also be in middle of Word
IF (dbo.fnStringAt(@CurrentPosition, @Word, 'WR')=1)
BEGIN
SET @MP1 = @MP1 + 'R'
SET @MP2 = @MP2 + 'R'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE IF ((@CurrentPosition = 1)
AND ((dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition + 1,1))=1)
OR (dbo.fnStringAt(@CurrentPosition, @Word, 'WH')=1)))
BEGIN
--Wasserman should match Vasserman
IF (dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition + 1,1))=1)
BEGIN
SET @MP1 = @MP1 + 'A'
SET @MP2 = @MP2 + 'F'
END
ELSE
BEGIN
--need Uomo to match Womo
SET @MP1 = @MP1 + 'A'
SET @MP2 = @MP2 + 'A'
END
SET @CurrentPosition = @CurrentPosition + 1
END
--Arnow should match Arnoff
ELSE IF (((@CurrentPosition = @WordLength) AND
(dbo.fnIsVowel(SUBSTRING(@Word,@CurrentPosition - 1,1))=1))
OR (dbo.fnStringAt((@CurrentPosition - 1), @Word,
'EWSKI,EWSKY,OWSKI,OWSKY')=1)
OR (dbo.fnStringAt(1, @Word, 'SCH')=1))
BEGIN
SET @MP1 = @MP1 + ''
SET @MP2 = @MP2 + 'F'
SET @CurrentPosition = @CurrentPosition + 1
END
--polish e.g. 'filipowicz'
ELSE IF (dbo.fnStringAt(@CurrentPosition, @Word, 'WICZ,WITZ')=1)
BEGIN
SET @MP1 = @MP1 + 'TS'
SET @MP2 = @MP2 + 'FX'
SET @CurrentPosition = @CurrentPosition + 4
END
-- skip it
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'X'
BEGIN
--french e.g. breaux
IF (NOT((@CurrentPosition = @WordLength)
AND ((dbo.fnStringAt((@CurrentPosition - 3), @Word, 'IAU,EAU')=1)
OR (dbo.fnStringAt((@CurrentPosition - 2), @Word,
'AU,OU')=1))) )
BEGIN
SET @MP1 = @MP1 + 'KS'
SET @MP2 = @MP2 + 'KS'
END
IF (dbo.fnStringAt((@CurrentPosition + 1), @Word, 'C,X')=1)
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
ELSE IF @CurrentChar = 'Z'
BEGIN
--chinese pinyin e.g. 'zhao'
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'H')
BEGIN
SET @MP1 = @MP1 + 'J'
SET @MP2 = @MP2 + 'J'
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
IF ((dbo.fnStringAt((@CurrentPosition + 1), @Word, 'ZO,ZI,ZA')=1)
OR ((dbo.fnSlavoGermanic(@Word)=1) AND ((@CurrentPosition >
1) AND SUBSTRING(@Word,@CurrentPosition - 1,1) <> 'T')))
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'TS'
END
ELSE
BEGIN
SET @MP1 = @MP1 + 'S'
SET @MP2 = @MP2 + 'S'
END
IF (SUBSTRING(@Word,@CurrentPosition + 1,1) = 'Z')
BEGIN
SET @CurrentPosition = @CurrentPosition + 2
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
END
ELSE
BEGIN
SET @CurrentPosition = @CurrentPosition + 1
END
END
--only give back 4 char metaphone
IF (LEN(@MP1) > 4)
BEGIN
SET @MP1 = LEFT(@MP1, 4)
END
IF (LEN(@MP2) > 4)
BEGIN
SET @MP2 = LEFT(@MP2, 4)
END
IF @MP2 = @MP1
BEGIN
SET @MP2 = ''
END
INSERT @DMP(Metaphone1,Metaphone2) VALUES( @MP1, @MP2 )
RETURN
END
GO;
------------------------------------------------------------------------
IF OBJECT_ID('fnDoubleMetaphoneScalar') IS NOT NULL BEGIN DROP FUNCTION
fnDoubleMetaphoneScalar END
GO;
CREATE FUNCTION fnDoubleMetaphoneScalar( @MetaphoneType int, @Word varchar(50) )
RETURNS char(4)
AS
BEGIN
RETURN (SELECT CASE @MetaphoneType WHEN 1 THEN Metaphone1
WHEN 2 THEN Metaphone2 END FROM fnDoubleMetaphoneTable( @Word ))
END