// Double-Metaphone Algorithm by Lawrence Philips
/+
' Source : DoubleMetaphone.d - A much better algorithm than Soundex()
' : Converted from Lawrence Philips' CUJ June 2000 C++ "Double Metaphone" code.
' Version : v0.1 Beta
' Author(s) : Lawrence Philips for the "Original" C++ code, and
' : David L. 'SpottedTiger' Davis for the D conversion of the C++ code.
' Date Created : 20.Oct.04 Compiled and Tested with dmd v0.102
' Date Modified : 22.Oct.04 Compiled and Tested with dmd v0.104
' : 17.Jun.05 Compiled and Tested with dmd v0.127
' : 21.Jan.07 Compiled and Tested with dmd v1.0
' :
' Licence : Public Domain for this D module, and the
' : "Double Metaphone" Algorithm is (c)1998, 1999 by Lawrence Philips.
' -------------------------------------------------------------------------------------
' References : Philips, Lawrence. C/C++ Users Journal (CUJ), June, 2000
' : Philips, Lawrence. Computer Language, Vol. 7, No. 12 December 1990
' :
' Note : Takes a word and returns one or two phonetic encodings for the word.
' -------------------------------------------------------------------------------------
'
' A link to the "Double Metaphone Search Algorithm" article:
' http://www.cuj.com/documents/s=8038/cuj0006philips/
'
' To Compile for Unittest:
' C:\dmd\MKoD_ex>dmd doublemetaphone.d -debug=doublemetaphone -unittest
+/
module DoubleMetaphone;
private import std.string;
private import std.stdio;
private import std.stdarg;
private import std.ctype;
/**
 * Reports whether the character at index iAt of sWord is one of
 * A, E, I, O, U or Y (letter case is ignored).
 *
 * Params:
 *     sWord = text to inspect
 *     iAt   = zero-based index into sWord
 *
 * Returns: true for a vowel at that index; false for any other character
 *          or when iAt lies outside sWord.
 */
bool isVowelAt( in char[] sWord, in int iAt )
{
    // Guard first: indexes outside the word are never vowels.
    if ( ( iAt < 0 ) || ( iAt >= sWord.length ) )
        return false;

    // The index is known to be in range here, so read the character directly.
    char cUpper = std.ctype.toupper( sWord[ iAt ] );

    switch ( cUpper )
    {
        case 'A', 'E', 'I', 'O', 'U', 'Y':
            return true;

        default:
            return false;
    }
} // end bool isVowelAt( in char[], in int )
/**
 * Tests whether any of the candidate strings passed as variadic arguments
 * occurs in s starting exactly at index iStartAt. The comparison is done
 * with icmp, i.e. it ignores letter case.
 *
 * Params:
 *     s        = text to search in
 *     iStartAt = zero-based index in s where a candidate must begin
 *     ...      = candidate strings; each is expected to be a char[]
 *
 * Returns: true as soon as one candidate matches, otherwise false. Also
 *          false when iStartAt is outside s.
 *
 * Notes:
 *     - A candidate that would run past the end of s is skipped and the
 *       remaining candidates are still tried. (Earlier revisions returned
 *       false at the first over-long candidate, which hid shorter matches
 *       listed after it.)
 *     - Scanning stops at the first argument that is not a char[].
 */
bool doesAnyStringMatchAt( in char[] s, in int iStartAt, ... )
{
    if ( ( iStartAt < 0 ) || ( iStartAt >= s.length ) )
        return false;

    for ( int i = 0; i < _arguments.length; i++ )
    {
        // All arguments should be char[] only
        if ( _arguments[ i ] != typeid( char[] ) )
            break;

        // va_arg fetches the value and advances _argptr in one step.
        char[] sCandidate = va_arg!( char[] )( _argptr );

        // Too long to fit at this position: try the next candidate.
        if ( iStartAt + sCandidate.length > s.length )
            continue;

        if ( icmp( s[ iStartAt .. iStartAt + sCandidate.length ], sCandidate ) == 0 )
            return true;
    }

    return false;
} // end bool doesAnyStringMatchAt( in char[], in int, ... )
/**
 * Heuristic used by the encoder to flag a word as Slavic/Germanic: the
 * upper-cased word contains "W", "K", "CZ" or "WITZ".
 *
 * Params:
 *     sWord = word to examine (any letter case)
 *
 * Returns: true when at least one of the markers is found.
 */
bool isSlavoGermanic( in char[] sWord )
{
    // Work on an upper-cased private copy so the search is case-insensitive.
    char[] sUpper = std.string.toupper( sWord.dup );

    return ( find( sUpper, "W" )    >= 0 )
        || ( find( sUpper, "K" )    >= 0 )
        || ( find( sUpper, "CZ" )   >= 0 )
        || ( find( sUpper, "WITZ" ) >= 0 );
} // end bool isSlavoGermanic( in char[] )
/**
 * Bounds-checked character access.
 *
 * Params:
 *     sWord = text to read from
 *     iAt   = zero-based index
 *
 * Returns: sWord[ iAt ] when the index is valid, otherwise '\0'.
 */
char getCharAt( in char[] sWord, in int iAt )
{
    // The short-circuit keeps the length comparison away from negative indexes.
    return ( ( iAt >= 0 ) && ( iAt < sWord.length ) )
        ? sWord[ iAt ]
        : cast(char)'\0';
} // end char getCharAt( in char[] sWord, in int )
debug( doublemetaphone )
{
    /**
     * Demo driver, compiled only when the "doublemetaphone" debug identifier
     * is set. It exercises the helper predicates on "Write" and then prints
     * the primary/secondary encodings of a few sample words.
     *
     * Returns: always 0.
     */
    int main()
    {
        debug( doublemetaphone ) writefln( "unittest done." );

        char[] sSample = "Write";
        char[] sFirst;
        char[] sSecond;

        writefln();

        // Helper predicates, each reported only when it holds.
        if ( doesAnyStringMatchAt( sSample, 0, "GN", "KN", "PN", "WR", "PS" ) )
            writefln( "Match found at %d for one of the following: %s", 0, "GN,KN,PN,WR,PS" );

        if ( isSlavoGermanic( sSample ) )
            writefln( "\"%s\" is SlavoGermanic because one of following \"%s\" was found.", sSample, "W,K,CZ,WITZ" );

        if ( isVowelAt( sSample, 4 ) )
            writefln( "\"%s\" has a vowel in position %d", sSample, 4 );

        // Encodings of the sample words.
        getDoubleMetaphone( sSample, sFirst, sSecond );
        writefln( "sWord=%s, sPrimary=%s, sSecondary=%s", sSample, sFirst, sSecond );

        sSample = "agencies";
        getDoubleMetaphone( sSample, sFirst, sSecond );
        writefln( "sWord=%s, sPrimary=%s, sSecondary=%s", sSample, sFirst, sSecond );

        sSample = "nelson";
        getDoubleMetaphone( sSample, sFirst, sSecond );
        writefln( "sWord=%s, sPrimary=%s, sSecondary=%s; should=\"NLSN\"", sSample, sFirst, sSecond );

        // Occasionally - "AKSN" and "AKXN"
        sSample = "Occasionally";
        getDoubleMetaphone( sSample, sFirst, sSecond );
        writefln( "sWord=%s, sPrimary=%s, sSecondary=%s; should=\"AKSN\", and \"AKXN\"", sSample, sFirst, sSecond );

        return 0;
    }
} // end debug( doublemetaphone )
/**
 * Computes the Double Metaphone encodings of a word.
 *
 * The input is upper-cased and scanned left to right; each letter (or
 * letter group) appends zero or more code letters to a primary and a
 * secondary code. Scanning stops at the end of the word or once both codes
 * hold at least four characters.
 *
 * Params:
 *     s          = word to encode (any letter case)
 *     sPrimary   = receives the primary code, cut to at most 4 characters
 *     sSecondary = receives the alternate code (cut to 4 characters), or ""
 *                  when no rule produced an alternate
 *
 * Notes:
 *     - An empty input leaves both outputs empty.
 *     - The example codes quoted in the rule comments below are taken from
 *       the original comments of this module.
 *     - C-cedilla and N-tilde are recognised in their two-byte UTF-8 form
 *       (0xC3 followed by 0x87/0xA7 resp. 0x91/0xB1). The older cases
 *       written as '\u00C7' / '\u00D1' are kept unchanged.
 *       NOTE(review): since the switch inspects one char (one UTF-8 code
 *       unit) at a time, those older cases presumably only ever match a raw
 *       0xC7 / 0xD1 byte, e.g. Latin-1 encoded input - confirm before
 *       removing them.
 */
void getDoubleMetaphone( in char[] s, out char[] sPrimary, out char[] sSecondary )
{
    char[] sWord      = std.string.toupper( s ).dup;
    int    iCurrPos   = 0;
    int    iLen       = sWord.length;
    int    iLast      = iLen - 1;       // zero based index of the last character
    bool   bAlternate = false;          // set once any rule supplies an alternate code

    if ( iLen < 1 )
        return;

    // Appends sMain to the primary code. The secondary code receives sAlt
    // when one is given, otherwise sMain. A value starting with a blank is
    // never appended to the secondary code; a blank sAlt therefore means
    // "alternate exists, but contributes nothing here".
    void addMetaph( in char[] sMain, in char[] sAlt = "" )
    {
        if ( sMain != "" )
            sPrimary ~= sMain;

        if ( sAlt != "" )
        {
            bAlternate = true;
            if ( sAlt[ 0 ] != '\x20' )
                sSecondary ~= sAlt;
        }
        else if ( sMain != "" && ( sMain[ 0 ] != '\x20' ) )
        {
            sSecondary ~= sMain;
        }
    } // end void addMetaph( in char[], in char[] = "" )

    // skip these when at start of word
    if ( doesAnyStringMatchAt( sWord, 0, "GN", "KN", "PN", "WR", "PS" ) )
        iCurrPos += 1;

    // Initial 'X' is pronounced 'Z' e.g. ( "Xavier" - "SF" and "SFR" )
    if ( getCharAt( sWord, 0 ) == 'X' )
    {
        addMetaph( "S" ); // 'Z' maps to 'S'
        iCurrPos += 1;
    }

    // main loop
    while ( ( sPrimary.length < 4 ) || ( sSecondary.length < 4 ) )
    {
        if ( iCurrPos >= iLen )
            break;

        switch ( getCharAt( sWord, iCurrPos ) )
        {
            case 'A','E','I','O','U','Y':
                if ( iCurrPos == 0 )
                    // all init vowels now map to 'A'
                    addMetaph( "A" );
                iCurrPos += 1;
                break;

            case 'B':
                // "-mb", e.g., ( "dumb" - "TM" ), already skipped over...
                addMetaph( "P" );
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'B' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case '\xC3':
            {
                // 0xC3 is the UTF-8 lead byte of U+00C0 .. U+00FF; the byte
                // that follows selects the actual letter.
                char cTrail = getCharAt( sWord, iCurrPos + 1 );

                if ( ( cTrail == '\x87' ) || ( cTrail == '\xA7' ) )
                {
                    // U+00C7 / U+00E7 "Latin Letter C with Cedilla"
                    addMetaph( "S" );
                    iCurrPos += 2;
                }
                else if ( ( cTrail == '\x91' ) || ( cTrail == '\xB1' ) )
                {
                    // U+00D1 / U+00F1 "Latin Letter N with Tilde"
                    addMetaph( "N" );
                    iCurrPos += 2;
                }
                else
                {
                    // any other letter of that range is skipped, as before
                    iCurrPos += 1;
                }
                break;
            }

            case '\u00C7': // 0xC7 "Latin Capital Letter C with Cedilla"
                addMetaph( "S" );
                iCurrPos += 1;
                break;

            case 'C':
                // various germanic
                if ( ( iCurrPos > 1 )
                    && !isVowelAt( sWord, iCurrPos - 2 )
                    && doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "ACH" )
                    && ( ( getCharAt( sWord, iCurrPos + 2 ) != 'I' )
                        && ( ( getCharAt( sWord, iCurrPos + 2 ) != 'E' )
                            || doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "BACHER", "MACHER" ) ) ) )
                {
                    addMetaph( "K" );
                    iCurrPos += 2;
                    break;
                }

                // special case ( "caesar" - "SSR" )
                if ( ( iCurrPos == 0 )
                    && doesAnyStringMatchAt( sWord, iCurrPos, "CAESAR" ) )
                {
                    addMetaph( "S" );
                    iCurrPos += 2;
                    break;
                }

                // italian ( "chianti" - "KNT" )
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CHIA" ) )
                {
                    addMetaph( "K" );
                    iCurrPos += 2;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CH" ) )
                {
                    // find ( "michael" - "MKL" and "MXL" )
                    if ( ( iCurrPos > 0 )
                        && doesAnyStringMatchAt( sWord, iCurrPos, "CHAE" ) )
                    {
                        addMetaph( "K", "X" );
                        iCurrPos += 2;
                        break;
                    }

                    // greek roots e.g. ( "chemistry" - "KMST" ), ( "chorus" - "KRS" )
                    if ( ( iCurrPos == 0 )
                        && ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "HARAC", "HARIS" )
                            || doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "HOR", "HYM", "HIA", "HEM" ) )
                        && !doesAnyStringMatchAt( sWord, 0, "CHORE" ) )
                    {
                        addMetaph( "K" );
                        iCurrPos += 2;
                        break;
                    }

                    // germanic, greek, or otherwise "ch" for "kh" sound
                    if ( ( doesAnyStringMatchAt( sWord, 0, "VAN ", "VON " )
                            || doesAnyStringMatchAt( sWord, 0, "SCH" ) )
                        // ( "architect" - "ARKT" ) but not ( "arch" - "ARX" and "ARK" ),
                        // ( "orchestra" - "ARKS" ), ( "orchid" - "ARKT" )
                        || doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "ORCHES", "ARCHIT", "ORCHID" )
                        || doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "T", "S" )
                        || ( ( doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "A", "O", "U", "E" )
                                || ( iCurrPos == 0 ) )
                            // e.g., ( "wachtler" - "AKTL" and "FKTL" ), ( "wechsler" - "AKSL" and "FKSL" ),
                            // but not ( "tichner" - "TXNR" and "TKNR" )
                            && doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "L", "R", "N", "M", "B", "H", "F", "V", "W", " " ) ) )
                    {
                        addMetaph( "K" );
                    }
                    else
                    {
                        if ( iCurrPos > 0 )
                        {
                            if ( doesAnyStringMatchAt( sWord, 0, "MC" ) )
                                // e.g., ( "McHugh" - "MK" )
                                addMetaph( "K" );
                            else
                                addMetaph( "X", "K" );
                        }
                        else
                            addMetaph( "X" );
                    }
                    iCurrPos += 2;
                    break;
                }

                // e.g, ( "czerny" - "SRN" and "XRN" )
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CZ" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "WICZ" ) )
                {
                    addMetaph( "S", "X" );
                    iCurrPos += 2;
                    break;
                }

                // e.g., ( "focaccia" - "FKX" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "CIA" ) )
                {
                    addMetaph( "X" );
                    iCurrPos += 3;
                    break;
                }

                // double 'C', but not if e.g. ( "McClellan" - "MKLL" )
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CC" )
                    && !( ( iCurrPos == 1 )
                        && ( getCharAt( sWord, 0 ) == 'M' ) ) )
                {
                    // ( "bellocchio" - "PLX" ) but not ( "bacchus" - "PKS" )
                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "I", "E", "H" )
                        && !doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "HU" ) )
                    {
                        // ( "accident" - "AKST" ), ( "accede" - "AKST" ), ( "succeed" - "SKST" )
                        if ( ( ( iCurrPos == 1 )
                                && ( getCharAt( sWord, iCurrPos - 1 ) == 'A' ) )
                            || doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "UCCEE", "UCCES" ) )
                            addMetaph( "KS" );
                        // ( "bacci" - "PX" ), ( "bertucci" - "PRTX" ), other italian
                        else
                            addMetaph( "X" );
                        iCurrPos += 3;
                        break;
                    }
                    else
                    {   // Pierce's rule
                        addMetaph( "K" );
                        iCurrPos += 2;
                        break;
                    }
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CK", "CG", "CQ" ) )
                {
                    addMetaph( "K" );
                    iCurrPos += 2;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "CI", "CE", "CY" ) )
                {
                    // italian vs. english
                    if ( doesAnyStringMatchAt( sWord, iCurrPos, "CIO", "CIE", "CIA" ) )
                        addMetaph( "S", "X" );
                    else
                        addMetaph( "S" );
                    iCurrPos += 2;
                    break;
                }

                // else
                addMetaph( "K" );

                // name sent in ( "mac caffrey" - "MKFR" ), ( "mac gregor" - "MKRK" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), " C", " Q", " G" ) )
                    iCurrPos += 3;
                else if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "C", "K", "Q" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "CE", "CI" ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case 'D':
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "DG" ) )
                {
                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "I", "E", "Y" ) )
                    {
                        // e.g. ( "edge" - "AJ" )
                        addMetaph( "J" );
                        iCurrPos += 3;
                        break;
                    }
                    else
                    {
                        // e.g. ( "edgar" - "ATKR" )
                        addMetaph( "TK" );
                        iCurrPos += 2;
                        break;
                    }
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "DT", "DD" ) )
                {
                    addMetaph( "T" );
                    iCurrPos += 2;
                    break;
                }

                // else
                addMetaph( "T" );
                iCurrPos += 1;
                break;

            case 'F':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'F' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "F" );
                break;

            case 'G':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'H' )
                {
                    if ( ( iCurrPos > 0 )
                        && !isVowelAt( sWord, iCurrPos - 1 ) )
                    {
                        addMetaph( "K" );
                        iCurrPos += 2;
                        break;
                    }

                    if ( iCurrPos < 3 )
                    {
                        // ( "ghislane" - "JLN" ), ( "ghiradelli" - "JRTL" )
                        if ( iCurrPos == 0 )
                        {
                            if ( getCharAt( sWord, iCurrPos + 2 ) == 'I' )
                                addMetaph( "J" );
                            else
                                addMetaph( "K" );
                            iCurrPos += 2;
                            break;
                        }
                    }

                    // Parker's rule (with some further refinements) - e.g., ( "hugh" - "H" )
                    if ( ( ( iCurrPos > 1 ) && doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "B", "H", "D" ) )
                        // e.g., ( "bough" - "P" )
                        || ( ( iCurrPos > 2 ) && doesAnyStringMatchAt( sWord, ( iCurrPos - 3 ), "B", "H", "D" ) )
                        // e.g., ( "broughton" - "PRTN" )
                        || ( ( iCurrPos > 3 ) && doesAnyStringMatchAt( sWord, ( iCurrPos - 4 ), "B", "H" ) ) )
                    {
                        iCurrPos += 2;
                        break;
                    }
                    else
                    {
                        // e.g., ( "laugh" - "LF" ), ( "McLaughlin" - "MKLF" ),
                        // ( "cough" - "KF" ), ( "gough" - "KF" ), ( "rough" - "RF" ), ( "tough" - "TF" )
                        if ( ( iCurrPos > 2 )
                            && ( getCharAt( sWord, iCurrPos - 1 ) == 'U' )
                            && doesAnyStringMatchAt( sWord, ( iCurrPos - 3 ), "C", "G", "L", "R", "T" ) )
                        {
                            addMetaph( "F" );
                        }
                        else
                        {
                            if ( ( iCurrPos > 0 )
                                && getCharAt( sWord, iCurrPos - 1 ) != 'I' )
                                addMetaph( "K" );
                        }
                        iCurrPos += 2;
                        break;
                    }
                }

                if ( getCharAt( sWord, iCurrPos + 1 ) == 'N' )
                {
                    if ( ( iCurrPos == 1 )
                        && isVowelAt( sWord, 0 )
                        && !isSlavoGermanic( sWord ) )
                    {
                        addMetaph( "KN", "N" );
                    }
                    else
                    {
                        // not e.g. ( "cagney" - "KKN" )
                        // NOTE(review): the character at iCurrPos + 1 is known to be 'N'
                        // in this branch, so the 'Y' test below always holds - kept as is.
                        if ( !doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "EY" )
                            && ( getCharAt( sWord, iCurrPos + 1 ) != 'Y' )
                            && !isSlavoGermanic( sWord ) )
                        {
                            addMetaph( "N", "KN" );
                        }
                        else
                            addMetaph( "KN" );
                    }
                    iCurrPos += 2;
                    break;
                }

                // ( "tagliaro" - "TKLR" and "TLR" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "LI" )
                    && !isSlavoGermanic( sWord ) )
                {
                    addMetaph( "KL", "L" );
                    iCurrPos += 2;
                    break;
                }

                // -ges-,-gep-,-gel-, -gie- at beginning
                if ( ( iCurrPos == 0 )
                    && ( ( getCharAt( sWord, iCurrPos + 1 ) == 'Y' )
                        || doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" ) ) )
                {
                    addMetaph( "K", "J" );
                    iCurrPos += 2;
                    break;
                }

                // -ger-, -gy-
                if ( ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "ER" )
                        || ( getCharAt( sWord, iCurrPos + 1 ) == 'Y' ) )
                    && !doesAnyStringMatchAt( sWord, 0, "DANGER", "RANGER", "MANGER" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "E", "I" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "RGY", "OGY" ) )
                {
                    addMetaph( "K", "J" );
                    iCurrPos += 2;
                    break;
                }

                // italian e.g, ( "biaggi" - "PJ" and "PK" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "E", "I", "Y" )
                    || doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "AGGI", "OGGI" ) )
                {
                    // obvious germanic
                    if ( ( doesAnyStringMatchAt( sWord, 0, "VAN ", "VON " )
                            || doesAnyStringMatchAt( sWord, 0, "SCH" ) )
                        || doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "ET" ) )
                    {
                        addMetaph( "K" );
                    }
                    else
                    {
                        // always soft if french ending
                        if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "IER " ) )
                            addMetaph( "J" );
                        else
                            addMetaph( "J", "K" );
                    }
                    iCurrPos += 2;
                    break;
                }

                if ( getCharAt( sWord, iCurrPos + 1 ) == 'G' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "K" );
                break;

            case 'H':
                // only keep if first & before vowel or btw. 2 vowels
                if ( ( ( iCurrPos == 0 )
                        || isVowelAt( sWord, iCurrPos - 1 ) )
                    && isVowelAt( sWord, iCurrPos + 1 ) )
                {
                    addMetaph( "H" );
                    iCurrPos += 2;
                }
                else // also takes care of "HH"
                    iCurrPos += 1;
                break;

            case 'J':
                // obvious spanish, ( "jose" - "JS" and "HS" ), ( "san jacinto" - "SNHS" )
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "JOSE" )
                    || doesAnyStringMatchAt( sWord, 0, "SAN " ) )
                {
                    if ( ( ( iCurrPos == 0 )
                            && ( getCharAt( sWord, iCurrPos + 4 ) == ' ' ) )
                        || doesAnyStringMatchAt( sWord, 0, "SAN " ) )
                    {
                        addMetaph( "H" );
                    }
                    else
                    {
                        addMetaph( "J", "H" );
                    }
                    iCurrPos += 1;
                    break;
                }

                if ( ( iCurrPos == 0 )
                    && !doesAnyStringMatchAt( sWord, iCurrPos, "JOSE" ) )
                {
                    addMetaph( "J", "A" ); // ( "Yankelovich" - "ANKL" ) / ( "Jankelowicz" - "JNKL" and "ANKL" )
                }
                // spanish pron. of e.g. ( "bajador" - "PJTR" and "PHTR" )
                else if ( isVowelAt( sWord, iCurrPos - 1 )
                    && !isSlavoGermanic( sWord )
                    && ( ( getCharAt( sWord, iCurrPos + 1 ) == 'A' )
                        || ( getCharAt( sWord, iCurrPos + 1 ) == 'O' ) ) )
                {
                    addMetaph( "J", "H" );
                }
                else if ( iCurrPos == iLast )
                {
                    addMetaph( "J", " " );
                }
                else if ( !doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "L", "T", "K", "S", "N", "M", "B", "Z" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "S", "K", "L" ) )
                {
                    addMetaph( "J" );
                }

                if ( getCharAt( sWord, iCurrPos + 1 ) == 'J' ) //it could happen!
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case 'K':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'K' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "K" );
                break;

            case 'L':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'L' )
                {
                    // spanish e.g. ( "cabrillo" - "KPRL" and "KPR" ), ( "gallegos" - "KLKS" and "KKS" )
                    if ( ( ( iCurrPos == ( iLen - 3 ) )
                            && doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "ILLO", "ILLA", "ALLE" ) )
                        || ( ( doesAnyStringMatchAt( sWord, ( iLast - 1 ), "AS", "OS" )
                                || doesAnyStringMatchAt( sWord, iLast, "A", "O" ) )
                            && doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "ALLE" ) ) )
                    {
                        addMetaph( "L", " " );
                        iCurrPos += 2;
                        break;
                    }
                    iCurrPos += 2;
                }
                else
                {
                    iCurrPos += 1;
                }
                addMetaph( "L" );
                break;

            case 'M':
                if ( ( doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "UMB" )
                        && ( ( ( iCurrPos + 1 ) == iLast )
                            || doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "ER" ) ) )
                    // ( "dumb" - "TM" ), ( "thumb" - "0M" and "TM" )
                    || ( getCharAt( sWord, iCurrPos + 1 ) == 'M' ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "M" );
                break;

            case 'N':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'N' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "N" );
                break;

            case '\u00D1': // 0xD1 "Latin Capital Letter N with Tilde"
                iCurrPos += 1;
                addMetaph( "N" );
                break;

            case 'P':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'H' )
                {
                    addMetaph( "F" );
                    iCurrPos += 2;
                    break;
                }

                // also account for ( "campbell" - "KMPL" ), ( "raspberry" - "RSPR" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "P", "B" ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "P" );
                break;

            case 'Q':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'Q' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "K" );
                break;

            case 'R':
                // french e.g. ( "rogier" - "RJ" and "RKR" ), but exclude ( "hochmeier" - "HKMR" )
                if ( ( iCurrPos == iLast )
                    && !isSlavoGermanic( sWord )
                    && doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "IE" )
                    && !doesAnyStringMatchAt( sWord, ( iCurrPos - 4 ), "ME", "MA" ) )
                {
                    addMetaph( "", "R" );
                }
                else
                    addMetaph( "R" );

                if ( getCharAt( sWord, iCurrPos + 1 ) == 'R' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case 'S':
                // special cases ( "island" - "ALNT" ), ( "isle" - "AL" ),
                // ( "carlisle" - "KRLL" ), ( "carlysle" - "KRLL" )
                if ( doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "ISL", "YSL" ) )
                {
                    iCurrPos += 1;
                    break;
                }

                // special case "sugar-"
                if ( ( iCurrPos == 0 )
                    && doesAnyStringMatchAt( sWord, iCurrPos, "SUGAR" ) )
                {
                    addMetaph( "X", "S" );
                    iCurrPos += 1;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "SH" ) )
                {
                    // germanic
                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "HEIM", "HOEK", "HOLM", "HOLZ" ) )
                        addMetaph( "S" );
                    else
                        addMetaph( "X" );
                    iCurrPos += 2;
                    break;
                }

                // italian & armenian
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "SIO", "SIA" )
                    || doesAnyStringMatchAt( sWord, iCurrPos, "SIAN" ) )
                {
                    if ( !isSlavoGermanic( sWord ) )
                        addMetaph( "S", "X" );
                    else
                        addMetaph( "S" );
                    iCurrPos += 3;
                    break;
                }

                // german & anglicisations, e.g. ( "smith" - "SM0" and "XMT" ) match ( "schmidt" - "XMT" and "SMT" ),
                // ( "snider" - "SNTR" and "XNTR" ) match ( "schneider" - "XNTR" and "SNTR" )
                // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if ( ( ( iCurrPos == 0 )
                        && doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "M", "N", "L", "W" ) )
                    || doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "Z" ) )
                {
                    addMetaph( "S", "X" );
                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "Z" ) )
                        iCurrPos += 2;
                    else
                        iCurrPos += 1;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "SC" ) )
                {
                    // Schlesinger's rule
                    if ( getCharAt( sWord, iCurrPos + 2 ) == 'H' )
                    {
                        // dutch origin, e.g. ( "school" - "SKL" ), ( "schooner" - "SKNR" )
                        if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 3 ), "OO", "ER", "EN", "UY", "ED", "EM" ) )
                        {
                            // ( "schermerhorn" - "XRMR" and "SKRM" ), ( "schenker" - "XNKR" and "SKNK" )
                            if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 3 ), "ER", "EN" ) )
                            {
                                addMetaph( "X", "SK" );
                            }
                            else
                            {
                                addMetaph( "SK" );
                            }
                            iCurrPos += 3;
                            break;
                        }
                        else
                        {
                            if ( ( iCurrPos == 0 )
                                && !isVowelAt( sWord, 3 )
                                && ( getCharAt( sWord, 3 ) != 'W' ) )
                            {
                                addMetaph( "X", "S" );
                            }
                            else
                            {
                                addMetaph( "X" );
                            }
                            iCurrPos += 3;
                            break;
                        }
                    }

                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "I", "E", "Y" ) )
                    {
                        addMetaph( "S" );
                        iCurrPos += 3;
                        break;
                    }

                    //else
                    addMetaph( "SK" );
                    iCurrPos += 3;
                    break;
                }

                // french e.g. ( "resnais" - "RSN" and "RSNS" ), ( "artois" - "ART" and "ARTS" )
                if ( ( iCurrPos == iLast )
                    && doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "AI", "OI" ) )
                {
                    addMetaph( "", "S" );
                }
                else
                {
                    addMetaph( "S" );
                }

                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "S", "Z" ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case 'T':
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "TION" ) )
                {
                    addMetaph( "X" );
                    iCurrPos += 3;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "TIA", "TCH" ) )
                {
                    addMetaph( "X" );
                    iCurrPos += 3;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, iCurrPos, "TH" )
                    || doesAnyStringMatchAt( sWord, iCurrPos, "TTH" ) )
                {
                    // special case ( "thomas" - "TMS" ), ( "thames" - "TMS" ) or germanic
                    if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 2 ), "OM", "AM" )
                        || doesAnyStringMatchAt( sWord, 0, "VAN ", "VON " )
                        || doesAnyStringMatchAt( sWord, 0, "SCH" ) )
                    {
                        addMetaph( "T" );
                    }
                    else
                    {
                        addMetaph( "0", "T" );
                    }
                    iCurrPos += 2;
                    break;
                }

                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "T", "D" ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "T" );
                break;

            case 'V':
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'V' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                addMetaph( "F" );
                break;

            case 'W':
                // can also be in middle of word
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "WR" ) )
                {
                    addMetaph( "R" );
                    iCurrPos += 2;
                    break;
                }

                if ( ( iCurrPos == 0 )
                    && ( isVowelAt( sWord, iCurrPos + 1 )
                        || doesAnyStringMatchAt( sWord, iCurrPos, "WH" ) ) )
                {
                    // ( "Wasserman" - "ASRM" and "FSRM" ) should match ( Vasserman - "FSRM" )
                    if ( isVowelAt( sWord, iCurrPos + 1 ) )
                        addMetaph( "A", "F" );
                    else
                        // need ( "Uomo" - "AM" ) to match ( "Womo" - "AM" and "FM" )
                        addMetaph( "A" );
                }

                // ( "Arnow" - "ARN" and "ARNF" ) should match ( "Arnoff" - "ARNF" )
                if ( ( ( iCurrPos == iLast )
                        && isVowelAt( sWord, iCurrPos - 1 ) )
                    || doesAnyStringMatchAt( sWord, ( iCurrPos - 1 ), "EWSKI", "EWSKY", "OWSKI", "OWSKY" )
                    || doesAnyStringMatchAt( sWord, 0, "SCH" ) )
                {
                    addMetaph( "", "F" );
                    iCurrPos += 1;
                    break;
                }

                // polish e.g. ( "filipowicz" - "FLPT" and "FLPF" )
                if ( doesAnyStringMatchAt( sWord, iCurrPos, "WICZ", "WITZ" ) )
                {
                    addMetaph( "TS", "FX" );
                    iCurrPos += 4;
                    break;
                }

                // else skip it
                iCurrPos += 1;
                break;

            case 'X':
                // french e.g. ( "breaux" - "PR" )
                if ( !( ( iCurrPos == iLast )
                        && ( doesAnyStringMatchAt( sWord, ( iCurrPos - 3 ), "IAU", "EAU" )
                            || doesAnyStringMatchAt( sWord, ( iCurrPos - 2 ), "AU", "OU" ) ) ) )
                {
                    addMetaph( "KS" );
                }

                if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "C", "X" ) )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            case 'Z':
                // chinese pinyin e.g. ( "zhao" - "J" )
                if ( getCharAt( sWord, iCurrPos + 1 ) == 'H' )
                {
                    addMetaph( "J" );
                    iCurrPos += 2;
                    break;
                }
                else if ( doesAnyStringMatchAt( sWord, ( iCurrPos + 1 ), "ZO", "ZI", "ZA" )
                    || ( isSlavoGermanic( sWord )
                        && ( ( iCurrPos > 0 )
                            && getCharAt( sWord, iCurrPos - 1 ) != 'T' ) ) )
                {
                    addMetaph( "S", "TS" );
                }
                else
                    addMetaph( "S" );

                if ( getCharAt( sWord, iCurrPos + 1 ) == 'Z' )
                    iCurrPos += 2;
                else
                    iCurrPos += 1;
                break;

            default:
                // anything without a rule (digits, blanks, punctuation, ...) is skipped
                iCurrPos += 1;
        }
    }

    //writefln( "*end* sPrimary=%s, sSecondary=%s", sPrimary, sSecondary );

    // Both codes are limited to four characters.
    if ( sPrimary.length > 4 )
        sPrimary = sPrimary[ 0 .. 4 ];

    if ( bAlternate == true )
    {
        if ( sSecondary.length >= 4 )
            sSecondary = sSecondary[ 0 .. 4 ];
    }
    else
        // no rule produced an alternate: report an empty secondary code
        sSecondary = "";
} // end void getDoubleMetaphone( in char[], out char[], out char[] )
unittest
{
debug( doublemetaphone ) writefln( "getDoubleMetaphone( in char[], out char[], out char[] ).unittest" );
char[] sPrimary;
char[] sSecondary;
// "Occasionally" - "AKSN" and "AKXN"
getDoubleMetaphone( "Occasionally", sPrimary, sSecondary );
assert( sPrimary == "AKSN" );
assert( sSecondary == "AKXN" );
// "antidisestablishmentarianism" - "ANTT"
getDoubleMetaphone( "antidisestablishmentarianism", sPrimary, sSecondary );
assert( sPrimary == "ANTT" );
assert( sSecondary == "" );
// "appreciated" - "APRS" and "APRX"
getDoubleMetaphone( "appreciated", sPrimary, sSecondary );
assert( sPrimary == "APRS" );
assert( sSecondary == "APRX" );
// "beginning" - "PJNN" and "PKNN"
getDoubleMetaphone( "beginning", sPrimary, sSecondary );
assert( sPrimary == "PJNN" );
assert( sSecondary == "PKNN" );
// "changing" - "XNJN" and "XNKN"
getDoubleMetaphone( "changing", sPrimary, sSecondary );
assert( sPrimary == "XNJN" );
assert( sSecondary == "XNKN" );
// "cheat" - "XT"
getDoubleMetaphone( "cheat", sPrimary, sSecondary );
assert( sPrimary == "XT" );
assert( sSecondary == "" );
// "dangerous" - "TNJR" and "TNKR"
getDoubleMetaphone( "dangerous", sPrimary, sSecondary );
assert( sPrimary == "TNJR" );
assert( sSecondary == "TNKR" );
// "development" - "TFLP"
getDoubleMetaphone( "development", sPrimary, sSecondary );
assert( sPrimary == "TFLP" );
assert( sSecondary == "" );
// "etiology" - "ATLJ" and "ATLK"
getDoubleMetaphone( "etiology", sPrimary, sSecondary );
assert( sPrimary == "ATLJ" );
assert( sSecondary == "ATLK" );
// "existence" - "AKSS"
getDoubleMetaphone( "existence", sPrimary, sSecondary );
assert( sPrimary == "AKSS" );
assert( sSecondary == "" );
// "simplicity" - "SMPL"
getDoubleMetaphone( "simplicity", sPrimary, sSecondary );
assert( sPrimary == "SMPL" );
assert( sSecondary == "" );
// "circumstances" - "SRKM"
getDoubleMetaphone( "circumstances", sPrimary, sSecondary );
assert( sPrimary == "SRKM" );
assert( sSecondary == "" );
// "fiery" - "FR"
getDoubleMetaphone( "fiery", sPrimary, sSecondary );
assert( sPrimary == "FR" );
assert( sSecondary == "" );
// "february" - "FPRR"
getDoubleMetaphone( "february", sPrimary, sSecondary );
assert( sPrimary == "FPRR" );
assert( sSecondary == "" );
// "illegitimate" - "ALJT" and "ALKT"
getDoubleMetaphone( "illegitimate", sPrimary, sSecondary );
assert( sPrimary == "ALJT" );
assert( sSecondary == "ALKT" );
// "immediately" - "AMTT"
getDoubleMetaphone( "immediately", sPrimary, sSecondary );
assert( sPrimary == "AMTT" );
assert( sSecondary == "" );
// "happily" - "HPL"
getDoubleMetaphone( "happily", sPrimary, sSecondary );
assert( sPrimary == "HPL" );
assert( sSecondary == "" );
// "judgment" - "JTKM" and "ATKM"
getDoubleMetaphone( "judgment", sPrimary, sSecondary );
assert( sPrimary == "JTKM" );
assert( sSecondary == "ATKM" );
// "knowing" - "NNK"
getDoubleMetaphone( "knowing", sPrimary, sSecondary );
assert( sPrimary == "NNK" );
assert( sSecondary == "" );
// "kipper" - "KPR"
getDoubleMetaphone( "kipper", sPrimary, sSecondary );
assert( sPrimary == "KPR" );
assert( sSecondary == "" );
// "john" - "JN" and "AN"
getDoubleMetaphone( "john", sPrimary, sSecondary );
assert( sPrimary == "JN" );
assert( sSecondary == "AN" );
// "lesion" - "LSN" and "LXN"
getDoubleMetaphone( "lesion", sPrimary, sSecondary );
assert( sPrimary == "LSN" );
assert( sSecondary == "LXN" );
// "Xavier" - "SF" and "SFR"
getDoubleMetaphone( "Xavier", sPrimary, sSecondary );
assert( sPrimary == "SF" );
assert( sSecondary == "SFR" );
// "dumb" - "TM"
getDoubleMetaphone( "dumb", sPrimary, sSecondary );
assert( sPrimary == "TM" );
assert( sSecondary == "" );
// "caesar" - "SSR"
getDoubleMetaphone( "caesar", sPrimary, sSecondary );
assert( sPrimary == "SSR" );
assert( sSecondary == "" );
// "chianti" - "KNT"
getDoubleMetaphone( "chianti", sPrimary, sSecondary );
assert( sPrimary == "KNT" );
assert( sSecondary == "" );
// "michael" - "MKL" and "MXL"
getDoubleMetaphone( "michael", sPrimary, sSecondary );
assert( sPrimary == "MKL" );
assert( sSecondary == "MXL" );
// "chemistry" - "KMST"
getDoubleMetaphone( "chemistry", sPrimary, sSecondary );
assert( sPrimary == "KMST" );
assert( sSecondary == "" );
// "chorus" - "KRS"
getDoubleMetaphone( "chorus", sPrimary, sSecondary );
assert( sPrimary == "KRS" );
assert( sSecondary == "" );
// "architect - "ARKT"
getDoubleMetaphone( "architect", sPrimary, sSecondary );
assert( sPrimary == "ARKT" );
assert( sSecondary == "" );
// "arch" - "ARX" and "ARK"
getDoubleMetaphone( "arch", sPrimary, sSecondary );
assert( sPrimary == "ARX" );
assert( sSecondary == "ARK" );
// "orchestra" - "ARKS"
getDoubleMetaphone( "orchestra", sPrimary, sSecondary );
assert( sPrimary == "ARKS" );
assert( sSecondary == "" );
// "orchid" - "ARKT"
getDoubleMetaphone( "orchid", sPrimary, sSecondary );
assert( sPrimary == "ARKT" );
assert( sSecondary == "" );
// "wachtler" - "AKTL" and "FKTL"
getDoubleMetaphone( "wachtler", sPrimary, sSecondary );
assert( sPrimary == "AKTL" );
assert( sSecondary == "FKTL" );
// "wechsler" - "AKSL" and "FKSL"
getDoubleMetaphone( "wechsler", sPrimary, sSecondary );
assert( sPrimary == "AKSL" );
assert( sSecondary == "FKSL" );
// "tichner" - "TXNR" and "TKNR"
getDoubleMetaphone( "tichner", sPrimary, sSecondary );
assert( sPrimary == "TXNR" );
assert( sSecondary == "TKNR" );
// "McHugh" - "MK"
getDoubleMetaphone( "McHugh", sPrimary, sSecondary );
assert( sPrimary == "MK" );
assert( sSecondary == "" );
// "czerny" - "SRN" and "XRN"
getDoubleMetaphone( "czerny", sPrimary, sSecondary );
assert( sPrimary == "SRN" );
assert( sSecondary == "XRN" );
// "focaccia" - "FKX"
getDoubleMetaphone( "focaccia", sPrimary, sSecondary );
assert( sPrimary == "FKX" );
assert( sSecondary == "" );
// "bellocchio" - "PLX"
getDoubleMetaphone( "bellocchio", sPrimary, sSecondary );
assert( sPrimary == "PLX" );
assert( sSecondary == "" );
// "bacchus" - "PKS"
getDoubleMetaphone( "bacchus", sPrimary, sSecondary );
assert( sPrimary == "PKS" );
assert( sSecondary == "" );
// "accident" - "AKST"
getDoubleMetaphone( "accident", sPrimary, sSecondary );
assert( sPrimary == "AKST" );
assert( sSecondary == "" );
// "accede" - "AKST"
getDoubleMetaphone( "accede", sPrimary, sSecondary );
assert( sPrimary == "AKST" );
assert( sSecondary == "" );
// "succeed" - "SKST"
getDoubleMetaphone( "succeed", sPrimary, sSecondary );
assert( sPrimary == "SKST" );
assert( sSecondary == "" );
// "bacci" - "PX"
getDoubleMetaphone( "bacci", sPrimary, sSecondary );
assert( sPrimary == "PX" );
assert( sSecondary == "" );
// "bertucci" - "PRTX"
getDoubleMetaphone( "bertucci", sPrimary, sSecondary );
assert( sPrimary == "PRTX" );
assert( sSecondary == "" );
// "mac caffrey" - "MKFR"
getDoubleMetaphone( "mac caffrey", sPrimary, sSecondary );
assert( sPrimary == "MKFR" );
assert( sSecondary == "" );
// "mac gregor" - "MKRK"
getDoubleMetaphone( "mac gregor", sPrimary, sSecondary );
assert( sPrimary == "MKRK" );
assert( sSecondary == "" );
// "edge" - "AJ"
getDoubleMetaphone( "edge", sPrimary, sSecondary );
assert( sPrimary == "AJ" );
assert( sSecondary == "" );
// "edgar" - "ATKR"
getDoubleMetaphone( "edgar", sPrimary, sSecondary );
assert( sPrimary == "ATKR" );
assert( sSecondary == "" );
// "ghislane" - "JLN"
getDoubleMetaphone( "ghislane", sPrimary, sSecondary );
assert( sPrimary == "JLN" );
assert( sSecondary == "" );
// ghiradelli - "JRTL"
getDoubleMetaphone( "ghiradelli", sPrimary, sSecondary );
assert( sPrimary == "JRTL" );
assert( sSecondary == "" );
// "hugh" - "H"
getDoubleMetaphone( "hugh", sPrimary, sSecondary );
assert( sPrimary == "H" );
assert( sSecondary == "" );
// "bough" - "P"
getDoubleMetaphone( "bough", sPrimary, sSecondary );
assert( sPrimary == "P" );
assert( sSecondary == "" );
// "broughton" - "PRTN"
getDoubleMetaphone( "broughton", sPrimary, sSecondary );
assert( sPrimary == "PRTN" );
assert( sSecondary == "" );
// "laugh" - "LF"
getDoubleMetaphone( "laugh", sPrimary, sSecondary );
assert( sPrimary == "LF" );
assert( sSecondary == "" );
// "McLaughlin" - "MKLF"
getDoubleMetaphone( "McLaughlin", sPrimary, sSecondary );
assert( sPrimary == "MKLF" );
assert( sSecondary == "" );
// "cough" - "KF"
getDoubleMetaphone( "cough", sPrimary, sSecondary );
assert( sPrimary == "KF" );
assert( sSecondary == "" );
// "gough" - "KF"
getDoubleMetaphone( "gough", sPrimary, sSecondary );
assert( sPrimary == "KF" );
assert( sSecondary == "" );
// "rough" - "RF"
getDoubleMetaphone( "rough", sPrimary, sSecondary );
assert( sPrimary == "RF" );
assert( sSecondary == "" );
// "tough" - "TF"
getDoubleMetaphone( "tough", sPrimary, sSecondary );
assert( sPrimary == "TF" );
assert( sSecondary == "" );
// "cagney" - "KKN"
getDoubleMetaphone( "cagney", sPrimary, sSecondary );
assert( sPrimary == "KKN" );
assert( sSecondary == "" );
// "tagliaro" - "TKLR" and "TLR"
getDoubleMetaphone( "tagliaro", sPrimary, sSecondary );
assert( sPrimary == "TKLR" );
assert( sSecondary == "TLR" );
// "biaggi" - "PJ" and "PK"
getDoubleMetaphone( "biaggi", sPrimary, sSecondary );
assert( sPrimary == "PJ" );
assert( sSecondary == "PK" );
// "san jacinto" - "SNHS"
getDoubleMetaphone( "san jacinto", sPrimary, sSecondary );
assert( sPrimary == "SNHS" );
assert( sSecondary == "" );
// Yankelovich - "ANKL"
getDoubleMetaphone( "Yankelovich", sPrimary, sSecondary );
assert( sPrimary == "ANKL" );
assert( sSecondary == "" );
// Jankelowicz - "JNKL" and "ANKL"
getDoubleMetaphone( "Jankelowicz", sPrimary, sSecondary );
assert( sPrimary == "JNKL" );
assert( sSecondary == "ANKL" );
// "bajador" - "PJTR" and "PHTR"
getDoubleMetaphone( "bajador", sPrimary, sSecondary );
assert( sPrimary == "PJTR" );
assert( sSecondary == "PHTR" );
// "cabrillo" - "KPRL" and "KPR"
getDoubleMetaphone( "cabrillo", sPrimary, sSecondary );
assert( sPrimary == "KPRL" );
assert( sSecondary == "KPR" );
// "gallegos" - "KLKS" and "KKS"
getDoubleMetaphone( "gallegos", sPrimary, sSecondary );
assert( sPrimary == "KLKS" );
assert( sSecondary == "KKS" );
// "dumb" - "TM"
getDoubleMetaphone( "dumb", sPrimary, sSecondary );
assert( sPrimary == "TM" );
assert( sSecondary == "" );
// "thumb" - "0M" and "TM"
getDoubleMetaphone( "thumb", sPrimary, sSecondary );
assert( sPrimary == "0M" );
assert( sSecondary == "TM" );
// "campbell" - "KMPL"
getDoubleMetaphone( "campbell", sPrimary, sSecondary );
assert( sPrimary == "KMPL" );
assert( sSecondary == "" );
// "raspberry" - "RSPR"
getDoubleMetaphone( "raspberry", sPrimary, sSecondary );
assert( sPrimary == "RSPR" );
assert( sSecondary == "" );
// "hochmeier" - "HKMR"
getDoubleMetaphone( "hochmeier", sPrimary, sSecondary );
assert( sPrimary == "HKMR" );
assert( sSecondary == "" );
// "island" - "ALNT"
getDoubleMetaphone( "island", sPrimary, sSecondary );
assert( sPrimary == "ALNT" );
assert( sSecondary == "" );
// "isle" - "AL"
getDoubleMetaphone( "isle", sPrimary, sSecondary );
assert( sPrimary == "AL" );
assert( sSecondary == "" );
// "carlisle" - "KRLL"
getDoubleMetaphone( "carlisle", sPrimary, sSecondary );
assert( sPrimary == "KRLL" );
assert( sSecondary == "" );
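// "carlysle" - "KRLL"
// NOTE(review): the call below passes "carlysler" (with a trailing 'r'), not
// "carlysle" - looks like a typo; presumably it goes unnoticed because the
// expected 4-character code is the same. Confirm and correct the test word.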
getDoubleMetaphone( "carlysler", sPrimary, sSecondary );
assert( sPrimary == "KRLL" );
assert( sSecondary == "" );
// "smith" - "SM0" and "XMT"
getDoubleMetaphone( "smith", sPrimary, sSecondary );
assert( sPrimary == "SM0" );
assert( sSecondary == "XMT" );
// "schmidt" - "XMT" and "SMT"
getDoubleMetaphone( "schmidt", sPrimary, sSecondary );
assert( sPrimary == "XMT" );
assert( sSecondary == "SMT" );
// "snider" - "SNTR" and "XNTR"
getDoubleMetaphone( "snider", sPrimary, sSecondary );
assert( sPrimary == "SNTR" );
assert( sSecondary == "XNTR" );
// "schneider" - "XNTR" and "SNTR"
getDoubleMetaphone( "schneider", sPrimary, sSecondary );
assert( sPrimary == "XNTR" );
assert( sSecondary == "SNTR" );
// "school" - "SKL"
getDoubleMetaphone( "school", sPrimary, sSecondary );
assert( sPrimary == "SKL" );
assert( sSecondary == "" );
// "schooner" - "SKNR"
getDoubleMetaphone( "schooner", sPrimary, sSecondary );
assert( sPrimary == "SKNR" );
assert( sSecondary == "" );
// "schermerhorn" - "XRMR" and "SKRM"
getDoubleMetaphone( "schermerhorn", sPrimary, sSecondary );
assert( sPrimary == "XRMR" );
assert( sSecondary == "SKRM" );
// "schenker" - "XNKR" and "SKNK"
getDoubleMetaphone( "schenker", sPrimary, sSecondary );
assert( sPrimary == "XNKR" );
assert( sSecondary == "SKNK" );
// "resnais" - "RSN" and "RSNS"
getDoubleMetaphone( "resnais", sPrimary, sSecondary );
assert( sPrimary == "RSN" );
assert( sSecondary == "RSNS" );
// "artois" - "ART" and "ARTS"
getDoubleMetaphone( "artois", sPrimary, sSecondary );
assert( sPrimary == "ART" );
assert( sSecondary == "ARTS" );
// "thomas" - "TMS"
getDoubleMetaphone( "thomas", sPrimary, sSecondary );
assert( sPrimary == "TMS" );
assert( sSecondary == "" );
// Wasserman - "ASRM" and "FSRM"
getDoubleMetaphone( "Wasserman", sPrimary, sSecondary );
assert( sPrimary == "ASRM" );
assert( sSecondary == "FSRM" );
// Vasserman - "FSRM"
getDoubleMetaphone( "Vasserman", sPrimary, sSecondary );
assert( sPrimary == "FSRM" );
assert( sSecondary == "" );
// Uomo - "AM"
getDoubleMetaphone( "Uomo", sPrimary, sSecondary );
assert( sPrimary == "AM" );
assert( sSecondary == "" );
// Womo - "AM" and "FM"
getDoubleMetaphone( "Womo", sPrimary, sSecondary );
assert( sPrimary == "AM" );
assert( sSecondary == "FM" );
// Arnow - "ARN" and "ARNF"
getDoubleMetaphone( "Arnow", sPrimary, sSecondary );
assert( sPrimary == "ARN" );
assert( sSecondary == "ARNF" );
// Arnoff - "ARNF"
getDoubleMetaphone( "Arnoff", sPrimary, sSecondary );
assert( sPrimary == "ARNF" );
assert( sSecondary == "" );
// "filipowicz" - "FLPT" and "FLPF"
getDoubleMetaphone( "filipowicz", sPrimary, sSecondary );
assert( sPrimary == "FLPT" );
assert( sSecondary == "FLPF" );
// breaux - "PR"
getDoubleMetaphone( "breaux", sPrimary, sSecondary );
assert( sPrimary == "PR" );
assert( sSecondary == "" );
// "zhao" - "J"
getDoubleMetaphone( "zhao", sPrimary, sSecondary );
assert( sPrimary == "J" );
assert( sSecondary == "" );
// "thames" - "TMS"
getDoubleMetaphone( "thames", sPrimary, sSecondary );
//writefln( "thames - \"%s\" and \"%s\"", sPrimary, sSecondary );
assert( sPrimary == "TMS" );
assert( sSecondary == "" );
// The next two results ("jose" and "rogier") differ slightly from the
// "DoubleMetaphone Test" results at http://swoodbridge.com/DoubleMetaPhone/mptest.php3
// ----------------------------------------------------------
// "jose" - "JS" and "HS" (DoubleMetaphone Test got "HS" and "")
getDoubleMetaphone( "jose", sPrimary, sSecondary );
//writefln( "jose - \"%s\" and \"%s\"", sPrimary, sSecondary );
assert( sPrimary == "JS" );
assert( sSecondary == "HS" );
// "rogier" - "RJ" and "RKR" (DoubleMetaphone Test got "RJ" and "RJR" )
getDoubleMetaphone( "rogier", sPrimary, sSecondary );
//writefln( "rogier - \"%s\" and \"%s\"", sPrimary, sSecondary );
assert( sPrimary == "RJ" );
assert( sSecondary == "RKR" );
// ^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^-^
}
C:\dmd\MKoD_ex>..\bin\dmd doublemetaphone.d -debug=Test -unittest
C:\dmd\bin\..\..\dm\bin\link.exe doublemetaphone,,,user32+kernel32/noi;
C:\dmd\MKoD_ex>doublemetaphone
getDoubleMetaphone( in char[], out char[], out char[] ).unittest
unittest done.
Match found at 0 for one of the following: GN,KN,PN,WR,PS
"Write" is SlavoGermanic because one of following "W,K,CZ,WITZ" was found.
"Write" has a vowel in position 4
sWord=Write, sPrimary=RT, sSecondary=
sWord=agencies, sPrimary=AJNS, sSecondary=AKNX
sWord=nelson, sPrimary=NLSN, sSecondary=; should="NLSN"
sWord=Occasionally, sPrimary=AKSN, sSecondary=AKXN; should="AKSN", and "AKXN"
C:\dmd\MKoD_ex>