/* Soundex in Picat. See for example: * http://en.wikipedia.org/wiki/soundex * http://rosettacode.org/wiki/Soundex Model created by Hakan Kjellerstrand, hakank@gmail.com See also my Picat page: http://www.hakank.org/picat/ */ import util. main => go. % {{trans|C#}} go => Names = split("Lloyd Woolcock Donnell Baragwanath Williams Ashcroft Ashcraft Euler Ellery Gauss Ghosh Hilbert Heilbronn Knuth Kant Ladd Lukasiewicz Lissajous O'Hara"), SoundexNames = split("L300 W422 D540 B625 W452 A261 A261 E460 E460 G200 G200 H416 H416 K530 K530 L300 L222 L222 O600"), foreach({Name,Correct} in zip(Names, SoundexNames)) S = soundex(Name), printf("%s: %s ",Name,S), if S == Correct then println("ok") else printf("not correct! Should be: %s\n", Correct) end end, nl. go2 => Name = "Ashcraft", Correct = "A261", S = soundex(Name), println([Name,soundex=S,correct=Correct]), if S != Correct then println("\tThat was wrong!\n") end, nl. go3 => test. % {{trans|C#}} soundex("", _) = "" => true. soundex(Word) = Soundex => SoundexAlphabet = "0123012#02245501262301#202", Soundex = "", LastC = '?', foreach(Ch in Word.to_uppercase, C = ord(Ch), C >= 0'A', C <= 0'Z', Soundex.len < 4) ThisC := SoundexAlphabet[C-0'A'+1], Skip = false, % to handle '#' if Soundex.len == 0 then Soundex := Soundex ++ [Ch] elseif ThisC == '#' then Skip := true elseif ThisC != '0', ThisC != LastC then Soundex := Soundex ++ [ThisC] end, if Skip == false then LastC := ThisC end end, Soundex := Soundex.padRight(4,'0'). padRight(S,Len,PadChar) = S ++ [PadChar : _ in 1..Len-S.len]. % encode(C) = Code => % T = "-123-12*-22455-12623-1*2-2", % Code = T[ord(to_lowercase(C)) - 0'a + 1]. % This was inspired by the Python code from % http://code.activestate.com/recipes/52213/ % with the following comment: % """ % soundex module conforming to Knuth's algorithm % implementation 2000-12-24 by Gregory Jorgensen % public domain % """ soundex_old("", _) = "" => true. soundex_old(Name, Len) = Soundex => % digits holds the soundex values for the alphabet Digits = "01230120022455012623010202", Sndx = "", Fc = "", % translate alpha chars in name to soundex digits foreach(C in Name.to_uppercase()) if is_alpha(C) then if Fc == "" then Fc := C % remember first letter end, D = Digits[ord(C)-64], % duplicate consecutive soundex digits are skipped if Sndx.length == 0; D != Sndx[Sndx.length] then Sndx := Sndx ++ [D] end end end, % replace first digit with first alpha character if Sndx.length >= 2 then Sndx := [Fc] ++ [Sndx[J] : J in 2..Sndx.length] else Sndx := Fc end, % remove all 0s from the soundex code Sndx := [S : S in Sndx, S != '0'], % return soundex code padded to len characters if Len > Sndx.length then Sndx := Sndx ++ rep('0', (Len - Sndx.length)) end, Soundex := [Sndx[J] : J in 1..Len]. % for a single character is_alpha(C), char(C) => member(C,"ABCDEFGHIJKLMNOPQRSTUVWXYZ"). % check the whole string is_alpha(String), string(String) => Alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ", foreach(C in String) member(C,Alpha) end. % Repeat a string/character S N times. rep(S, N) = [S : _I in 1..N].