Category : Miscellaneous Language Source Code
Archive   : QTAWKU42.ZIP
Filename : SOUNDX4.EXP

 
Output of file : SOUNDX4.EXP contained in archive : QTAWKU42.ZIP
# QTAwk Soundix Algorithm
#
# Optimized Soundix Algorithm. Adapted for QTAwk from article in:
# "The C Gazette", Vol. 4, No. 2, Autumn 1989, page 29, by Joe Celko
#
BEGIN {
sl = 4; # set code length
}

{
for ( i = 1 ; i <= NF ; i++ ) printf(" Result: %s ==> %s\n",$i,soundix4($i,sl));
}

function soundix4(inname,len) {
local workbuf;
local leading_letter;

#* make a working copy
workbuf = strupr(inname);

#* convert all vowels to 'A'
gsub(/[AEIOUY]/,'A',workbuf);

#* prefix transformations: done only once on the front of a name
sub(/^MAC/,"MCC",workbuf); # MAC -> MCC
sub(/^KN/ ,"NN" ,workbuf); # KN -> NN
sub(/^PF/ ,"FF" ,workbuf); # PF -> FF
sub(/^SCH/,"SSS",workbuf); # SCH -> SSS
sub(/^K/ ,'C' ,workbuf); # K -> C

#* preserve leading letter
leading_letter = substr(workbuf,1,1);
workbuf = substr(workbuf,2);

#* infix transformations: done after the first letter
#* and are from left to right on the name
gsub(/DG/ ,"GG" ,workbuf); # DG -> GG
gsub(/CAAN/,"TAAN",workbuf); # CAAN -> TAAN
gsub(/D/ ,'T' ,workbuf); # D -> T
gsub(/NST/ ,"NSS" ,workbuf); # NST -> NSS
gsub(/AV/ ,"AF" ,workbuf); # AV -> AF
gsub(/Q/ ,'G' ,workbuf); # Q -> G
gsub(/Z/ ,'S' ,workbuf); # Z -> S
gsub(/M/ ,'N' ,workbuf); # M -> N
gsub(/KN/ ,"NN" ,workbuf); # KN -> NN
gsub(/K/ ,'C' ,workbuf); # K -> C
gsub(/AH/ ,"AA" ,workbuf); # AH -> AA
gsub(/HA/ ,"AA" ,workbuf); # HA -> AA
gsub(/AW/ ,"AA" ,workbuf); # AW -> AA
gsub(/PH/ ,"FF" ,workbuf); # PH -> FF
gsub(/SCH/ ,"SSS" ,workbuf); # SCH -> SSS

#* suffix transformations: done on the end of the word going right to left
#* (1) remove terminal A's and S's
sub(/[AS]+$/,"",workbuf);

#* (2) terminal NT-> TT
sub(/NT$/,"TT",workbuf);

#* now strip out all vowels except the first - remember that all vowels
#* were transformed to 'A' earlier
gsub(/A/,"",workbuf);

#* remove all duplicate letters.
#* Note this is different from the Soundex3 duplicate cleanup because
#* the letter transforms can create duplicates at the front of the
#* output name
gsub(/B+/,'B',workbuf);
gsub(/C+/,'C',workbuf);
gsub(/F+/,'F',workbuf);
gsub(/H+/,'H',workbuf);
gsub(/G+/,'G',workbuf);
gsub(/J+/,'J',workbuf);
gsub(/L+/,'L',workbuf);
gsub(/N+/,'N',workbuf);
gsub(/P+/,'P',workbuf);
gsub(/R+/,'R',workbuf);
gsub(/S+/,'S',workbuf);
gsub(/T+/,'T',workbuf);
gsub(/V+/,'V',workbuf);
gsub(/W+/,'W',workbuf);
gsub(/X+/,'X',workbuf);

#* return proper length code
return leading_letter ï (len ? substr(workbuf,1,len - 1) : workbuf);
}


  3 Responses to “Category : Miscellaneous Language Source Code
Archive   : QTAWKU42.ZIP
Filename : SOUNDX4.EXP

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/