Category : Files from Magazines
Archive   : CGAZV4N2.ZIP
Filename : SOUNDEX4.C

 
Output of file : SOUNDEX4.C contained in archive : CGAZV4N2.ZIP
/************************** SOUNDEX4.C *************************
* Optimized Soundex Algorithm (Algorithm #4)
* Author: Joe Celko
* Compilers: Turbo C 2.0, Microsoft C 5.0
*
* Compile time switches:
* TEST to get a test driver
*
* Source code may be freely used if source is acknowledged
* Object code may be freely used
*/

/* Uppercases the name, folds every vowel (including Y) to 'A', applies
* prefix, infix and suffix letter substitutions, strips non-leading vowels
* and repeated letters, and returns the first n letters of the result.
* Many of the steps here could be combined into the same loop,
* but they are kept separate for clarity and to give the user
* a chance to experiment with changes.
*/

#define TEST 100

#include <ctype.h>
#include <string.h>

/* Overwrite every occurrence of `from` that starts AFTER the first
 * character of `buf` with `to`.
 *
 * `to` must have the same length as `from` and differ from it; every rule
 * used below strictly reduces the number of some letter in the buffer, so
 * the loop always terminates.
 *
 * The search starts at buf + 1 on purpose: searching from buf and then
 * rejecting a hit at index 0 would stop at that first hit and never reach
 * later occurrences.
 */
static void soundex4_infix(char *buf, const char *from, const char *to)
{
    size_t len = strlen(from);
    char *hit;

    if (buf[0] == '\0') {
        return;                 /* nothing after the first letter */
    }
    while ((hit = strstr(buf + 1, from)) != NULL) {
        memcpy(hit, to, len);
    }
}

/* Compute the letter-based "Soundex 4" code of a name.
 *
 * inname  - NUL-terminated name to transform; it is not modified.  Only
 *           the first 99 characters are considered.  NULL is treated as
 *           an empty name.
 * outcode - receives the code, always NUL-terminated.  The caller must
 *           provide room for n + 1 bytes (all n bytes plus the terminator
 *           are written, short codes being zero-padded).  If NULL, the
 *           function does nothing.
 * n       - maximum number of letters to return; negative values are
 *           treated as 0.
 *
 * Steps: uppercase and fold the vowels AEIOUY to 'A'; apply one prefix
 * substitution; apply the infix substitutions after the first letter;
 * trim terminal A/S and turn terminal NT into TT; drop every 'A' except
 * one in the first position; collapse runs of identical letters.
 */
void soundex4 (char *inname, /* name to be transformed */
char *outcode, /* where to put soundex code */
int n /* length of soundex code */
)
{
    /* Infix rules, applied in this order.  Each pair is {pattern,
     * same-length replacement}. */
    static const char *const infix[][2] = {
        { "DG",   "GG"   },
        { "CAAN", "TAAN" },
        { "D",    "T"    },
        { "NST",  "NSS"  },
        { "AV",   "AF"   },
        { "Q",    "G"    },
        { "Z",    "S"    },
        { "M",    "N"    },
        { "KN",   "NN"   },
        { "K",    "C"    },
        { "AH",   "AA"   },
        { "HA",   "AA"   },
        { "AW",   "AA"   },
        { "PH",   "FF"   },
        { "SCH",  "SSS"  }
    };
    char workbuf[100];          /* working copy, at most 99 letters */
    char *src, *dst;
    char prior;                 /* previous letter, for duplicate removal */
    size_t len, i;

    if (outcode == NULL) {
        return;
    }
    if (n < 0) {
        n = 0;                  /* a negative count would become a huge size_t */
    }
    if (inname == NULL) {
        inname = "";
    }

    /* Make an uppercased working copy with all vowels folded to 'A'.
     * toupper() needs an unsigned char value to be well defined. */
    for (len = 0; len < sizeof workbuf - 1 && inname[len] != '\0'; len++) {
        char c = (char)toupper((unsigned char)inname[len]);
        workbuf[len] = (strchr("AEIOUY", c) != NULL) ? 'A' : c;
    }
    workbuf[len] = '\0';

    /* Prefix transformations: at most one, on the front of the name.
     * Vowels are already 'A', so "MAC" matches M + any vowel + C. */
    if (strncmp(workbuf, "MAC", 3) == 0) {          /* MAC to MCC */
        workbuf[1] = 'C';
    } else if (strncmp(workbuf, "KN", 2) == 0) {    /* KN to NN */
        workbuf[0] = 'N';
    } else if (workbuf[0] == 'K') {                 /* K to C */
        workbuf[0] = 'C';
    } else if (strncmp(workbuf, "PF", 2) == 0) {    /* PF to FF */
        workbuf[0] = 'F';
    } else if (strncmp(workbuf, "SCH", 3) == 0) {   /* SCH to SSS */
        workbuf[1] = 'S';
        workbuf[2] = 'S';
    }

    /* Infix transformations: applied after the first letter, left to
     * right, one rule at a time until that rule no longer matches. */
    for (i = 0; i < sizeof infix / sizeof infix[0]; i++) {
        soundex4_infix(workbuf, infix[i][0], infix[i][1]);
    }

    /* Suffix transformations, working right to left. */

    /* (1) remove terminal A's and S's, never touching the first letter */
    len = strlen(workbuf);
    while (len > 1 && (workbuf[len - 1] == 'S' || workbuf[len - 1] == 'A')) {
        workbuf[--len] = '\0';
    }

    /* (2) terminal NT to TT; repeats leftwards so e.g. NNT becomes TTT */
    for (i = len; i > 1 && workbuf[i - 2] == 'N' && workbuf[i - 1] == 'T'; i--) {
        workbuf[i - 2] = 'T';
    }

    /* Strip every vowel except one in the first position: the first
     * character is copied unconditionally, then each run of 'A' that
     * follows a copied character is skipped. */
    src = dst = workbuf;
    while ((*dst++ = *src++) != '\0') {
        while (*src == 'A') {
            src++;
        }
    }

    /* Collapse runs of identical letters.  This starts at the very front
     * because the letter transforms can create duplicates there. */
    src = dst = workbuf;
    prior = '\001';             /* a value unlikely to occur in a name */
    do {
        while (*src == prior) {
            src++;
        }
        prior = *src;
    } while ((*dst++ = *src++) != '\0');

    /* Return the first n letters; strncpy zero-pads shorter codes. */
    strncpy(outcode, workbuf, (size_t)n);
    outcode[n] = '\0';
}

#if defined (TEST)

#include <stdio.h>
#include <stdlib.h>

/* Test driver: soundex4 name length
 *
 * Prints the Soundex 4 code of `name`, limited to `length` letters.
 * `length` must be a decimal integer that fits in the local output buffer
 * (0 .. sizeof outbuf - 1), because soundex4() writes length + 1 bytes.
 * Returns 0 on success and EXIT_FAILURE on a usage or argument error.
 */
int main(int argc, char **argv)
{
    char outbuf[50];
    char *end;
    long length;

    if (argc != 3) {
        puts("Usage: soundex4 name length\n");
        return EXIT_FAILURE;
    }

    /* strtol instead of atoi so that garbage and out-of-range values are
     * rejected rather than silently overflowing outbuf. */
    length = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' ||
        length < 0 || length >= (long)sizeof outbuf) {
        fprintf(stderr, "soundex4: length must be an integer from 0 to %d\n",
                (int)sizeof outbuf - 1);
        return EXIT_FAILURE;
    }

    soundex4(argv[1], outbuf, (int)length);

    printf(" Result: %s\n", outbuf);
    return 0;
}
#endif

  3 Responses to “Category : Files from Magazines
Archive   : CGAZV4N2.ZIP
Filename : SOUNDEX4.C

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/