Category : Files from Magazines
Archive   : DDJ0492.ZIP
Filename : STRING.ASC

 
Output of file : STRING.ASC contained in archive : DDJ0492.ZIP
_FINDING STRING DISTANCES_
by Ray Valdes

[LISTING ONE]

/***********************************************************************
LEVDIST.C -- Computing the Levenshtein distance (string-to-string edit)
by Ray Valdes, DDJ April 92
***********************************************************************/

#define TRUE 1
#define FALSE 0
#define private static
#define public /**/
typedef int bool;

private bool verbose_mode = TRUE;

typedef enum { MATCH, INS, DEL, SUB } opcode;

typedef struct
{ int cost;
char* name;
int delta_row;
int delta_col;
} operation;

#define COST(op) (optable[(int)op].cost) // for convenience
#define OPSTR(op) (optable[(int)op].name) // for convenience

private operation optable[] = //costs defined on a per-op basis
{
/*--cost, name, delta_row, delta_col---------------------------------*/
{ 0, "EQU", -1, -1}, /* a match or no-op backtracks to NorthWest */
{ 1, "INS", 0, -1}, /* insert op backtrack to the West */
{ 1, "DEL", -1, 0}, /* delete op backstracks to the North */
{ 1, "SUB", -1, -1}, /* substitution op backtracks to NorthWest */
};

typedef struct
{ int distance;
opcode op;
} matrix_cell;

#define NUM_ROWS 64
#define NUM_COLS NUM_ROWS
#define SIZEOF_STRING NUM_ROWS

private char A [SIZEOF_STRING];
private char B [SIZEOF_STRING];
private matrix_cell M [NUM_ROWS] [NUM_COLS]; // this is The Matrix

#define DIST(i,j) (M [(i)][(j)].distance) // for convenience
/****************************************************************/

private void say_hello (void);
private bool get_strings (void);
private void initialize_matrix (void);
private void calculate_cell (int row,int col);
private void print_cell (int row,int col);
private void calculate_matrix (int num_rows,int num_cols);
private void backtrack_matrix (int num_rows,int num_cols);
/****************************************************************/
public int
main(int argc,char **argv)
{ say_hello();
while(get_strings())
{ initialize_matrix();
calculate_matrix(strlen(A),strlen(B));
backtrack_matrix(strlen(A),strlen(B));
}
return 0;
}
/****************************************************************/
private void
say_hello(void)
{ if(verbose_mode) printf("\nLevenshtein distance, V1.0");
}
/****************************************************************/
private bool
get_strings(void)
{ char buffer[SIZEOF_STRING*3]; //arbitrarily big buffer

printf("\nEnter first string > "); gets(buffer);
if(buffer[0]=='\0') return FALSE;
strcpy(A,buffer);
printf("\nEnter second string > "); gets(buffer);
if(buffer[0]=='\0') return FALSE;
strcpy(B,buffer);
return TRUE;
}
/****************************************************************/
private void
initialize_matrix(void)
{ int row,col;

for(row=0,col=0; col {
M [row][col].distance = col;
M [row][col].op = INS;
}
for(row=0,col=0; row {
M [row][col].distance = row;
M [row][col].op = DEL;
}
}
/****************************************************************/
private void
calculate_cell(int row,int col)

{ int dNorthWest = DIST(row-1, col-1);
int dWest = DIST(row , col-1);
int dNorth = DIST(row-1, col );

if(dWest < dNorth)
{ if(dWest < dNorthWest)
{ M [row][col].op = INS;
M [row][col].distance = dWest + COST(INS);
}
else // dNorthWest <= dWest < dNorth
{ opcode op;
op = ( A[row-1]==B[col-1] ) ? MATCH : SUB;
M [row][col].op = op;
M [row][col].distance = dNorthWest + COST(op);
}
}
else // dNorth <= dWest
{ if(dNorth < dNorthWest)
{ M [row][col].op = DEL;
M [row][col].distance = dNorth + COST(DEL);
}
else // dNorthWest <= dNorth <= dWest
{ opcode op;
op = ( A[row-1]==B[col-1] ) ? MATCH : SUB;
M [row][col].op = op;
M [row][col].distance = dNorthWest + COST(op);
}
}
}
/****************************************************************/
private void
calculate_matrix(int num_rows,int num_cols)
{ int row,col;

if(verbose_mode)
{ printf("\n\\\n \\COL |");
for(col=0; col < num_cols+1; col++)
printf("____%d____|");
printf("\nROW \\ | |");
for(col=1; col < num_cols+1; col++)
printf(" %c |", B [col-1]);
printf("\n 0 \\|");
for(row=0,col=0; col < num_cols+1; col++)
print_cell(row,col);
}
for(row=1; row { if(verbose_mode) printf("\n% 2d %c |",row, A [row-1]);
print_cell(row,0);
for(col=1; col {
calculate_cell(row,col);
if(verbose_mode) print_cell(row,col);
}
}
}

/****************************************************************/
private void
print_cell(int row,int col)
{ printf(" %d %s ",DIST(row,col),OPSTR( M [row][col].op));
}
/****************************************************************/
private void
backtrack_matrix(int num_rows,int num_cols)
{ int dx,dy;
int i,j;
int row = num_rows;
int col = num_cols;

printf("\n\nLevenshtein distance is %d.\n",DIST(row,col));
printf("\nBacktrace: %s",A);

while(row>0 || col>0)
{ if( ( M [row][col].op != MATCH) && verbose_mode)
{ printf("\nD(%d,%d)=%d ", row,col,DIST(row,col));
printf("%s --> ",OPSTR( M [row][col].op));
for(i=1;i printf("%c", A[i-1]);
/*printf("_");*/
for(j=col-(M[row][col].op==INS?1:0);j printf("%c", B[j-1]);
}
dy = optable[(int)( M [row][col].op)].delta_row;
dx = optable[(int)( M [row][col].op)].delta_col;
row += dy;
col += dx;
}
}




  3 Responses to “Category : Files from Magazines
Archive   : DDJ0492.ZIP
Filename : STRING.ASC

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/