Category : C Source Code
Archive   : CSRC2.ZIP
Filename : DIFF.C

 
Output of file : DIFF.C contained in archive : CSRC2.ZIP
/*
* D I F F
*/

#include
#include
#define EOS 0
#define TEMPFILE "diff.tmp"
#define TRUE 1
#define FALSE 0

#define MEGAMAX
#define TIMING

#ifdef MEGAMAX
#include
#define free(a) Mfree(a)
#define delete(a) Fdelete(a)
#define fgetss(a,b,c) fgets(a,b,c)
#define fputss(a,b) fputs(a,b)
#define short int
#endif

#ifdef pdp11
#define short int
#endif

extern long ftell();

typedef struct candidate {
int b; /* Line in fileB */
int a; /* Line in fileA */
int link; /* Previous candidate */
} CANDIDATE;

typedef struct line {
unsigned short hash; /* Hash value etc. */
short serial; /* Line number */
} LINE;

LINE *file[2]; /* Hash/line for total file */
#define fileA file[0]
#define fileB file[1]

LINE *sfile[2]; /* Hash/line after prefix */
#define sfileA sfile[0]
#define sfileB sfile[1]

int len[2]; /* Actual lines in each file */
#define lenA len[0]
#define lenB len[1]

int slen[2]; /* Squished lengths */
#define slenA slen[0]
#define slenB slen[1]

int prefix; /* Identical lines at start */
int suffix; /* Identical lenes at end */

FILE *infd[2] = { NULL, NULL }; /* Input file identifiers */
FILE *tempfd; /* Temp for input redirection */

/*
* The following vectors overlay the area defined by fileA
*/

int *class; /* Unsorted line numbers */
int *klist; /* Index of element in clist */
CANDIDATE *clist; /* Storage pool for candidates */
int clength = 0; /* Number of active candidates */

int *match; /* Longest subsequence */
long *oldseek; /* Seek position in file A */

/*
* The following vectors overlay the area defined by fileB
*/

int *member; /* Concatenated equiv. classes */
long *newseek; /* Seek position in file B */
char *textb; /* Input from file2 for check */

/*
* Global variables
*/

int eflag = FALSE; /* Edit script requested */
int bflag = FALSE; /* Blank supress requested */
int cflag = FALSE; /* Context printout */
int iflag = FALSE; /* Ignore case requested */
char text[257]; /* Input line from file1 */
#ifndef MEGAMAX
#ifndef vms
char *free_space; /* For storage allocation */
#endif
#endif
extern char *myalloc(); /* Storage allocator */
extern char *compact(); /* Storage compactor */

#ifdef DEBUG
#define TIMING
#endif
#ifdef TIMING
extern long time();
extern char *$$mend;
long totaltime;
long sectiontime;
char *mstart;
#endif

main(argc, argv)
int argc;
char **argv;
/*
* Diff main program
*/
{
register int i;
register char *ap;

#ifdef TIMING
sectiontime = time(&totaltime);
#endif
while (argc > 1 && *(ap = argv[1]) == '-' && *++ap != EOS) {
while (*ap != EOS) {
switch (tolower(*ap++)) {
case 'b':
bflag++;
break;

case 'c':
cflag++;
break;

case 'e':
eflag++;
break;

case 'i':
iflag++;
break;

default:
fprintf(stderr,
"Warning, bad option '%c'\n",
ap[-1]);
break;
}
}
argc--;
argv++;
}

if (argc != 3)
error("Usage: diff [-options] file1 file2");
if (cflag && eflag) {
fprintf(stderr,
"Warning, -c and -e are incompatible, -c supressed.\n");
cflag = FALSE;
}
argv++;
for (i = 0; i <= 1; i++) {
if (argv[i][0] == '-' && argv[i][1] == EOS) {
infd[i] = stdin;
if ((tempfd = fopen(TEMPFILE, "w")) == NULL)
cant(TEMPFILE, "work", 1);
}
else {
infd[i] = fopen(argv[i], "r");
}
}
if (infd[0] == NULL && infd[1] == NULL) {
cant(argv[0], "input", 0);
cant(argv[1], "input", 1);
}
else if (infd[1] == NULL)
opendir(1, &argv[1], infd[0]);
else if (infd[0] == NULL)
opendir(0, &argv[0], infd[1]);
#ifndef MEGAMAX
#ifndef vms
free_space = malloc(1);
#endif
#endif
/*
* Read input, building hash tables.
*/
input(0);
input(1);
squish();
#ifdef DEBUG
printf("before sort\n");
for (i = 1; i <= slenA; i++)
printf("sfileA[%d] = %6d %06o\n",
i, sfileA[i].serial, sfileA[i].hash);
for (i = 1; i <= slenB; i++)
printf("sfileB[%d] = %6d %06o\n",
i, sfileB[i].serial, sfileB[i].hash);
#endif
sort(sfileA, slenA);
sort(sfileB, slenB);
#ifdef TIMING
ptime("input");
#endif
#ifdef DEBUG
printf("after sort\n");
for (i = 1; i <= slenA; i++)
printf("sfileA[%d] = %6d %06o\n",
i, sfileA[i].serial, sfileB[i].hash);
for (i = 1; i <= slenB; i++)
printf("sfileB[%d] = %6d %06o\n",
i, sfileB[i].serial, sfileB[i].hash);
#endif
/*
* Build equivalence classes.
*/
member = (int *)fileB;
equiv();
member = (int *)compact((char *)member, (slenB + 2) * sizeof (int),
"squeezing member vector");
/*
* Reorder equivalence classes into array class[]
*/
class = (int *)fileA;
unsort();
class = (int *)compact((char *)class, (slenA + 2) * sizeof (int),
"compacting class vector");
#ifdef TIMING
ptime("equiv/unsort");
#endif
/*
* Find longest subsequences
*/
klist = (int *)myalloc((slenA + 2) * sizeof (int), "klist");
clist = (CANDIDATE *)myalloc(sizeof (CANDIDATE), "clist");
i = subseq();
free((char *)member);
free((char *)class);
match = (int *)myalloc((lenA + 2) * sizeof (int), "match");
unravel(klist[i]);
free(clist);
free(klist);
#ifdef TIMING
ptime("subsequence/unravel");
#endif
/*
* Check for fortuitous matches and output differences
*/
oldseek = (long *)myalloc((lenA + 2) * sizeof (* oldseek), "oldseek");
newseek = (long *)myalloc((lenB + 2) * sizeof (* newseek), "newseek");
textb = myalloc(sizeof text, "textbuffer");
if (check(argv[0], argv[1]))
fprintf(stderr, "Spurious match, output is not optimal\n");
#ifdef TIMING
ptime("check");
#endif
output();
#ifdef TIMING
ptime("output");
printf("%ld seconds required\n", sectiontime - totaltime);
#endif
if (tempfd != NULL) {
fclose(tempfd);
delete(TEMPFILE);
}
}

input(which)
int which; /* 0 or 1 to redefine infd[] */
/*
* Read the file, building hash table
*/
{
register LINE *lentry;
register int linect;
register short hashval;
FILE *fd;
unsigned short hash();

linect = 0;
lentry = (LINE *)myalloc(sizeof(LINE) * 3, "line");
fd = infd[which];
while (!getline(fd, text)) {
lentry = (LINE *)compact((char *)lentry,
(++linect + 3) * sizeof(LINE),
"extending line vector");
lentry[linect].hash = hash(text);
}
/*
* If input was from stdin ("-" command), finish off the temp file.
*/
if (fd == stdin) {
fclose(tempfd);
tempfd = infd[which] = fopen(TEMPFILE, "r");
}
len[which] = linect;
file[which] = lentry;
}

squish()
/*
* Look for initial and trailing sequences that have identical hash values.
* Don't bother building them into the candidate vector.
*/
{
register int i;
register LINE *ap;
register LINE *bp;
int j;
int k;

/*
* prefix -> first line (from start) that doesn't hash identically
*/
i = 0; ap = &fileA[1]; bp = &fileB[1];
while (i < lenA && i < lenB && ap->hash == bp->hash) {
i++; ap++; bp++;
}
prefix = i;
/*
* suffix -> first line (from end) that doesn't hash identically
*/
j = lenA - i;
k = lenB - i;
ap = &fileA[lenA];
bp = &fileB[lenB];
i = 0;
while (i < j && i < k && ap->hash == bp->hash) {
i++; ap--; bp--;
}
suffix = i;
/*
* Tuck the counts away
*/
for (k = 0; k <= 1; k++) {
sfile[k] = file[k] + prefix;
j = slen[k] = len[k] - prefix - suffix;

for (i = 0, ap = sfile[k]; i <= slen[k]; i++, ap++) {
ap->serial = i;
}
}
}

sort(vector, vecsize)
LINE *vector; /* What to sort */
int vecsize; /* How many to sort */
/*
* Sort hash entries
*/
{
register int j;
register LINE *aim;
register LINE *ai;
int mid;
int k;
LINE work;

for (j = 1; j <= vecsize; j *= 2);
mid = (j - 1);
while ((mid /= 2) != 0) {
k = vecsize - mid;
for (j = 1; j <= k; j++) {
for (ai = &vector[j]; ai > vector; ai -= mid) {
aim = &ai[mid];
if (aim < ai)
break; /* ?? Why ?? */
if (aim->hash > ai->hash ||
aim->hash == ai->hash &&
aim->serial > ai->serial)
break;
work.hash = ai->hash;
ai->hash = aim->hash;
aim->hash = work.hash;
work.serial = ai->serial;
ai->serial = aim->serial;
aim->serial = work.serial;
}
}
}
}

equiv()
/*
* Build equivalence class vector
*/
{
register LINE *ap;
register union {
LINE *bp;
int *mp;
} r;
register int j;
LINE *atop;

#ifdef DEBUG
printf("equiv entry\n");
for (j = 1; j <= slenA; j++)
printf("sfileA[%d] = %6d %06o\n",
j, sfileA[j].serial, sfileA[j].hash);
for (j = 1; j <= slenB; j++)
printf("sfileB[%d] = %6d %06o\n",
j, sfileB[j].serial, sfileB[j].hash);
#endif
j = 1;
ap = &sfileA[1];
r.bp = &sfileB[1];
atop = &sfileA[slenA];
while (ap <= atop && j <= slenB) {
if (ap->hash < r.bp->hash) {
ap->hash = 0;
ap++;
}
else if (ap->hash == r.bp->hash) {
ap->hash = j;
ap++;
}
else {
r.bp++;
j++;
}
}
while (ap <= atop) {
ap->hash = 0;
ap++;
}
sfileB[slenB + 1].hash = 0;
#ifdef DEBUG
printf("equiv exit\n");
for (j = 1; j <= slenA; j++)
printf("sfileA[%d] = %6d %06o\n",
j, sfileA[j].serial, sfileA[j].hash);
for (j = 1; j <= slenB; j++)
printf("sfileB[%d] = %6d %06o\n",
j, sfileB[j].serial, sfileB[j].hash);
#endif
ap = &sfileB[0];
atop = &sfileB[slenB];
r.mp = &member[0];
while (++ap <= atop) {
r.mp++;
*r.mp = -(ap->serial);
while (ap[1].hash == ap->hash) {
ap++;
r.mp++;
*r.mp = ap->serial;
}
}
r.mp[1] = -1;
#ifdef DEBUG
for (j = 0; j <= slenB; j++)
printf("member[%d] = %d\n", j, member[j]);
#endif
}

unsort()
/*
* Build class vector
*/
{
register int *temp;
register int *tp;
register union {
LINE *ap;
int *cp;
} u;
LINE *evec;
int *eclass;
#ifdef DEBUG
int i;
#endif

temp = (int *)myalloc((slenA + 1) * sizeof(int), "unsort scratch");
u.ap = &sfileA[1];
evec = &sfileA[slenA];
while (u.ap <= evec) {
#ifdef DEBUG
printf("temp[%2d] := %06o\n", u.ap->serial, u.ap->hash);
#endif
temp[u.ap->serial] = u.ap->hash;
u.ap++;
}
/*
* Copy into class vector and free work space
*/
u.cp = &class[1];
eclass = &class[slenA];
tp = &temp[1];
while (u.cp <= eclass)
*u.cp++ = *tp++;
free((char *) temp);
#ifdef DEBUG
printf("unsort exit\n");
for (i = 1; i <= slenA; i++)
printf("class[%d] = %d %06o\n", i, class[i], class[i]);
#endif
}

#include "diff1.c"


  3 Responses to “Category : C Source Code
Archive   : CSRC2.ZIP
Filename : DIFF.C

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/