Category : C Source Code
Archive   : BAWK.ZIP
Filename : BAWK.C

 
Output of file : BAWK.C contained in archive : BAWK.ZIP
/*
* Bawk main program
*/
#define MAIN 1
#include <stdio.h>
#include "bawk.h"

/*
* Main program
*/
main( argc, argv )
int argc;
char **argv;
{
    /*
     * Program entry point.
     *
     * Initializes the interpreter globals, then walks the command line:
     *   - an argument starting with '-' is an option ("-d" raises the
     *     debug level when built with DEBUG; a lone "-" stands for
     *     standard input; anything else prints the usage message);
     *   - the first file argument is compiled as the rules file;
     *   - every later file argument is processed as a text file.
     * If rules were compiled but no text file was named, standard input
     * is processed.  The BEGIN action (if any) runs once before the first
     * text file and the END action (if any) once after the last one.
     *
     * Returns 0 when the end of main is reached.
     */
    char gotrules, didfile, getstdin;

    gotrules = 0;       /* non-zero once the rules file is compiled */
    didfile = 0;        /* non-zero once a text file was processed */
    getstdin = 0;       /* non-zero while handling a lone "-" */

    /*
     * Initialize global variables.  Each one is cleared by its own
     * assignment: the group holds both pointers (e.g. Filename) and
     * integers (e.g. Linecount), and a chained assignment would pass an
     * integer value on to a pointer.
     */
    Beginact = 0;
    Endact = 0;
    Rules = 0;
    Rulep = 0;
#ifdef DEBUG
    Debug = 0;
#endif
    Filename = 0;
    Linecount = 0;
    Saw_break = 0;
    Stackptr = Stackbtm - 1;
    Stacktop = Stackbtm + MAXSTACKSZ;
    Nextvar = Vartab;

    strcpy( Fieldsep, " \t" );
    strcpy( Recordsep, "\n" );

    /*
     * Parse command line
     */
    while ( --argc )
    {
        ++argv;
        if ( **argv == '-' )
        {
            /*
             * Process dash options.  The dash is stepped over in a
             * separate statement so the increment is not part of
             * tolower()'s argument (it would be repeated if tolower()
             * is a macro that evaluates its argument more than once).
             */
            ++(*argv);
            switch ( tolower( **argv ) )
            {
#ifdef DEBUG
            case 'd':
                ++Debug;
                continue;
#endif
            case 0:
                /*
                 * A lone "-": treat standard input as the file.
                 * argv is left alone so the next pass of the loop
                 * moves on to the following argument.
                 */
                getstdin = 1;
                break;
            default:
                usage();
                continue;
            }
        }

        if ( gotrules )
        {
            /*
             * Already read rules file - assume this is a text file
             * for processing.  The BEGIN action runs before the
             * first text file only.
             */
            if ( !didfile )
            {
                didfile = 1;
                if ( Beginact )
                    doaction( Beginact );
            }
            newfile( getstdin ? (char *) 0 : *argv );
            getstdin = 0;
            process();
        }
        else
        {
            /*
             * First file name argument on command line is assumed
             * to be a rules file - attempt to compile it.
             */
            newfile( getstdin ? (char *) 0 : *argv );
            getstdin = 0;
            compile();
            gotrules = 1;
        }
    }

    if ( !gotrules )
        usage();

    if ( !didfile )
    {
        /*
         * Didn't process any files yet - process stdin.
         */
        newfile( (char *) 0 );
        if ( Beginact )
            doaction( Beginact );
        process();
    }
    if ( Endact )
        doaction( Endact );

    return 0;
}

/*
* Regular expression/action file compilation routines.
*/
static char *
keepbuf( len )
int len;
{
    /*
     * Copy the first "len" bytes of the global work buffer into freshly
     * allocated memory and return the copy.
     */
    char *copy;

    copy = getmem( len );
    movemem( Workbuf, copy, len );
    return copy;
}

compile()
{
    /*
     * Read the rules file from the current input file and build the
     * chain of rules starting at "Rules" ("Rulep" tracks the rule
     * being filled in).  Whitespace and '#' comments are skipped; a
     * '{' starts an action, a ',' starts the stop pattern of a
     * two-part pattern, and anything else starts a pattern.  Actions
     * following the BEGIN and END keywords are stored in "Beginact"
     * and "Endact".  The input file is closed when the end is reached.
     */
    int ch, nbytes;

#ifdef DEBUG
    if ( Debug )
        error( "compiling...", 0 );
#endif

    for ( ;; )
    {
        ch = getcharacter();
        if ( ch == -1 )
            break;

        switch ( ch )
        {
        case ' ':
        case '\t':
        case '\n':
            /* whitespace between items is ignored */
            break;

        case '#':
            /* a comment runs to the end of the line (or of the file) */
            do
                ch = getcharacter();
            while ( ch != -1 && ch != '\n' );
            break;

        case '{':
#ifdef DEBUG
            if ( Debug )
                error( "action", 0 );
#endif
            /*
             * Tokenize the action into the work buffer first; the
             * brace is pushed back so the action compiler sees it.
             */
            ungetcharacter( '{' );
            nbytes = act_compile( Workbuf );

            if ( !Rulep )
            {
                /* very first rule: start the chain */
                Rules = Rulep = getmem( sizeof( *Rulep ) );
                fillmem( Rulep, sizeof( *Rulep ), 0 );
            }
            else if ( Rulep->action )
            {
                /* current rule already has an action: chain a new one */
                Rulep->nextrule = getmem( sizeof( *Rulep ) );
                Rulep = Rulep->nextrule;
                fillmem( Rulep, sizeof( *Rulep ), 0 );
            }
            Rulep->action = keepbuf( nbytes );
            break;

        case ',':
#ifdef DEBUG
            if ( Debug )
                error( "stop pattern", 0 );
#endif
            /*
             * Second half of a two-part pattern: the comma has been
             * consumed, so compile what follows as the stop pattern
             * of the current rule.
             */
            if ( !Rulep || !Rulep->pattern.start )
                error( "stop pattern without a start",
                    RE_ERROR );
            if ( Rulep->pattern.stop )
                error( "already have a stop pattern",
                    RE_ERROR );
            nbytes = pat_compile( Workbuf );
            Rulep->pattern.stop = keepbuf( nbytes );
            break;

        default:
            /*
             * Anything else is taken to be the start of a pattern.
             */
#ifdef DEBUG
            if ( Debug )
                error( "start pattern", 0 );
#endif
            ungetcharacter( ch );
            nbytes = pat_compile( Workbuf );

            if ( *Workbuf == T_BEGIN )
            {
                /* "BEGIN": the action that follows goes to Beginact */
                nbytes = act_compile( Workbuf );
                Beginact = keepbuf( nbytes );
            }
            else if ( *Workbuf == T_END )
            {
                /* "END": the action that follows goes to Endact */
                nbytes = act_compile( Workbuf );
                Endact = keepbuf( nbytes );
            }
            else
            {
                if ( Rulep )
                {
                    /* a rule exists already: chain another one */
                    Rulep->nextrule = getmem( sizeof( *Rulep ) );
                    Rulep = Rulep->nextrule;
                    fillmem( Rulep, sizeof( *Rulep ), 0 );
                }
                else
                {
                    /* very first rule: start the chain */
                    Rules = Rulep = getmem( sizeof( *Rulep ) );
                    fillmem( Rulep, sizeof( *Rulep ), 0 );
                }
                if ( Rulep->pattern.start )
                    error( "already have a start pattern",
                        RE_ERROR );

                Rulep->pattern.start = keepbuf( nbytes );
            }
            break;
        }
    }
    endfile();
}

/*
* Text file main processing loop.
*/
process()
{
    /*
     * Read a record at a time from the current input file, split it
     * into fields, then apply each rule in the Rules chain to it:
     *   - a rule without a start pattern runs its action on every record;
     *   - a rule whose start pattern matches runs its action and, if it
     *     also has a stop pattern, keeps running it on following records
     *     up to and including the one that matches the stop pattern.
     * Resets Recordcount before reading.  Nothing is applied when the
     * rules chain is empty (e.g. a rules file holding only BEGIN/END).
     */
#ifdef DEBUG
    int i;

    if ( Debug )
        error( "processing...", 0 );
#endif

    Recordcount = 0;

    while ( getline() )
    {
        /*
         * Parse the input line.
         */
        Fieldcount = parse( Linebuf, Fields, Fieldsep );
#ifdef DEBUG
        if ( Debug>1 )
        {
            printf( "parsed %d words:\n", Fieldcount );
            for ( i=0; i<Fieldcount; ++i )
                printf( "<%s>\n", Fields[i] );
        }
#endif

        /*
         * The loop tests Rulep before using it, so an empty chain is
         * skipped instead of being dereferenced.
         */
        for ( Rulep = Rules; Rulep; Rulep = Rulep->nextrule )
        {
            if ( ! Rulep->pattern.start )
            {
                /*
                 * No pattern given - perform action on
                 * every input line.
                 */
                doaction( Rulep->action );
            }
            else if ( Rulep->pattern.startseen )
            {
                /*
                 * Start pattern already found - perform
                 * action then check if line matches
                 * stop pattern.
                 */
                doaction( Rulep->action );
                if ( dopattern( Rulep->pattern.stop ) )
                    Rulep->pattern.startseen = 0;
            }
            else if ( dopattern( Rulep->pattern.start ) )
            {
                /*
                 * Matched start pattern - perform action.
                 * If a stop pattern was given, set "start
                 * pattern seen" flag and process every input
                 * line until stop pattern found.
                 */
                doaction( Rulep->action );
                if ( Rulep->pattern.stop )
                    Rulep->pattern.startseen = 1;
            }
        }

        /*
         * Release memory allocated by parse().
         */
        while ( Fieldcount )
            free( Fields[ --Fieldcount ] );
    }
}

/*
* Miscellaneous functions
*/
parse( str, wrdlst, delim )
char *str;
char *wrdlst[];
char *delim;
{
    /*
     * Parse the string of words in "str" into the word list at "wrdlst".
     * A "word" is a sequence of characters delimited by one or more
     * of the characters found in the string "delim".
     * Returns the number of words parsed.
     *
     * Every word is stored in its own MAXLINELEN sized buffer, just in
     * case the user wants to copy a larger string into a field.  A word
     * that would not fit is cut off after MAXLINELEN-1 characters; the
     * rest of it is skipped rather than turned into an extra word.
     *
     * CAUTION: the memory for the words in "wrdlst" is allocated
     * by getmem() and should eventually be returned by free().
     * The number of slots in "wrdlst" is not checked here.
     */
    int wrdcnt, wrdlen;
    char *wrd, c;

    wrdcnt = 0;
    while ( *str )
    {
        /* skip the delimiters in front of the next word */
        while ( instr( *str, delim ) )
            ++str;
        if ( !*str )
            break;

        /* copy the word straight into its final buffer */
        wrd = getmem( MAXLINELEN );
        wrdlen = 0;
        while ( (c = *str) && !instr( c, delim ) )
        {
            if ( wrdlen < MAXLINELEN - 1 )
                wrd[ wrdlen++ ] = c;
            ++str;
        }
        wrd[ wrdlen ] = 0;
        wrdlst[ wrdcnt++ ] = wrd;
    }

    return wrdcnt;
}

unparse( wrdlst, wrdcnt, str, delim )
char *wrdlst[];
int wrdcnt;
char *str;
char *delim;
{
    /*
     * Replace all the words in "str" with the words in "wrdlst",
     * maintaining the same word separation distance as found in
     * the string.
     * A "word" is a sequence of characters delimited by one or more
     * of the characters found in the string "delim".
     * Words in "str" beyond the first "wrdcnt" are dropped (their
     * delimiters are kept).
     *
     * The line is rebuilt in a local MAXLINELEN sized buffer; anything
     * that would not fit is cut off so the buffer cannot overflow.
     * NOTE: the result (up to MAXLINELEN bytes including the
     * terminator) is copied back over "str", so the caller's buffer
     * must have room for it.
     */
    int wc;
    char strbuf[ MAXLINELEN ], *sp, *wp, *start, *limit;

    wc = 0;                 /* next word in "wrdlst" */
    sp = strbuf;            /* points to our local string */
    limit = strbuf + MAXLINELEN - 1;    /* last slot, kept for the 0 */
    start = str;            /* save start address of "str" for later... */
    while ( *str )
    {
        /*
         * Copy the field delimiters from the original string to
         * our local version.
         */
        while ( instr( *str, delim ) )
        {
            if ( sp < limit )
                *sp++ = *str;
            ++str;
        }
        if ( !*str )
            break;
        /*
         * Skip over the field in the original string and...
         */
        while ( *str && !instr( *str, delim ) )
            ++str;

        if ( wc < wrdcnt )
        {
            /*
             * ...copy in the field in the wordlist instead.
             */
            wp = wrdlst[ wc++ ];
            while ( *wp && sp < limit )
                *sp++ = *wp++;
        }
    }
    /*
     * Tie off the local string, then copy it back to caller's string.
     */
    *sp = 0;
    strcpy( start, strbuf );
}

instr( c, s )
char c, *s;
{
    /*
     * Return 1 if the character "c" occurs in the string "s", else 0.
     * The terminating 0 of "s" is never considered a match.
     */
    char *scan;

    for ( scan = s; *scan != 0; ++scan )
    {
        if ( *scan == c )
            return 1;
    }
    return 0;
}

char *
getmem( len )
unsigned len;
{
    /*
     * Allocate "len" bytes with malloc() and return the new block.
     * On failure an "out of memory" error is reported with severity
     * MEM_ERROR; should error() return, a null pointer is handed back
     * instead of an undefined value.
     * At least one byte is always requested, so a zero length request
     * is not mistaken for an allocation failure.
     */
    char *cp;

    cp = malloc( len ? len : 1 );
    if ( !cp )
    {
        error( "out of memory", MEM_ERROR );
        return (char *) 0;
    }
    return cp;
}

char *
newfile( s )
char *s;
{
    /*
     * Make "s" the current input file: reset the line counter, record
     * the name in "Filename" and open the file for reading through
     * "Fileptr".  A null "s" selects standard input.
     * A file that cannot be opened is reported with severity FILE_ERROR.
     * Returns the name now stored in "Filename".
     */
    Linecount = 0;
    Filename = s;
    if ( s )
    {
#ifdef BDS_C
        if ( fopen( s, Fileptr = Curfbuf ) == -1 )
#else
        if ( !(Fileptr = fopen( s, "r" )) )
#endif
            error( "file not found", FILE_ERROR );
    }
    else
    {
        /*
         * No file name given - process standard input.
         */
        Fileptr = stdin;
        Filename = "standard input";
    }
    return Filename;
}

getline()
{
/*
* Read a line of text from current input file. Strip off
* trailing record seperator (newline).
*/
int rtn, len;

for ( len=0; len {
if ( (rtn = getcharacter()) == *Recordsep || rtn == -1 )
break;
Linebuf[ len ] = rtn;
}
Linebuf[ len ] = 0;

if ( rtn == -1 )
{
endfile();
return 0;
}
return 1;
}

getcharacter()
{
    /*
     * Read a character from current input file and return it
     * (-1 at end of file).  The record counter is bumped when the
     * character equals the first record separator character, and the
     * line counter when it is a newline.
     * WARNING: your getc() must convert lines that end with CR+LF
     * to LF and CP/M's EOF character (^Z) to a -1.
     * Also, getc() must return a -1 when attempting to read from
     * an unopened file.
     */
    int c;

#ifdef BDS_C
    /*
     * BDS C doesn't do CR+LF to LF and ^Z to -1 conversions
     */
    if ( (c = getc( Fileptr )) == '\r' )
    {
        if ( (c = getc( Fileptr )) != '\n' )
        {
            /*
             * Lone CR: give the character after it back to the
             * same stream (as ungetcharacter() does) and return
             * the CR itself.
             */
            ungetc( c, Fileptr );
            c = '\r';
        }
    }
    else if ( c == 26 ) /* ^Z */
        c = -1;
#else
    c = getc( Fileptr );
#endif

    if ( c == *Recordsep )
        ++Recordcount;
    if ( c=='\n' )
        ++Linecount;

    return c;
}

ungetcharacter( c )
{
    /*
     * Return the character "c" to the current input file so the next
     * read sees it again.  The counters are wound back to match:
     * the line counter for a newline, the record counter for the
     * first record separator character.
     * Returns whatever ungetc() returns.
     */
    if ( c == '\n' )
        --Linecount;
    if ( c == *Recordsep )
        --Recordcount;

    return ungetc( c, Fileptr );
}

endfile()
{
    /*
     * Finish with the current input file: close it and clear the file
     * name and line counter used in error messages.
     * Standard input is left open, because newfile() may select it
     * again later (e.g. rules read from "-" followed by text read from
     * standard input).
     */
    if ( Fileptr != stdin )
        fclose( Fileptr );

    /* cleared separately: Filename is a pointer, Linecount a number */
    Filename = 0;
    Linecount = 0;
}

error( s, severe )
char *s;
int severe;
{
    /*
     * Print the message "s" on stderr, prefixed with the current file
     * name and line number when they are set.
     * A non-zero "severe" ends the program with that exit code;
     * with zero the function returns to the caller.
     */
    if ( Filename )
        fprintf( stderr, "%s:", Filename );

    if ( Linecount )
        fprintf( stderr, " line %d:", Linecount );

    fprintf( stderr, " %s\n", s );
    if ( severe )
        exit( severe );
}

usage()
{
error( "Usage: bawk [ ...]\n", USAGE_ERROR );
}

movemem( from, to, count )
char *from, *to;
int count;
{
    /*
     * Copy "count" bytes from "from" to "to", lowest address first.
     * A count of zero or less copies nothing.
     */
    int i;

    for ( i = 0; i < count; ++i )
        to[ i ] = from[ i ];
}

fillmem( array, count, value )
char *array, value;
int count;
{
    /*
     * Store "value" in each of the first "count" bytes of "array".
     * A count of zero or less stores nothing.
     */
    char *end;

    if ( count <= 0 )
        return;
    for ( end = array + count; array != end; ++array )
        *array = value;
}

int
strncmp( s, t, n )
char *s, *t;
int n;
{
    /*
     * Compare at most "n" characters of the strings "s" and "t".
     * Returns 0 when they agree over that span (or when "n" is zero
     * or less, since nothing is compared then), otherwise the
     * difference between the first pair of characters that differ.
     */
    if ( n <= 0 )
        return 0;

    /*
     * Step over matching characters, stopping on the n-th character,
     * at the end of either string, or at the first mismatch.
     */
    while ( --n > 0 && *s && *s == *t )
    {
        ++s;
        ++t;
    }
    return *s - *t;
}

num( c )
char c;
{
    /*
     * Return 1 if "c" is one of the digits '0' through '9', else 0.
     */
    if ( c < '0' )
        return 0;
    if ( c > '9' )
        return 0;
    return 1;
}

alpha( c )
char c;
{
    /*
     * Return 1 if "c" is a lower or upper case letter or an
     * underscore, else 0.
     */
    if ( c == '_' )
        return 1;
    if ( c >= 'a' && c <= 'z' )
        return 1;
    if ( c >= 'A' && c <= 'Z' )
        return 1;
    return 0;
}

alphanum( c )
char c;
{
    /*
     * Return 1 if "c" is accepted by alpha() or by num(), else 0.
     * num() is only consulted when alpha() rejects the character.
     */
    if ( alpha( c ) )
        return 1;
    if ( num( c ) )
        return 1;
    return 0;
}


  3 Responses to “Category : C Source Code
Archive   : BAWK.ZIP
Filename : BAWK.C

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/