Category : Files from Magazines
Archive   : DDJ0589.ZIP
Filename : TAWK.ASC

 
Output of file : TAWK.ASC contained in archive : DDJ0589.ZIP
_TAWK, A Simple Interpreter in C++_
by Bruce Eckel


[LISTING ONE]


// FIELD.HXX: used by csascii class to build a single field.
// Fields are collected by csascii to create a record.
// by Bruce Eckel,
#include

// One field of a comma-separated ASCII record. The constructor reads the
// field's characters from a stream; csascii collects fields into a record.
// NOTE(review): the class owns a heap buffer but declares no copy
// constructor or assignment, so copying a field would free `data` twice.
class field {
    istream * input;    // where to get the data
    char * data;        // field text; stays 0 until getfield() allocates it
    int length, fsize;  // length: working counter used by getfield();
                        // fsize: number of characters stored in data
    int end_of_file;    // nonzero once the stream reported end of file
    void getfield();    // recursive reader; uses data, length and input
    int infield;        // nonzero while getfield() is inside a quoted field
public:
    field(istream & instream);
    ~field();
    // Inserts the field text into s. If nothing was allocated (data is
    // still 0) nothing is written, rather than inserting a null pointer.
    friend ostream& operator<<(ostream &s, field & f) {
        if (f.data)
            s << f.data;
        return s;
    }
    int eof() { return end_of_file; }     // nonzero if end of file was hit
    int size() { return fsize; }          // character count of the field
    int last_length() { return length; }  // raw value of the working counter
    char * string() { return data; }      // field text; may be 0
};


[LISTING TWO]

// FIELD.CXX: definitions for class field
// A "recursive descent" scanning scheme is used because field
// length is always unknown.
// by Bruce Eckel
#include "field.hxx"

// Reads one field from instream. All bookkeeping members are reset first
// because getfield() treats them as shared state during its recursion.
field::field(istream & instream) {
    input = &instream;
    length = 0;
    fsize = 0;         // getfield() only assigns this once a terminator is
                       // found, so give size() a defined value otherwise
    end_of_file = 0;   // not at the end yet
    infield = 0;       // not inside a quoted field yet
    data = (char *)0;  // no memory has been allocated
    getfield();        // recursively get characters until end of field
}

// Releases the text buffer. It is allocated with new char[...] in
// getfield(), so it must be released with the array form of delete.
// Deleting a null pointer (nothing was allocated) has no effect.
field::~field() {
    delete [] data;
}

// A Comma-separated ASCII field is contained in quotes to allow
// commas within the field; these quotes must be stripped out
// Reads one comma-separated ASCII field, one character per recursion level.
// Descending: characters are counted in `length`; quotes toggle `infield`
// and are dropped, carriage returns are dropped. The field ends at an
// unquoted comma, at a newline, or at end of file. At that point the buffer
// is allocated with the now-known size. Ascending: each level stores its
// own character, filling the buffer from the back.
//
// End of file is treated as a terminator (and flagged in end_of_file) so
// the buffer always exists before any level stores into it. Previously an
// EOF in mid-field returned without allocating, and the unwinding levels
// wrote through a null `data`.
void field::getfield() {
    char c;
    int terminated = 0;
    if ((input->get(c)).eof()) {
        end_of_file++;  // just say we reached the end...
        terminated = 1;
    } else if ((c == ',' && !infield) || c == '\n') {
        // watch out for the Unix vs. DOS LF/CR problem here
        terminated = 1;
    }
    if (terminated) {  // happens once, at the bottom of the descent
        fsize = length;               // remember how long the string is
        data = new char[length + 1];  // space for null terminator
        data[length] = '\0';
        length--;  // highest index for text; the delimiter is not stored
        return;
    }
    // An explicit flag marks whether this character belongs to the text.
    // Using the character's own value as the marker would drop a NUL byte
    // that had already been counted.
    int keep = 1;
    if (c == '"') {
        infield = !infield;  // entering or leaving a quoted field
        keep = 0;
    } else if (c == '\r') {
        keep = 0;
    }
    if (keep)
        length++;  // count this character
    getfield();    // recursively get the rest of the field
    // This happens when ascending:
    if (keep)
        data[length--] = c;  // put chars in as we rise back up
}


[LISTING THREE]

// CSASCII.HXX: class to manipulate comma-separated ASCII
// database files.
//by Bruce Eckel
#include
#include "field.hxx"

// Reader for comma-separated ASCII database files: fields within a record
// are separated by commas, records are separated by newlines. One record
// at a time is held as an array of field objects.
class csascii {
    int fieldcount;      // number of fields per record
    field ** data;       // the current record: one field pointer per slot
    istream * datafile;  // stream the records are read from
    int readrecord();    // reads one record into data
public:
    csascii( char * filename );  // opens the file and reads the first record
    ~csascii();
    int next();  // advances to the next record; 0 when end of file is hit
    field & operator[](int index);  // selects a field of the current record
    int number_of_fields() {
        return fieldcount;
    }
};



[LISTING FOUR]

// CSASCII.CXX: function definitions for comma-separated
// ascii database manipulation class
// by Bruce Eckel,
#include "csascii.hxx"

// Reads one record: a fresh field object is created for each slot.
// Returns 1 when every field was read, 0 as soon as a field reports end of
// file. On that early exit the slots not reached are set to 0, so they no
// longer hold pointers to fields that next() has already deleted and a
// later delete of them is harmless.
int csascii::readrecord() {
    for (int fieldnum = 0; fieldnum < fieldcount; fieldnum++ ) {
        data[fieldnum] = new field(*datafile);
        if (data[fieldnum]->eof()) {
            for (int rest = fieldnum + 1; rest < fieldcount; rest++)
                data[rest] = 0;
            return 0;
        }
    }
    return 1;
}

// Opens filename twice: first to count the fields on the first line, then
// again (kept open in datafile) to read records. The first record is read
// before the constructor returns.
csascii::csascii( char * filename ) {
    char c;
    fieldcount = 0;
    int quote = 0;
    // first, determine the number of fields in a record:
    {
        // See text for dangers of opening files this way:
        istream infile(new filebuf->open(filename, input));
        // Stop at end of file as well as at the newline. Without the eof
        // test, an empty file or one with no newline never ends this loop.
        while (!(infile.get(c)).eof() && c != '\n') {
            // keep track of being inside a quoted string:
            if (c == '"') quote = !quote;
            // fields are delimited by unquoted commas:
            if ( c == ',' && !quote)
                fieldcount++;
        }
    }  // infile goes out of scope
    fieldcount++;  // last field terminated by newline, not comma
    // an array of field pointers, all empty to start with so that no slot
    // ever holds an indeterminate pointer:
    data = new field * [ fieldcount ];
    for (int slot = 0; slot < fieldcount; slot++)
        data[slot] = 0;
    // re-open at start; dynamically allocate so it isn't scoped:
    datafile = new istream(new filebuf->open(filename, input));
    readrecord();
}

// Releases the pointer array and the data stream. The array comes from
// new field*[...], so the array form of delete is required.
// NOTE(review): the field objects of the current record are not freed
// here; whether every slot still holds a live pointer at this point
// depends on how the last readrecord() ended, so confirm before adding a
// per-field delete.
csascii::~csascii() {
    delete [] data;
    delete datafile;  // calls istream destructor
}

// Discards the fields of the current record, then reads the following
// record. The result is readrecord()'s: 0 once end of file is reached.
int csascii::next() {
    int slot = 0;
    while (slot < fieldcount) {
        delete data[slot];  // release this field's storage
        slot++;
    }
    return readrecord();
}

// Returns field number `index` of the current record. An index outside
// 0 .. fieldcount-1 prints a message on cerr and exits the program.
// Negative values are rejected too; they used to index before the array.
field & csascii::operator[](int index) {
    if (index < 0) {
        cerr << "negative index for field in record\n";
        exit(1);
    }
    if (index >= fieldcount) {
        cerr << "index too large for number of fields in record\n";
        exit(1);
    }
    return *(data[index]);
}


[LISTING FIVE]

// LOOKUP.CXX: simple use of csascii to find name in a database
// by Bruce Eckel,
#include "csascii.hxx"
#include

// lookup: prints every record of \ppquick.asc whose first field equals the
// name given on the command line, one field per line, pausing after each
// record. ESC at the pause stops the search.
main(int argc, char ** argv) {
    if (argc < 2) {
        cerr << "usage: lookup lastname\n";
        exit(1);
    }
    // The database file is expected in the root directory:
    csascii file("\\ppquick.asc");
    int found = 0;  // number of matching records seen so far
    do {
        if (strcmp(file[0].string(),argv[1]) != 0) {
            // The file is sorted, so the first non-match after a match
            // means there are no more: quit instead of wasting time.
            if (found) exit(0);
            continue;
        }
        found++;
        cout << chr(27) << "[2J";  // ANSI clear screen
        int total = file.number_of_fields();
        for (int n = 0; n < total; n++)
            cout << file[n] << "\n";
        cout << chr(27) << "[7m" << "press any key";
        cout << chr(27) << "[0m";
        if ( getch() == 27) break;
    } while (file.next());
}


[LISTING SIX]


// PARSE.HXX: class to parse a tawk script file. Creates
// a structure which can be used at run-time to "execute"
// the tawk script.
// by Bruce Eckel,
#include

// types of tokens the scanner can find:
enum tokentype {
    fieldnumber,  // @(n): a reference to field n of the record
    string,       // literal text
    if_,          // @?n@: start of a conditional on field n
    else_,        // @~
    endif_,       // @.
    phase_change  // @main or @conclusion
};

// preamble and conclusion of the tawk script are only executed
// once, while main is executed once for every data record
enum phase {
    preamble,
    tmain,
    conclusion
};

// One token of a tawk script. Constructing a token scans it from the
// given stream.
class token {
    tokentype ttype;
    union {  // anonymous union: which member is valid depends on ttype
        int fieldnum;             // used for field numbers
        unsigned char * literal;  // used for strings
    };
    int if_level;  // nesting depth, set for if_, else_ and endif_ tokens

    // Scanning helpers:
    void get_token();         // recursive descent scanner
    void getnext(char & c);   // reads one character for get_token()
    unsigned char get_value(char delimiter, char * msg);
    void dumpline();          // skips the rest of the line (@! comments)
    void error(char * msg = "", char * msg2 = "");
public:
    token(istream & input);
    ~token();
    friend ostream & operator<<(ostream &s, token &t);
    int field_number() {
        return fieldnum;
    }
    int token_type() {
        return ttype;
    }
    int nesting_level() {
        return if_level;
    }
};

// The following is called a "container class," since its sole
// purpose is to hold a list of objects (tokens, in this case):
// Container for the tokens of one section of a tawk script.
class parse_array {
    token ** tokenarray;     // array of token pointers
    istream * parse_stream;  // stream the tokens are scanned from
    int token_count;         // working counter used by build_array()
    int end;                 // number of entries in tokenarray
    phase p_section;         // section of the program these tokens belong to
    void build_array();      // recursive: scans tokens, then fills the array
public:
    parse_array(istream & input);
    ~parse_array();
    int size() {
        return end;
    }
    token & operator[](int index);  // selects a token
    phase section() {
        return p_section;
    }
};


[LISTING SEVEN]

// PARSE.CXX: class parse function definitions
// by Bruce Eckel,
#include "csascii.hxx"
#include "parse.hxx"
#include
#include

// The following are "file static," which means no one outside
// this file can know about them. This is the meaning when a
// global variable is declared "static."
// Scanner state shared by all tokens; private to this file.
static istream * tokenstream = 0;         // stream currently being scanned
static int length = 0;                    // size of the string being built
static int line_number = 1;               // current line, for error messages
static int if_counter = 0;                // current "if" nesting depth
static phase program_section = preamble;  // preamble until @main is seen
static int end_of_file = 0;               // nonzero once @end has been seen

// Scans one token from input. The shared string length is reset and the
// stream is recorded for the scanning helpers before the descent starts.
token::token(istream & input) {
    length = 0;
    tokenstream = &input;
    get_token();  // recursively get characters to end of token
}

// Frees the text of a string token. Only string tokens own heap memory;
// for every other type the union holds a field number, not a pointer.
// The text is allocated with new unsigned char[...] in get_token(), so the
// array form of delete is required.
token::~token() {
    if (ttype == string)
        delete [] literal;
}

void token::error(char * msg, char * msg2) {
cerr << "token error on line " << line_number << ": " <<
msg << " " << msg2 << "\n";
exit(1);
}

// Writes a token to s: string tokens as their text, field-number tokens as
// a diagnostic line (only for testing). Other token types write nothing.
ostream & operator<<(ostream &s, token &t) {
    if (t.ttype == string)
        s << (char *)t.literal;
    else if (t.ttype == fieldnumber)
        s << " fieldnumber: " << t.fieldnum << "\n";
    return s;
}

// Get a character from the tokenstream, checking for
// end-of-file and newlines
// Reads the next character of the script into c. Reading after @end was
// seen, or running out of input, is a fatal error. Newlines advance the
// line counter used in error messages.
void token::getnext(char & c) {
    if (end_of_file) {
        error("attempt to read after @end statement\n",
              "missing @conclusion ?");
    }
    tokenstream->get(c);
    if (tokenstream->eof()) {
        error("@end statement missing");
    }
    if (c == '\n') {
        ++line_number;
    }
}

// See text for description of tokens
// Scans one token, one character per recursion level.
//
// Plain text, @<n> (a character given by number) and @@ (a literal '@')
// each add one character to a string token. These levels fall through to
// the store at the bottom once the recursion returns. An '@' met after
// text has started ends the string: it is pushed back for the next token,
// and the buffer is allocated with the now-known length. Every other '@'
// command is a complete token by itself and returns directly.
//
// The comment (@!) and preamble (@p) cases skip their line and scan the
// real token that follows. They contribute no character of their own, so
// they return once that token is complete. They used to `break` into the
// tail store, which wrote one byte before the string buffer, or through
// the union's pointer when the token was not a string at all.
void token::get_token() {
    char c;
    // This happens when DEscending:
    getnext(c);
    if ( c == '@') {
        if (length == 0) {  // length 0 means start of token
            getnext(c);
            switch(c) {
            case '!':  // comment line
                dumpline();   // dump the comment
                get_token();  // get a real token
                return;       // nothing of ours to store
            case 'p' : case 'P' :  // preamble statement
                if ( program_section != preamble )
                    error("only one preamble allowed");
                dumpline();   // just for looks, ignore it
                get_token();  // get a real token
                return;       // nothing of ours to store
            case 'm' : case 'M' :  // start of main loop
                dumpline();  // toss rest of line
                program_section = tmain;
                ttype = phase_change;
                return;  // very simple token
            case 'c' : case 'C' :  // start conclusion
                dumpline();
                program_section = conclusion;
                ttype = phase_change;
                return;  // very simple token
            case 'e' : case 'E':  // end statement
                end_of_file++;  // set flag
                ttype = fieldnumber;  // so destructor doesn't
                // delete free store for this token.
                if (if_counter)
                    error("unclosed 'if' statement(s)");
                return;
            case '(' :
                if ( program_section == preamble ||
                     program_section == conclusion )
                    error("@() not allowed in preamble or conclusion");
                fieldnum = get_value(')',"@()");
                ttype = fieldnumber;
                // This is a complete token, so quit
                return;
            case '<' :
                c = get_value('>',"@<>");  // the character to store
                length++;
                get_token();  // get more...
                break;
            case '?' :  // beginning of an "if" statement
                if ( program_section == preamble ||
                     program_section == conclusion )
                    error("@? not allowed in preamble or conclusion");
                fieldnum = get_value('@',"@?@");
                ttype = if_;
                getnext(c);  // just eat the colon
                if(c != ':')
                    error("@? must be followed by @: (then)");
                if_level = ++if_counter;  // for nesting
                return;
            case '~' :  // the "else" part of an "if" statement
                ttype = else_;
                if_level = if_counter;
                return;
            case '.' :  // "endif" terminator of an "if" statement
                ttype = endif_;
                if_level = if_counter--;
                if(if_counter < 0)
                    error("incorrect nesting of if-then-else clauses");
                return;
            case '@' :  // two '@' in a row mean print an '@'
                length++;  // just leave '@' as the value of c
                get_token();
                break;
            default:
                error("'@' must be followed by:",
                      "'(', '<', '?',':','~','.','p','m','c' or '@'");
            }
        } else {  // an '@' in the middle of a string; terminate
            // the string. Only one character is put back on the input.
            tokenstream->putback(c);  // to be used by the next token
            // allocate space, put the null in and return up the stack
            literal = new unsigned char[length + 1];  // space for '\0'
            literal[length--] = '\0';  // string delimiter
            ttype = string;  // what kind of token this is
            return;  // back up the stack
        }
    } else {  // not an '@', must be plain text
        length++;
        get_token();
    }
    // This occurs on the "tail" of the recursion:
    literal[length--] = c;  // put chars in as we rise back up...
}

// This function is used by get_token when it encounters a @(
// or a @< to get a number until it finds "delimiter."
// If an error occurs, msg is used to notify the user what
// kind of statement it is.
unsigned char token::get_value(char delimiter, char * msg) {
char c;
char buf[5];
int i = 0;
while(getnext(c), c != delimiter) {
if (!isdigit(c))
error("must use only digits inside", msg);
buf[i++] = c;
}
buf[i] = 0;
return atoi(buf);
}

// Discards script characters up to and including the next newline.
void token::dumpline() {
    char c;
    do {
        getnext(c);
    } while (c != '\n');
}

// Since there's no way to know how big a parse_array is
// going to be until the entire tawkfile has been tokenized,
// the recursive approach is again used:

// Scans tokens from input into this array. The section is recorded before
// scanning starts, because scanning may itself change program_section.
parse_array::parse_array(istream & input) {
    p_section = program_section;  // so we know at run-time
    token_count = 0;
    parse_stream = &input;
    build_array();
}

void parse_array::build_array() {
token * tk = new token(*parse_stream);
if( ! end_of_file && tk->token_type() != phase_change) {
// normal token, not end of file or phase change:
token_count++;
// recursively get tokens until eof or phase change:
build_array();
} else { // end of file or phase change
// only done once per object:
// allocate memory and return up the stack
tokenarray = new token * [end = token_count];
if(token_count) token_count--; // only if non-zero
return;
}
tokenarray[token_count--] = tk; // performed on the "tail"
}


// Deletes every stored token, then the pointer array. The array comes
// from new token*[...], so the array form of delete is required.
parse_array::~parse_array() {
    for (int i = 0; i < end; i++)
        delete tokenarray[i];
    delete [] tokenarray;
}

// Returns token number `index`. An index outside 0 .. size()-1 prints a
// message on cerr and exits the program. Negative values are rejected
// too; they used to index before the array.
token & parse_array::operator[](int index) {
    if ( index < 0 || index >= end ) {
        cerr << "parse_array error: index " << index
             << " out of bounds\n";
        exit(1);
    }
    return *tokenarray[index];
}


[LISTING EIGHT]

// TAWK.CXX: parses a tawk script and reads an ascii file;
// generates results according to the tawk script.
// by Bruce Eckel,
#include "csascii.hxx"
#include "parse.hxx"

// tawk: parses the script named by argv[1] into preamble, main and
// conclusion token arrays, then runs the main section once for every
// record of the data file named by argv[2]. A trailing -s pages the
// output on the screen.
//
// Changes from the listing as published:
//  * the index used by the preamble and conclusion loops is declared once
//    at function scope, so the code does not depend on a for-loop variable
//    staying visible after its loop;
//  * declarations inside the switch are enclosed in braces, so no case
//    label jumps past an initialization;
//  * a false "if" stops at the matching "else" OR the matching "endif", so
//    a conditional written without an else clause skips to its endif
//    instead of running off the end of the token array.
main (int argc, char * argv[]) {
    int screen = 0;  // flag set true if screen output desired
    int i;           // index for the preamble and conclusion loops
    if (argc < 3) {
        cerr << "usage: tawk tawkfile datafile\n" <<
            "trailing -s pages output to screen";
        exit(1);
    }
    if (argc == 4) {
        if (argv[3][0] != '-') {
            cerr << "must use '-' before trailing flag\n";
            exit(1);
        } else
        if (argv[3][1] != 's') {
            cerr << "'s' is only trailing flag allowed";
            exit(1);
        } else
            screen++;  // set screen output flag true
    }
    istream tawkfile(new filebuf->open(argv[1], input));
    parse_array Apreamble(tawkfile);    // the @preamble
    parse_array Amain(tawkfile);        // the @main section
    parse_array Aconclusion(tawkfile);  // the @conclusion
    csascii datafile(argv[2]);  // make a comma-separated ASCII
                                // object from the second arg
    // ------ @preamble ------
    for (i = 0; i < Apreamble.size(); i++)
        cout << Apreamble[i];  // preamble can only contain strings
    if(screen) {
        // ANSI reverse video sequence:
        cout << chr(27) << "[7m" << "press any key" <<
            chr(27) << "[0m";
        getch();
    }
    // ------ The Central Loop (@main) -------
    do {  // for each record in the data file
        if(screen) cout << chr(27) << "[2J";  // ANSI clear screen
        for(int t = 0; t < Amain.size(); t++) {
            switch(Amain[t].token_type()) {
            case fieldnumber:
                cout << datafile[Amain[t].field_number()];
                break;
            case string:
                cout << Amain[t];
                break;
            case if_: {
                int fn = Amain[t].field_number();
                if (datafile[fn].size() == 0) {  // conditional false
                    int level = Amain[t].nesting_level();
                    // Skip to the "else" or, if there is none, the
                    // "endif" on the same level. Scanning forward, the
                    // first of either at this level belongs to this "if":
                    // nested ones carry a deeper level.
                    while ( !((Amain[t].token_type() == else_
                               || Amain[t].token_type() == endif_)
                              && Amain[t].nesting_level() == level))
                        t++;
                }  // conditional true -- just continue
                break;
            }
            case else_: {  // an "if" conditional was true so skip
                           // all the statements in the "else" clause
                int level = Amain[t].nesting_level();
                // find the "endif" statement on the same level:
                while ( !(Amain[t].token_type() == endif_
                          && Amain[t].nesting_level() == level))
                    t++;
                break;
            }
            case endif_:  // after performing the "else" clause
                break;    // ignore it; only used to find the end
                          // of the conditional when "if" is true.
            default:  // should never happen (caught in parsing)
                cerr << "unknown statement encountered at run-time\n";
                exit(1);
            }
        }
        if(screen) {
            cout << chr(27) << "[7m" <<
                "press a key (ESC quits)" << chr(27) << "[0m";
            if( getch() == 27) break;
        }
    } while (datafile.next());  // matches do { ...
    // ------ @conclusion ------
    for ( i = 0; i < Aconclusion.size(); i++)
        cout << Aconclusion[i];  // conclusion contains only strings
}



[LISTING NINE]

# makefile for tawk.exe & lookup.exe
# Recipe lines start with a tab character, as make requires.
# Zortech C++:
CPP = ztc
# Glockenspiel C++ w/ MSC 4:
#CPP = ccxx !4

all: tawk.exe lookup.exe

tawk.exe : tawk.obj parse.obj csascii.obj field.obj
	$(CPP) tawk.obj parse.obj csascii.obj field.obj

# lookup.cxx includes csascii.hxx, which includes field.hxx
lookup.exe : lookup.cxx csascii.hxx field.hxx csascii.obj field.obj
	$(CPP) lookup.cxx csascii.obj field.obj

tawk.obj : tawk.cxx parse.hxx csascii.hxx field.hxx
	$(CPP) -c tawk.cxx

# parse.cxx includes csascii.hxx (and through it field.hxx) as well
parse.obj : parse.cxx parse.hxx csascii.hxx field.hxx
	$(CPP) -c parse.cxx

csascii.obj : csascii.cxx csascii.hxx field.hxx
	$(CPP) -c csascii.cxx

field.obj : field.cxx field.hxx
	$(CPP) -c field.cxx



[LISTING TEN]

@! REFORM.TWK
@! A tawk script to reformat a comma-separated ASCII file
@! with 6 fields. This creates a new CS-ASCII file with
@! fields 4 and 5 combined.
@! In the output line below, the conditional on field 4 emits
@! a single space only when field 4 is non-empty, so the two
@! joined fields are separated by a blank only when both parts
@! can be present.
@main
"@(0)","@(1)","@(2)","@(3)","@(4)@?4@: @~@.@(5)"
@conclusion
@end



[LISTING ELEVEN]


@! WALLET.TWK
@! Tawkfile to create a tiny phone listing for a wallet
@! on a Hewlett-Packard Laserjet-compatible printer
@! From a comma-separated ASCII file generated by a DBMS
@preamble
@<27>&l5C@! approximately 10 lines per inch
@<27>(s16.66H@! small typeface, built into Laserjet
@main
@! last, first, (area code) phone1
@(0),@(1)(@(2))@?3@:@(3)
@! phone2, if it exists
@?4@:@(4)
@~@.@~@?4@:@(4)
@~
@.@.@conclusion
@<27>E @! Reset the Laserjet
@end

[EXAMPLE 1]

// Minimal class example: one private integer and a function to print it.
class tiny {
    // private stuff here (this is a comment)
    int i;
public:  // public stuff here:
    // An "in-line" function. It returns nothing, so the return type is
    // stated as void rather than left to default to int.
    void print() {
        printf("i = %d\n",i);
    }
    tiny(int j);  // constructors have the class name
    ~tiny() {}    // destructors use a tilde
};  // classes end with a brace and a semicolon

// Non-inline constructor definition: stores j in the private member.
tiny::tiny(int j) {
    this->i = j;
}

main() {
    // Declaring the object calls the constructor implicitly.
    tiny A(2);
    // A.i = 30; // error! private member
    // Member functions are called through the object.
    A.print();
    // The destructor is called implicitly when A goes out of scope.
}





[EXAMPLE 2]


#include // cout automatically defined
// Stream output example: text and an integer chained with <<.
main() {
    // The space before "today" keeps the number from running into the
    // following word ("I am 6today!").
    cout << "Hello, world!\n" << "I am "
         << 6 << " today!\n";
}




[EXAMPLE 3]

// Open the file through a named filebuf so the result of open() can be
// checked before a stream is attached to it.
filebuf f1;
if (f1.open(argv[1],input) == 0) {
    // Report the failure on cerr, as the other listings do for errors.
    cerr << "cannot open " << argv[1] << "\n";
    exit(1);
}
istream infile(&f1);



[EXAMPLE 4]


"Ball","Mike","Oregon Software C++ Compiler"
"Bright","Walter","Zortech C++ Compiler"
"Carolan","John","Glockenspiel C++ Translator"
"Stroustrup","Bjarne","AT&T, C++ Creator"
"Tiemann","Michael","Free Software Foundation C++ Compiler"



  3 Responses to “Category : Files from Magazines
Archive   : DDJ0589.ZIP
Filename : TAWK.ASC

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/