Output of file : DETEX.L contained in archive :
DTEX25A.ZIP
%{
#ifndef lint
static char rcsid[] = "$Header: /usr/src/local/bin/detex/RCS/detex.l,v 2.16 1993/01/14 16:48:25 trinkle Exp $";
#endif
/*
* detex [-e environment-list] [-c] [-l] [-n] [-s] [-t] [-w] [file[.tex]]
*
* This program is used to remove TeX or LaTeX constructs from a text
* file.
*
* Written by:
* Daniel Trinkle
* Department of Computer Science
* Purdue University
*
*/
#include "detex.h"
#ifdef USG
#include
#define index strchr
#define rindex strrchr
#else
#include
#endif
#ifndef MAXPATHLEN
#include
#endif
/*
 * Statement-prefix macros used in the rule actions.  Each expands to an
 * unbraced `if', so a use such as `LaBEGIN LaMacro;' only switches state
 * when the condition holds.  Because of the bare `if', none of them may
 * be followed directly by an `else'.
 */
/* switch start condition only when LaTeX mode is on */
#define LaBEGIN if (fLatex) BEGIN
/* switch start condition only in LaTeX mode and when fCite is not set */
#define CITEBEGIN if (fLatex && !fCite) BEGIN
/* emit a blank for a dropped construct when fSpace is set and fWord is not */
#define IGNORE if (fSpace && !fWord) putchar(' ')
/* emit a blank unless fWord is set */
#define SPACE if (!fWord) putchar(' ')
/* emit a newline unless fWord is set */
#define NEWLINE if (!fWord) putchar('\n')
/* old-style declaration for systems whose headers do not declare malloc */
#ifndef NO_MALLOC_DECL
char *malloc();
#endif
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
char *sbProgName; /* name we were invoked with */
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
int cfp = 0; /* count of files in stack */
int cOpenBrace = 0; /* count of `{' not yet closed by `}'; maintained by the brace-counting rules */
int csbEnvIgnore; /* count of environments ignored */
int csbIncList = 0; /* count of includeonly files */
int csbInputPaths; /* count of input paths */
int fLatex = 0; /* flag to indicate delatex (LaTeX mode) */
int fWord = 0; /* flag for -w option */
int fFollow = 1; /* flag to follow input/include */
int fCite = 0; /* flag to echo \cite and \ref args */
int fSpace = 0; /* flag to replace \cs with space */
int fForcetex = 0; /* flag to inhibit latex mode */
%}
S       [ \t\n]*
W       [a-zA-Z]+
%Start Define Display IncludeOnly Input Math Normal Control
%Start LaBegin LaDisplay LaEnd LaEnv LaFormula LaInclude
%Start LaMacro LaMacro2 LaVerbatim
        /*
         * Start conditions (main() selects Normal before calling yylex()):
         *   Normal       ordinary text; all control sequences are recognized here
         *   Control      just after an unrecognized \cs, eating its trailing junk
         *   Define       after \def, up to the opening brace of the body
         *   Display/Math inside $$...$$ and $...$
         *   Input        reading the file name argument of \input
         *   IncludeOnly  reading the \includeonly file list
         *   LaBegin      after \begin, reading the environment name
         *   LaEnv        inside an ignored environment
         *   LaEnd        after \end inside an ignored environment
         *   LaVerbatim   inside a verbatim environment that is echoed
         *   LaMacro      skipping a macro argument up to the first "}"
         *   LaMacro2     skipping a macro argument with balanced braces
         *   LaFormula    inside \( ... \)
         *   LaDisplay    inside \[ ... \]
         *   LaInclude    reading the file name argument of \include
         * Every rule below is tied to exactly one start condition; without the
         * <State> prefix the repeated "\n", "." and "}" rules would shadow
         * one another.
         */
%%
<Normal>"%".*                   /* ignore comments */ ;
<Normal>"\\begin"{S}"{"{S}"document"{S}"}"      {fLatex = !fForcetex; IGNORE;}
<Normal>"\\begin"               { /* environment start */ LaBEGIN LaBegin; IGNORE;}
<LaBegin>{S}"{"{S}"verbatim"{S}"}"      {   if (BeginEnv("verbatim"))
                                                BEGIN LaEnv;
                                            else
                                                BEGIN LaVerbatim;
                                            IGNORE;
                                        }
<LaVerbatim>"\\end"{S}"{"{S}"verbatim"{S}"}"    { /* leave verbatim mode */ BEGIN Normal; IGNORE;}
<LaVerbatim>.                   ECHO;
<LaBegin>{W}                    {   if (BeginEnv(yytext))
                                        BEGIN LaEnv;
                                    else
                                        BEGIN LaMacro;
                                    IGNORE;
                                }
<LaBegin>"\n"                   NEWLINE;
<LaBegin>.                      ;
<LaEnv>"\\end"                  { /* absorb some environments */ LaBEGIN LaEnd; IGNORE;}
<LaEnv>"\n"                     NEWLINE;
<LaEnv>.                        ;
<LaEnd>{W}                      {   /* end environment */
                                    if (EndEnv(yytext))
                                        BEGIN Normal;
                                    IGNORE;
                                }
<LaEnd>"}"                      {BEGIN LaEnv; IGNORE;}
<LaEnd>"\n"                     NEWLINE;
<LaEnd>.                        ;
<Normal>"\\bibitem"             { /* ignore args */ LaBEGIN LaMacro2; IGNORE;}
<Normal>"\\bibliography"        { /* of these \cs */ LaBEGIN LaMacro; IGNORE;}
<Normal>"\\bibstyle"            {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\cite"                {CITEBEGIN LaMacro2; IGNORE;}
<Normal>"\\documentstyle"       {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\end"                 {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\index"               {LaBEGIN LaMacro2; SPACE;}
<Normal>"\\label"               {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\pageref"             {CITEBEGIN LaMacro; IGNORE;}
<Normal>"\\pagestyle"           {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\ref"                 {CITEBEGIN LaMacro; IGNORE;}
<Normal>"\\setcounter"          {LaBEGIN LaMacro; IGNORE;}
<Normal>"\\verb"                {   /* ignore \verb<char>...<char> */
                                    if (fLatex) {
                                        int verbchar, c;

                                        verbchar = input();
                                        /* input() yields 0 (lex) or EOF (flex) at */
                                        /* end of input; stop there instead of */
                                        /* spinning on an unterminated \verb */
                                        if (verbchar != 0 && verbchar != EOF)
                                            while ((c = input()) != verbchar
                                                   && c != 0 && c != EOF)
                                                if (c == '\n')
                                                    NEWLINE;
                                    }
                                    IGNORE;
                                }
<LaMacro>"}"                    BEGIN Normal;
<LaMacro>"\n"                   NEWLINE;
<LaMacro>.                      ;
<LaMacro2>"{"                   {   cOpenBrace++; }
<LaMacro2>"}"                   {   cOpenBrace--;
                                    if (cOpenBrace == 0)
                                        BEGIN Normal;
                                }
<LaMacro2>"\n"                  NEWLINE;
<LaMacro2>.                     ;
<Normal>"\\def"                 { /* ignore def begin */ BEGIN Define; IGNORE;}
<Define>"{"                     BEGIN Normal;
<Define>"\n"                    NEWLINE;
<Define>.                       ;
<Normal>"\\("                   { /* formula mode */ LaBEGIN LaFormula; IGNORE;}
<LaFormula>"\\)"                BEGIN Normal;
<LaFormula>"\n"                 NEWLINE;
<LaFormula>.                    ;
<Normal>"\\["                   { /* display mode */ LaBEGIN LaDisplay; IGNORE;}
<LaDisplay>"\\]"                BEGIN Normal;
<LaDisplay>"\n"                 NEWLINE;
<LaDisplay>.                    ;
<Normal>"$$"                    { /* display mode */ BEGIN Display; IGNORE;}
<Display>"$$"                   BEGIN Normal;
<Display>"\n"                   NEWLINE;
<Display>.                      ;
<Normal>"$"                     { /* math mode */ BEGIN Math; IGNORE;}
<Math>"$"                       BEGIN Normal;
<Math>"\n"                      NEWLINE;
<Math>"\\$"                     ;
<Math>.                         ;
<Normal>"\\include"             { /* process files */ LaBEGIN LaInclude; IGNORE;}
<LaInclude>[^{ \t\n}]+          {   IncludeFile(yytext);
                                    BEGIN Normal;
                                }
<LaInclude>"\n"                 NEWLINE;
<LaInclude>.                    ;
<Normal>"\\includeonly"         {BEGIN IncludeOnly; IGNORE;}
<IncludeOnly>[^{ \t,\n}]+       AddInclude(yytext);
<IncludeOnly>"}"                {   /* an empty list is recorded as a single */
                                    /* null entry so InList() matches nothing */
                                    if (csbIncList == 0)
                                        rgsbIncList[csbIncList++] = (char *)NULL;
                                    BEGIN Normal;
                                }
<IncludeOnly>"\n"               NEWLINE;
<IncludeOnly>.                  ;
<Normal>"\\input"               {BEGIN Input; IGNORE;}
<Input>[^{ \t\n}]+              {   InputFile(yytext);
                                    BEGIN Normal;
                                }
<Input>"\n"                     NEWLINE;
<Input>.                        ;
<Normal>\\(aa|AA|ae|AE|oe|OE|ss)[ \t]*[ \t\n}]  { /* handle ligatures */ printf("%.2s", yytext+1);}
<Normal>\\[OoijLl][ \t]*[ \t\n}]        {printf("%.1s", yytext+1);}
<Normal>\\[a-zA-Z@]+            { /* ignore other \cs */ BEGIN Control; IGNORE;}
<Normal>"\\ "                   SPACE;
<Normal>\\.                     IGNORE;
<Control>\\[a-zA-Z@]+           IGNORE;
<Control>[a-zA-Z@0-9]*[-'=`][^ \t\n{]*  IGNORE;
<Control>"\n"                   {BEGIN Normal; NEWLINE;}
<Control>[ \t]*[{]*             {BEGIN Normal; IGNORE;}
<Control>.                      {yyless(0);BEGIN Normal;}
<Normal>[{}\\|]                 /* special characters */ IGNORE;
<Normal>[!?]"`"                 IGNORE;
<Normal>~                       SPACE;
<Normal>{W}[']*{W}              {   if (fWord)
                                        printf("%s\n", yytext);
                                    else
                                        ECHO;
                                }
<Normal>[0-9]+                  if (!fWord) ECHO;
<Normal>(.|\n)                  if (!fWord) ECHO;
%%
/******
** main --
**	Set sbProgName to the base of arg 0.
**	Set the input paths.
**	Check for options
**	    -c		echo LaTeX \cite, \ref, and \pageref values
**	    -e <env-list> list of LaTeX environments to ignore
**	    -l		force latex mode
**	    -n		do not follow \input and \include
**	    -s		replace control sequences with space
**	    -t		force tex mode
**	    -w		word only output
**	Set the list of LaTeX environments to ignore.
**	Process each input file.
**	If no input files are specified on the command line, process stdin.
**
**	Exits with status 0; fatal conditions exit through ErrorExit().
******/
main(cArgs,rgsbArgs)
int cArgs;
char *rgsbArgs[];
{
    char *pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
    FILE *TexOpen();
    int fSawFile = 0, iArgs = 1;

    /* get base name and decide what we are doing, detex or delatex */
    if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
        sbProgName++;
    else
        sbProgName = rgsbArgs[0];
    if (strcmp("delatex",sbProgName) == 0)
        fLatex = 1;

    /* set rgsbInputPaths for use with TexOpen() */
    SetInputPaths();

    /* process command line options */
    while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
        while (*++pch)
            switch (*pch) {
            case CHCITEOPT:
                fCite = 1;
                break;
            case CHENVOPT:
                /* the list is the NEXT argument; only consume it if it */
                /* exists, otherwise keep the current list and warn */
                if (iArgs + 1 < cArgs)
                    sbEnvList = rgsbArgs[++iArgs];
                else {
                    sbBadOpt[0] = *pch;
                    sbBadOpt[1] = '\0';
                    Warning("option requires an environment list, ignored -",
                            sbBadOpt);
                }
                break;
            case CHLATEXOPT:
                fLatex = 1;
                break;
            case CHNOFOLLOWOPT:
                fFollow = 0;
                break;
            case CHSPACEOPT:
                fSpace = 1;
                break;
            case CHTEXOPT:
                fForcetex = 1;
                break;
            case CHWORDOPT:
                fWord = 1;
                break;
            default:
                sbBadOpt[0] = *pch;
                sbBadOpt[1] = '\0';
                Warning("unknown option ignored -", sbBadOpt);
                break;
            }
        iArgs++;
    }
    SetEnvIgnore(sbEnvList);

    /* process input files */
    for (; iArgs < cArgs; iArgs++) {
        fSawFile++;
        if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
            Warning("can't open file", rgsbArgs[iArgs]);
            continue;
        }
        BEGIN Normal;
        (void)yylex();
    }

    /* if there were no input files, assume stdin */
    if (!fSawFile) {
        yyin = stdin;
        BEGIN Normal;
        (void)yylex();
    }
#ifndef FLEX_SCANNER
    /* the scanner should be back in Normal once all input is consumed */
    if (YYSTATE != Normal)
        ErrorExit("input contains an unterminated mode or environment");
#endif
    exit(0);
}
#ifdef FLEX_SCANNER
#undef yywrap
#endif
/******
** yywrap -- called by the scanner at end of input.  The exhausted stream
**	is closed.  If an enclosing file is waiting on the rgfp stack it is
**	popped into yyin and 0 is returned so scanning resumes there;
**	otherwise 1 is returned to tell lex there is no more input.
******/
yywrap()
{
    (void)fclose(yyin);

    /* nothing left on the stack: scanning is finished */
    if (cfp <= 0)
        return(1);

    /* resume the file that issued the \input or \include */
    cfp--;
    yyin = rgfp[cfp];
    return(0);
}
/******
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
**	sbEnvList.  SeparateList() overwrites separators in the string it is
**	given, so the list is split in a private heap copy; the caller's
**	string (which may be a constant default) is left untouched.  The
**	copy is never freed because rgsbEnvIgnore points into it for the
**	rest of the run.  A NULL sbEnvList yields an empty ignore list.
******/
SetEnvIgnore(sbEnvList)
char *sbEnvList;
{
    char *sbCopy = NULL;

    if (sbEnvList != NULL) {
        sbCopy = malloc((unsigned)(strlen(sbEnvList) + 1));
        if (sbCopy == NULL)
            ErrorExit("out of memory while copying the environment list");
        (void)strcpy(sbCopy, sbEnvList);
    }
    csbEnvIgnore = SeparateList(sbCopy, rgsbEnvIgnore, CHENVSEP, MAXENVS);
    if (csbEnvIgnore == ERROR)
        ErrorExit("The environment list contains too many environments");
}
/******
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore.  If it
**	is, sbCurrentEnv is set to sbEnv and 1 is returned; otherwise 0.
**	Always returns 0 when not in LaTeX mode.  A listed name that does not
**	fit in sbCurrentEnv is reported and treated as not ignored rather
**	than overflowing the buffer.
******/
BeginEnv(sbEnv)
char *sbEnv;
{
    int i;

    if (!fLatex) return(0);
    for (i = 0; i < csbEnvIgnore; i++)
        if (strcmp(sbEnv, rgsbEnvIgnore[i]) == 0) {
            /* names come from the user's -e list and may be arbitrarily long */
            if (strlen(sbEnv) >= sizeof(sbCurrentEnv)) {
                Warning("environment name is too long, not ignoring", sbEnv);
                return(0);
            }
            (void)strcpy(sbCurrentEnv, sbEnv);
            return(1);
        }
    return(0);
}
/******
** EndEnv -- returns 1 when LaTeX mode is on and sbEnv names the
**	environment currently being ignored (sbCurrentEnv), 0 otherwise.
******/
EndEnv(sbEnv)
char *sbEnv;
{
    int fMatch = 0;

    if (fLatex && strcmp(sbEnv, sbCurrentEnv) == 0)
        fMatch = 1;
    return(fMatch);
}
/******
** InputFile -- push the current yyin and open sbFile.  If the open fails,
**	the sbFile is ignored.  Does nothing when following is disabled
**	(fFollow clear).  If the rgfp stack is already full the file is
**	skipped with a warning instead of writing past the end of the stack.
******/
InputFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    if (!fFollow)
        return;
    /* deep or recursive \input chains must not overrun the stack */
    if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
        Warning("\\input files nested too deeply, ignoring", sbFile);
        return;
    }
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\input file", sbFile);
        return;
    }
    /* only push once the new file is known to be readable */
    rgfp[cfp++] = yyin;
    yyin = fpNew;
}
/******
** IncludeFile -- if sbFile is accepted by InList(), push current yyin
**	and open sbFile.  If the open fails, the sbFile is ignored.  Does
**	nothing when following is disabled (fFollow clear).  If the rgfp
**	stack is already full the file is skipped with a warning instead of
**	writing past the end of the stack.
******/
IncludeFile(sbFile)
char *sbFile;
{
    FILE *TexOpen();
    FILE *fpNew;

    if (!fFollow)
        return;
    if (!InList(sbFile))
        return;
    /* deep or recursive \include chains must not overrun the stack */
    if (cfp >= (int)(sizeof(rgfp) / sizeof(rgfp[0]))) {
        Warning("\\include files nested too deeply, ignoring", sbFile);
        return;
    }
    if ((fpNew = TexOpen(sbFile)) == NULL) {
        Warning("can't open \\include file", sbFile);
        return;
    }
    /* only push once the new file is known to be readable */
    rgfp[cfp++] = yyin;
    yyin = fpNew;
}
/******
** AddInclude -- adds a copy of sbFile to the rgsbIncList and increments
**	csbIncList.  If the include list is too long, sbFile is ignored
**	(with a warning).  Does nothing when following is disabled.
******/
AddInclude(sbFile)
char *sbFile;
{
    char *sbCopy;

    if (!fFollow)
        return;
    if (csbIncList >= MAXINCLIST) {
        Warning("\\includeonly list is too long, ignoring", sbFile);
        return;         /* really ignore it: the table has no room left */
    }
    /* yytext is reused by the scanner, so keep a private copy */
    sbCopy = malloc((unsigned)(strlen(sbFile) + 1));
    if (sbCopy == NULL)
        ErrorExit("out of memory while saving the \\includeonly list");
    (void)strcpy(sbCopy, sbFile);
    rgsbIncList[csbIncList++] = sbCopy;
}
/******
** InList -- checks to see if sbFile is in the rgsbIncList.  If there is
**	no list, all files are assumed to be "in the list".  Everything from
**	the last '.' in sbFile onward is dropped before comparing.  A null
**	entry in the list ends the search.  Returns 1 if found, 0 if not; a
**	name too long for the local buffer is reported as not in the list.
******/
InList(sbFile)
char *sbFile;
{
    char *pch, sbBase[MAXPATHLEN];
    int i;

    if (csbIncList == 0)        /* no list */
        return(1);
    /* refuse names that would overflow sbBase */
    if (strlen(sbFile) >= sizeof(sbBase))
        return(0);
    (void)strcpy(sbBase, sbFile);
    if ((pch = rindex(sbBase, '.')) != NULL)
        *pch = '\0';
    i = 0;
    while ((i < csbIncList) && rgsbIncList[i])
        if (strcmp(rgsbIncList[i++], sbBase) == 0)
            return(1);
    return(0);
}
/******
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
**	TEXINPUTS environment variable if set or else DEFAULTINPUTS.
**	SeparateList() overwrites separators in the string it is given, so
**	the chosen value is always split in a private heap copy; neither the
**	environment nor the built-in default is modified.  The copy is never
**	freed because rgsbInputPaths points into it for the rest of the run.
******/
SetInputPaths()
{
    char *sb, *sbPaths, *getenv();

    if ((sb = getenv("TEXINPUTS")) == NULL)
        sb = DEFAULTINPUTS;
    sbPaths = malloc((unsigned)(strlen(sb) + 1));
    if (sbPaths == NULL)
        ErrorExit("out of memory while copying the input path list");
    (void)strcpy(sbPaths, sb);
    csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
    if (csbInputPaths == ERROR)
        ErrorExit("TEXINPUTS environment variable has too many paths");
}
/******
** SeparateList -- takes a chSep separated list sbList, replaces the
** chSep's with NULLs and sets rgsbList[i] to the beginning of
** the ith word in sbList. The number of words is returned. A
** ERROR is returned if there are more than csbMax words.
******/
SeparateList(sbList, rgsbList, chSep, csbMax)
char *sbList, *rgsbList[], chSep;
int csbMax;
{
int csbList = 0;
while (sbList && *sbList && csbList < csbMax) {
rgsbList[csbList++] = sbList;
if (sbList = index(sbList, chSep))
*sbList++ = '\0';
}
return(sbList && *sbList ? ERROR : csbList);
}
/******
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
**	For each input path the following order is used:
**	    file.tex - must be as named, if not there go to the next path
**	    file.ext - random extension, try it
**	    file     - base name, add .tex and try it
**	    file     - try it as is
**	Notice that if file exists in the first path and file.tex exists in
**	one of the other paths, file in the first path is what is opened.
**	If the sbFile begins with a '/', no paths are searched.
**
**	Returns the open stream, or NULL if nothing could be opened.  Names
**	that do not fit in the MAXPATHLEN work buffer are skipped rather than
**	overflowing it, and the ".tex" probe is built in that same buffer so
**	no memory is allocated (or leaked) per attempt.
******/
FILE *
TexOpen(sbFile)
char *sbFile;
{
    char *pch;
    FILE *fp;
    int iPath, cchPath;
    static char sbFullPath[MAXPATHLEN];

    for (iPath = 0; iPath < csbInputPaths; iPath++) {
        if (*sbFile == '/') {   /* absolute path */
            if (strlen(sbFile) >= sizeof(sbFullPath))
                return((FILE *)NULL);
            (void)strcpy(sbFullPath, sbFile);
            iPath = csbInputPaths;      /* only check once */
        } else {
            /* directory + '/' + file + '\0' must fit */
            if (strlen(rgsbInputPaths[iPath]) + 1 + strlen(sbFile)
                >= sizeof(sbFullPath))
                continue;
            (void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
        }

        /* If sbFile ends in .tex then it must be there */
        pch = rindex(sbFullPath, '.');
        if (pch != NULL && strcmp(pch, ".tex") == 0) {
            if ((fp = fopen(sbFullPath, "r")) != NULL)
                return(fp);
            continue;
        }

        /* if .<ext> then try to open it.  the '.' represents   */
        /* the beginning of an extension if it is not the first */
        /* character and it does not follow a '.' or a '/'      */
        if (pch != NULL && pch > &(sbFullPath[0])
            && *(pch - 1) != '.' && *(pch - 1) != '/'
            && (fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);

        /* just base name, add .tex to the name (in place), then */
        /* strip it again so the final attempt sees the bare name */
        cchPath = (int)strlen(sbFullPath);
        if (cchPath + 4 < (int)sizeof(sbFullPath)) {
            (void)strcat(sbFullPath, ".tex");
            fp = fopen(sbFullPath, "r");
            sbFullPath[cchPath] = '\0';
            if (fp != NULL)
                return(fp);
        }

        /* try sbFile regardless */
        if ((fp = fopen(sbFullPath, "r")) != NULL)
            return(fp);
    }
    return((FILE *)NULL);
}
/******
** Warning -- write "<program>: warning: <sb1> <sb2>" and a newline to
**	stderr.  Execution continues afterwards.
******/
Warning(sb1, sb2)
char *sb1, *sb2;
{
    FILE *fpErr = stderr;

    fprintf(fpErr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
}
/******
** ErrorExit -- print an error message preceded by the program name.
** Stdout is flushed and detex exits.
******/
ErrorExit(sb1)
char *sb1;
{
(void)fflush(stdout);
fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
exit(1);
}
Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!
This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.
But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/