Category : Miscellaneous Language Source Code
Archive   : QTAWKU42.ZIP
Filename : WORDFREQ.EXP

 
Output of file : WORDFREQ.EXP contained in archive : QTAWKU42.ZIP
# wordfreq - print number of occurrences of each word
# input: text
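# output: number-word pairs, printed in array traversal order (this script does not sort them; pipe the output through a sort to order by number)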

BEGIN {
    # Regular expression for lines that start with '#'; stored in a
    # variable so it can be used as a pattern by name.
    comment_only = /^#/;

    # Length threshold that each word is compared against before counting.
    min_lng = 3;

    # Minimum number of occurrences a word needs in order to be printed.
    min_cnt = 1;
}

# Records matching comment_only (lines beginning with '#') are skipped
# entirely: go straight to the next input record.
comment_only {
    next;
}

# Per-record rule: strip comments, punctuation, digits and operator-like
# characters from the record, then tally each remaining field whose length
# reaches min_lng in the count[] array.
{
    # Progress trace: print the current FNR to "stderr".
    fprintf("stderr","%u\n",FNR);

    # Remove trailing comments BEFORE punctuation.
    # NOTE(review): {_p} is presumably QTAwk's predefined punctuation class
    # and presumably covers '#' -- confirm against the QTAwk manual. If it
    # does, stripping punctuation first erases the '#' marker, so the
    # comment text would never be removed and would be counted as words.
    # If it does not, the two orders give the same result.
    gsub(/#.*$/,"");
    gsub(/{_p}/,""); # remove punctuation
    gsub(/[0-9+^"'`*\$\&~\<\>=\\\/\[\]\(\)\{\}-]+/," "); # change to single white space

    # min_lng is documented as the *minimum* length of a word to count, so
    # the bound is inclusive (consistent with the inclusive min_cnt test
    # used when the results are printed).
    for ( i = 1 ; i <= NF ; i++ ) {
        if ( length($i) >= min_lng ) {
            count[$i]++;
        }
    }
}

# Summary rule: print "count word" for every tallied word whose count is at
# least min_cnt, then print totals for the file.
# NOTE(review): FINAL presumably runs at the end of each input file in
# QTAwk, which would explain why the tally is emptied here -- confirm.
FINAL {
    # i = number of words printed
    # j = count of the word currently being examined
    # k = number of distinct words tallied
    # m = sum of the counts of the printed words
    # w = current word (array index)
    # j is declared local so it no longer leaks out as a global variable.
    local i = 0, j, k = 0, m = 0, w;

    for ( w in count ) {
        j = count[w];
        if ( min_cnt <= j ) {
            print j , w;
            i++;
            m += j;
        }
        k++;
    }

    # Discard the tally (presumably deletea removes the whole array -- confirm).
    deletea count;

    printf("File: %s\n",FILENAME);
    printf("Total Words: %lu\nTotal Output: %lu\nTotal Count Ouput: %lu\n\n",k,i,m);
}


  3 Responses to “Category : Miscellaneous Language Source Code
Archive   : QTAWKU42.ZIP
Filename : WORDFREQ.EXP”

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/