Category : BASIC Source Code
Archive   : QBCMP.ZIP
Filename : C1.BAS

 
Output of file : C1.BAS contained in archive : QBCMP.ZIP

'Experimental LZW Compressor for QuickBASIC 4.5
'By Rich Geldreich 1992
'This program is in the public domain: use as you wish!
'(QB4.5 users: Use search & replace to change all of the "SSEG" strings
'to "VARSEG" strings in this program.)
'Please see QBLZW.BAS for more information on LZW compression in QB.

'If you have and questions or problems, write/call:
'
'Rich Geldreich
'410 Market St.
'Gloucester City, NJ 08030
'(609)-742-8752
'
'Benchmarks: ORIGINAL HUFFMAN2.BAS C1.BAS ZIP
'BCL71ENR.LIB 263245 216495 191799 159324
'BIG_1_3.PCX 7401 3926 2735 2374
'MESSAGES.TXT 226989 151750 113077 84044
'TIME.MOD 155394 102447 87460 75101
'
'
'
' Do not press ctrl+break while this program is compressing! The string
' pointers may change, which may result in an error!

DEFINT A-Z
DECLARE SUB PutByte (A)
DECLARE SUB PutCode (A)
DECLARE SUB Rebuild.Table (New.Entries)
DECLARE FUNCTION GetByte ()
DECLARE SUB Hash (Prefix, Suffix, Index, Found)

CONST True = -1, False = 0

DIM SHARED Prefix(6576), Suffix(6576), Code(6576)
DIM SHARED Used(4096)

DIM SHARED InBuffer$, IAddress, IEndAddress, Iseg
DIM SHARED OutBuffer$, OStartAddress, OAddress, OEndAddress, Oseg

DIM SHARED CodeSize, CurrentBit, Char&
DIM SHARED Shift(12) AS LONG


FOR A = 0 TO 12: READ Shift(A): NEXT
DATA 1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192


LOCATE , , 1
IF POS(0) <> 1 THEN PRINT


InBuffer$ = STRING$(4000, 0) 'input buffer
OutBuffer$ = STRING$(4000, 0) 'output buffer


A& = SADD(OutBuffer$)
A& = A& - 65536 * (A& < 0)
Oseg = SSEG(OutBuffer$) + (A& \ 16) 'Segment of buffer
OAddress = (A& MOD 16) 'Current address in disk buffer
OEndAddress = OAddress + 4000 'End address of buffer
OStartAddress = OAddress 'Start of buffer

'Open input file
File$ = COMMAND$
IF File$ = "" THEN LINE INPUT "File to compress? "; File$: File$ = LTRIM$(RTRIM$(File$))
IF File$ = "" THEN END
OPEN File$ FOR BINARY AS #1
FileLength& = LOF(1)
'Is it there?
IF FileLength& = 0 THEN
CLOSE #1
KILL COMMAND$
PRINT COMMAND$; " not found"
END
END IF
'Open output file
OPEN "output.lzw" FOR BINARY AS #2
'Is it already there?
IF LOF(2) <> 0 THEN
'Kill output file and reopen it
CLOSE #2
KILL "output.lzw"
OPEN "output.lzw" FOR BINARY AS #2
END IF
'CurrentLoc& - position in input file
CurrentLoc& = 2

'Compression codes:
'Code 256 = end of file
'Code 257 = increase code size
'Code 258 = rebuild table
'Code 259 - 4095 = available for strings
StartCode = 259 'First LZW code that is available
NextCode = 259
'The maximum code that can be represented in 9 bits
MaxCode = 512
'Start with 9 bit code size
CodeSize = 9
'Current bit position in Char& - use for PutCode
CurrentBit = 0
'Char& is a temporary buffer; accumulates codes from main program and
'puts them in the output file once complete bytes have been
'built
Char& = 0

GOSUB ClearTable
'Get first byte from file(it's a special case)
Prefix = GetByte

PRINT "LZW Compressor For QuickBASIC 4.5"
PRINT "By Richard Geldreich June 2nd, 1992"
PRINT "Compressing "; File$
PRINT : PRINT : PRINT
'First line to start updating statistics
Y = CSRLIN - 3
'Main compression loop
DO
DO

IF CurrentLoc& > FileLength& THEN
PutCode Prefix
PutCode 256
PutCode 0: PutCode 0
OutBuffer$ = LEFT$(OutBuffer$, OAddress - OStartAddress)
LOCATE Y, 1
PRINT "Bytes In:"; CurrentLoc&; (100& * CurrentLoc&) \ FileLength&; "%"
BytesOut& = LOF(2) + (OAddress - OStartAddress)
PRINT "Bytes Out:"; BytesOut&
PRINT "Total Compression:"; 100 - ((100& * BytesOut&) \ CurrentLoc&); "% ";
PUT #2, , OutBuffer$
CLOSE
END
ELSE

Suffix = GetByte
CurrentLoc& = CurrentLoc& + 1
'We now have a Prefix:Suffix to search for.
'If the search fails, put the Prefix in the output file
'and set the Prefix equal to the character which caused
'the failure.

Hash Prefix, Suffix, Index, Found
IF Found = True THEN
Prefix = Code(Index)
'update how many times this string was used
Used(Prefix) = Used(Prefix) + 1
END IF
END IF
LOOP WHILE Found = True

'only increase the code size when required
DO WHILE Prefix >= MaxCode AND CodeSize < 12
PutCode 257
MaxCode = MaxCode * 2
CodeSize = CodeSize + 1
LOOP

PutCode Prefix

'Put the new string into the hash table.
Prefix(Index) = Prefix
Suffix(Index) = Suffix
Code(Index) = NextCode 'remember this string's code

'Prefix is now equal to the character that caused the failure now.
Prefix = Suffix

NextCode = NextCode + 1
'if there are too many strings then rebuild the encoding table
IF NextCode > 4096 THEN

PutCode 258 'send rebuild table code to decompressor

Rebuild.Table New.Entries
NextCode = New.Entries + StartCode

IF NextCode > 4096 THEN
GOSUB ClearTable
NextCode = StartCode 'reset NextCode to top of tree
END IF

CodeSize = 9
MaxCode = 512


END IF

'let the impatient user know we haven't hung up (yet!)
PrintCounter = PrintCounter + 1 'see if time to update the
IF PrintCounter = 512 THEN 'screen
LOCATE Y, 1
PRINT "Bytes In:"; CurrentLoc&; (100& * CurrentLoc&) \ FileLength&; "%"
BytesOut& = LOF(2) + (OAddress - OStartAddress)
PRINT "Bytes Out:"; BytesOut&
PRINT "Compression:"; 100 - ((100& * BytesOut&) \ CurrentLoc&); "% "; "CodeSize:"; CodeSize; "NextCode:"; NextCode; " ";
PrintCounter = 0
END IF
LOOP
'clears the hash table
ClearTable:
FOR A = 0 TO 6576
Prefix(A) = -1
Suffix(A) = -1
Code(A) = -1
NEXT
RETURN

'Reads one byte from the input buffer, and fills the buffer if it's emty.
FUNCTION GetByte STATIC
IF IAddress = IEndAddress THEN
GET #1, , InBuffer$
A& = SADD(InBuffer$)
A& = A& - 65536 * (A& < 0)
Iseg = SSEG(InBuffer$) + (A& \ 16)
IAddress = (A& MOD 16)
IEndAddress = IAddress + 4000
END IF
DEF SEG = Iseg
GetByte = PEEK(IAddress)
IAddress = IAddress + 1
END FUNCTION

'Attempts to finds a prefix:suffix string.
SUB Hash (Prefix, Suffix, Index, Found)

Index = (Prefix * 256& XOR Suffix) MOD 6577 'XOR hashing
IF Index = 0 THEN 'is Index lucky enough to be 0?
Offset = 1 'Set offset to 1, because 6577-0=6577
ELSE
Offset = 6577 - Index
END IF
DO 'until we find a match or don't
IF Code(Index) = -1 THEN 'is there nothing here?
Found = False 'yup, not found
EXIT SUB
'is this entry what we're looking for?
ELSEIF Prefix(Index) = Prefix AND Suffix(Index) = Suffix THEN
Found = True 'yup, found
EXIT SUB
ELSE 'retry until we find what were looking for or we find a blank
'entry
Index = Index - Offset
IF Index < 0 THEN 'is index too far down?
Index = Index + 6577 'yup, bring it up then
END IF
END IF
LOOP
END SUB

'Throws a byte into the output buffer and writes the buffer if it's full.
SUB PutByte (A) STATIC
IF OAddress = OEndAddress THEN
PUT #2, , OutBuffer$
OAddress = OStartAddress
END IF
DEF SEG = Oseg
POKE OAddress, A
OAddress = OAddress + 1
END SUB

'Throws one multi-bit code to the output file.
SUB PutCode (A) STATIC
SHARED MaxCode
IF A >= MaxCode THEN STOP

Char& = Char& + A * Shift(CurrentBit)
CurrentBit = CurrentBit + CodeSize
DO WHILE CurrentBit > 7
PutByte Char& AND 255
Char& = Char& \ 256
CurrentBit = CurrentBit - 8
LOOP
END SUB

'This is the "experimental" part of the program. This procedure eliminates
'any strings which are not used in the encoding table: the usual result of
'doing this is greater compression.
'It isn't documented well yet... I'm still working on it.
SUB Rebuild.Table (New.Entries)
DIM P(4096), S(4096), U(4096) AS LONG, Pn(4096), C(4096)
DIM Location(4096)

SHARED StartCode, MaxCode, Prefix
Num.Entries = 0

FOR A = 0 TO 6576
C = Code(A)
IF C <> -1 THEN 'valid code?
IF Used(C) > 0 THEN 'was it used at all?
Used(C) = 0
P = Prefix(A): S = Suffix(A)
P(Num.Entries) = P 'put it into a temporary table
S(Num.Entries) = S
U(Num.Entries) = P * 4096& + S
C(C) = Num.Entries
Num.Entries = Num.Entries + 1
END IF
END IF
NEXT


Num.Entries = Num.Entries - 1
FOR A = 0 TO Num.Entries
Pn(A) = A
NEXT
'sort the table according to it's prefix:suffix
Mid = Num.Entries \ 2
DO
FOR A = 0 TO Num.Entries - Mid
IF U(Pn(A)) > U(Pn(A + Mid)) THEN
SWAP Pn(A), Pn(A + Mid)
Swap.Flag = True
CompareLow = A - Mid
CompareHigh = A
DO WHILE CompareLow >= 0
IF U(Pn(CompareLow)) > U(Pn(CompareHigh)) THEN
SWAP Pn(CompareLow), Pn(CompareHigh)
CompareHigh = CompareLow
CompareLow = CompareLow - Mid
ELSE
EXIT DO
END IF
LOOP

END IF
NEXT

Mid = Mid \ 2
LOOP WHILE Mid > 0


FOR A = 0 TO Num.Entries
Location(Pn(A)) = A
NEXT
'clear the old hash table
FOR A = 0 TO 6576
Prefix(A) = -1
Suffix(A) = -1
Code(A) = -1
NEXT

'put each prefix:suffix into the hash table
FOR A1 = 0 TO Num.Entries
A = Pn(A1)

P = P(A)
S = S(A)
IF P >= StartCode THEN 'is it pointing twards a string?
P = StartCode + Location(C(P)) 'yup; update the pointer
END IF
IF S >= StartCode THEN
S = StartCode + Location(C(S))
END IF
'where does this prefix:suffix go?
Hash P, S, Index, 0
'put it there
Prefix(Index) = P
Suffix(Index) = S
Code(Index) = A1 + StartCode

NEXT
'# of entries in the hash table now
New.Entries = Num.Entries + 1
END SUB