Category : Assembly Language Source Code
Archive   : LZ.ZIP
Filename : LZCOMP.ASM

 
Output of file : LZCOMP.ASM contained in archive : LZ.ZIP
title lzcomp - file compressor using limpel-ziv algorithm

;Tom Pfau
;Digital Equipment Corporation
;Parsippany, NJ

;Constants
clear equ 256 ;Clear code
eof equ 257 ;End of file marker
first_free equ 258 ;First free code
maxmax equ 4096 ;Max code + 1

include macros.mlb

;Hash table entry
hash_rec struc
first dw ? ; First entry with this value
next dw ? ; Next entry along chain
char db ? ; Suffix char
hash_rec ends

;Declare Segments
code segment byte public 'code'
code ends
stack segment word stack 'data'
dw 128 dup (?)
stack ends
data segment word public 'data'
data ends
memory segment para public 'memory'
hash label hash_rec
memory ends

;Start writing code
code segment
assume cs:code,ds:data,es:data,ss:stack

start proc far
mov bx,seg hash ;End of program
mov ax,ds ;Start of program
sub bx,ax ;Program size
inc bx ;Make sure
setmem bx ;Set our size
mov bx,data ;Set up data segment addressability
mov es,bx
mov ds,bx
print input_prompt ;Get input file name
input input_file
print crlf
print output_prompt ;And output
input output_file
print crlf
mov al,input_file+1 ;Terminate file names with nulls
xor ah,ah
mov si,ax
mov input_file+2[si],0
mov al,output_file+1
mov si,ax
mov output_file+2[si],0
hopen input_file+2,0
mov input_handle,ax
hcreat output_file+2,0
mov output_handle,ax
call compress ;Compress file
hclose input_handle ;Close in and out
hclose output_handle
exit ;Done
start endp

data segment
input_prompt db 'Input file: $'
output_prompt db 'Output file: $'
input_file db 80,0,80 dup (?)
output_file db 80,0,80 dup (?)
crlf db 13,10,'$'
input_handle dw ?
output_handle dw ?
data ends

compress proc near
malloc 1280 ;Allocate space for hash table
mov hash_seg,ax ;Save segment address
l1: call init_table ;Initialize the table and some vars
mov ax,clear ;Write a clear code
call write_code
call read_char ;Read first char
l4: xor ah,ah ;Turn char into code
l4a: mov prefix_code,ax ;Set prefix code
call read_char ;Read next char
jc l17 ;Carry means eof
mov k,al ;Save char in k
mov bx,prefix_code ;Get prefix code
call lookup_code ;See if this pair in table
jnc l4a ;nc means yes, new code in ax
call add_code ;Add pair to table
push bx ;Save new code
mov ax,prefix_code ;Write old prefix code
call write_code
pop bx
mov al,k ;Get last char
cmp bx,max_code ;Exceed code size?
jl l4 ;less means no
cmp nbits,12 ;Currently less than 12 bits?
jl l14 ;yes
mov ax,clear ;Write a clear code
call write_code
call init_table ;Reinit table
mov al,k ;get last char
jmp l4 ;Start over
l14: inc nbits ;Increase number of bits
shl max_code,1 ;Double max code size
jmp l4 ;Get next char
l17: mov ax,prefix_code ;Write last code
call write_code
mov ax,eof ;Write eof code
call write_code
mov ax,bit_offset ;Make sure buffer is flushed to file
cmp ax,0
je l18
mov cx,8 ;convert bits to bytes
xor dx,dx
div cx
or dx,dx ;If extra bits, make sure they get
je l17a ;written
inc ax
l17a: call flush
l18: ret
compress endp

data segment
hash_seg dw ?
prefix_code dw ?
free_code dw ?
max_code dw ?
nbits dw ?
k db ?
data ends

init_table proc near
mov nbits,9 ;Set code size to 9
mov max_code,512 ;Set max code to 512
push es ;Save seg reg
mov es,hash_seg ;Address hash table
mov ax,-1 ;Unused flag
mov cx,640 ;Clear first 256 entries
mov di,offset hash ;Point to first entry
rep stosw ;Clear it out
pop es ;Restore seg reg
mov free_code,first_free ;Set next code to use
ret ;done
init_table endp

write_code proc near
push ax ;Save code
mov ax,bit_offset ;Get bit offset
mov cx,nbits ;Adjust bit offset by code size
add bit_offset,cx
mov cx,8 ;Convert bit offset to byte offset
xor dx,dx
div cx
cmp ax,1020 ;Approaching end of buffer?
jl wc1 ;less means no
call flush ;Write the buffer
push dx ;dx contains offset within byte
add dx,nbits ;adjust by code size
mov bit_offset,dx ;new bit offset
pop dx ;restore dx
add ax,offset output_data ;Point to last byte
mov si,ax ;put in si
mov al,byte ptr [si] ;move byte to first position
mov output_data,al
xor ax,ax ;Byte offset of zero
wc1: add ax,offset output_data ;Point into buffer
mov di,ax ;Destination
pop ax ;Restore code
mov cx,dx ;offset within byte
xor dx,dx ;dx will catch bits rotated out
jcxz wc3 ;If offset in byte is zero, skip shift
wc2: shl ax,1 ;Rotate code
rcl dx,1
loop wc2
or al,byte ptr [di] ;Grab bits currently in buffer
wc3: stosw ;Save data
mov al,dl ;Grab extra bits
stosb ;and save
ret
write_code endp

data segment
bit_offset dw ?
output_data db 1024 dup (?)
data ends

flush proc near
push ax ;Save all registers
push bx ;AX contains number of bytes to write
push cx
push dx
hwrite output_handle,output_data,ax ;write output file
pop dx
pop cx
pop bx
pop ax
ret
flush endp

read_char proc near
mov di,input_offset ;Anything left in buffer?
cmp di,input_size
jl rd1 ;less means yes
hread input_handle,input_data,1024 ;Read next chunk of input
cmp ax,0 ;Anything left?
je rd2 ;equal means no, finished
mov input_size,ax ;Save bytes read
mov input_offset,0 ;Point to beginning of buffer
mov di,0
rd1: lea si,input_data[di] ;Point at character
lodsb ;Read it in
inc input_offset ;Adjust pointer
clc ;Success
ret
rd2: stc ;Nothing left
ret
read_char endp

data segment
input_data db 1024 dup (?)
input_offset dw 0
input_size dw 0
data ends

lookup_code proc near
push ds ;Save seg reg
mov ds,hash_seg ;point to hash table
call index ;convert code to address
mov di,0 ;flag
cmp [si].first,-1 ;Has this code been used?
je gc4 ;equal means no
inc di ;set flag
mov bx,[si].first ;Get first entry
gc2: call index ;convert code to address
cmp [si].char,al ;is char the same?
jne gc3 ;ne means no
clc ;success
mov ax,bx ;put found code in ax
pop ds ;restore seg reg
ret ;done
gc3: cmp [si].next,-1 ;More left with this prefix?
je gc4 ;equal means no
mov bx,[si].next ;get next code
jmp gc2 ;try again
gc4: stc ;not found
pop ds ;restore seg reg
ret ;done
lookup_code endp

index proc near
mov si,bx ;si = bx * 5 (5 byte hash entries)
shl si,1 ;si = bx * 2 * 2 + bx
shl si,1
add si,bx
ret
index endp

add_code proc near
mov bx,free_code ;Get code to use
push ds ;point to hash table
mov ds,hash_seg
cmp di,0 ;First use of this prefix?
je ac1 ;equal means yes
mov [si].next,bx ;point last use to new entry
jmp short ac2
ac1: mov [si].first,bx ;Point first use to new entry
ac2: cmp bx,maxmax ;Have we reached code limit?
je ac3 ;equal means yes, just return
call index ;get address of new entry
mov [si].first,-1 ;initialize pointers
mov [si].next,-1
mov [si].char,al ;save suffix char
inc es:free_code ;adjust next code
ac3: pop ds ;restore seg reg
ret
add_code endp

code ends

end start


  3 Responses to “Category : Assembly Language Source Code
Archive   : LZ.ZIP
Filename : LZCOMP.ASM

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/