Category : Word Processors
Archive   : FMAC16AS.ZIP
Filename : SEARCH.ASM

 
Output of file : SEARCH.ASM contained in archive : FMAC16AS.ZIP
;History:772,1
;Wed Nov 29 23:58:27 1989 Add support for \|
;Tue Nov 07 23:45:44 1989 match newlines in character classes.
;Mon Nov 06 00:40:16 1989 try to make backwards regexp searches work.
;Sat Nov 05 22:05:14 1988 let CR LF match LINENEW.
;10-08-88 08:48:54 add \n to regexp search.
;09-26-88 21:23:42 add case translation for character classes.
;08-19-88 23:36:40 closure didn't work because omatch iterated on matching.
;08-13-88 22:12:46 try forwards again.
;07-24-88 16:42:24 BOL and EOL match BOB and EOB respectively.
;07-21-88 22:49:18 add optimized search backwards.
;07-20-88 00:15:38 too late at night to continue...
;07-20-88 00:02:35 optimize forward searches.
;07-19-88 23:38:07 use the right omatch_chr for both regexps and literals.
;07-19-88 00:51:06 initialize the case table.
;07-18-88 21:20:18 don't increment di twice in omatch_NCCL
;07-18-88 00:04:34 replace bad patterns with "".
;07-17-88 23:15:23 Check for topbot right after incrementing di.
;07-17-88 22:55:12 search *at* the end_ptr (check for end_ptr after searching).
;07-17-88 18:54:53 when searching backwards, don't search past right_ptr.
;07-17-88 10:59:27 save di around omatch()
;07-17-88 10:42:13 omatch_CHR was incrementing di even if it didn't match.
;06-06-88 23:58:09 change the regexp chars to match Gnu's.
;07-06-87 06:55:31 Use botbot for eof, not LINENEW
include memory.def

data segment byte public

;Single-field structures used purely as typed displacements:
; [reg].b is a byte access and [reg].w is a word access.
b_struc struc
b db ?
b_struc ends

w_struc struc
w dw ?
w_struc ends

extrn outpat: byte ;compiled pattern buffer (defined in another module).
extrn OUTPATSIZE: abs ;added to offset outpat to form the end-of-buffer pointer.
inpat_ptr dw ? ;beginning of input pattern.
direction dw ? ;routine to increment di in correct direction (inc_di or dec_di).
scan_char dw ? ;routine to scan for a character (scan_char_literal or scan_char_fold).
end_ptr dw ? ;end of region we're searching.
right_ptr dw ? ;rightmost end of region we're searching.
clo_si dw ? ;saved pointer for closure.
last_ptr dw ? ;pointer to last character matched (stored by omatch_EOS).
which_chr dw ? ;which omatch_CHR to use (omatch_CHR or omatch_NCHR).
this_pattern dw ? ;->this pattern (for closure).
last_pattern dw ? ;->previous pattern (for closure); -1 when there is none.
last_or dw ? ;->last or pointer.

extrn textseg: word ;segment of the text buffer; loaded into es/ds by search.

;init_case is called indirectly by set_pattern. It starts out pointing at
; init_case_table, which repoints it at a bare ret once the table is built.
init_case dw init_case_table
case_ignore_table db 256 dup(?) ;xlat table; maps 'a'..'z' onto 'A'..'Z'.

data ends


;The text lives in bufseg as two pieces with a gap between them: the search
; code steps from topbot to bottop whenever it reaches the end of the top
; piece.
bufseg segment public

extrn toptop: word ;beginning of the buffer (see omatch_BOB).
extrn topbot: word ;end of the top piece; positions here are moved to bottop.
extrn bottop: word ;start of the bottom piece.
extrn botbot: word ;end of the buffer (see omatch_EOB).

bufseg ends


code segment byte public
;ss is assumed to address data as well: while ds is assumed to be bufseg,
; references to variables in data are assembled relative to ss.
assume cs:code, ds:data, ss:data

public slowly

;Mark and syntax helpers defined in other modules.
extrn get_mark: near, set_mark_si: near
extrn get_syntax: near

public search
search:
;Search the text buffer for the pattern compiled into outpat.
;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
;The search begins at mark ch and runs towards mark cl; when the start is
; not below the end, the search runs backwards.
;exit with cy=0 and marks dh/dl set to the beginning/end of the match,
; or cy=1 if the string wasn't found.
push dx ;keep the result marks until we know the outcome.
push es
mov es,textseg
assume es:bufseg
push ds
mov ds,textseg ;get_mark is called with ds on the text as well.
assume ds:bufseg
push cx ;keep the start mark across get_mark.
mov al,cl
call get_mark ;si = position of the end mark.
mov right_ptr,si ;until proven otherwise, the end is also
mov end_ptr,si ; the rightmost position.
pop cx
mov al,ch
call get_mark ;si = position of the start mark.
pop ds
assume ds:data
mov ax,offset inc_di ;assume we're searching forwards.
cmp si,end_ptr ;does the start lie below the end?
jb search_dir ;yes (equal positions go backwards; it doesn't matter).
mov ax,offset dec_di ;no - search in the reverse direction,
mov right_ptr,si ; and the start is the rightmost position.
search_dir:
mov direction,ax
mov di,si ;slowly wants the starting position in di.
call slowly
pop es
assume es:data
pop dx
jnc search_found
stc ;return no match.
ret

search_found:
push ds
mov ds,textseg ;set_mark_si is called with ds on the text.
assume ds:bufseg
mov si,di
mov al,dh
call set_mark_si ;first mark = beginning of the match.

mov al,dl
mov si,last_ptr
call set_mark_si ;last mark = end of the match.

pop ds
assume ds:data
clc ;return a match.
ret


assume ds:data, es:bufseg


scan_char_literal:
;Scan for the byte in al starting at es:di, looking at no more than cx
; bytes; the direction flag set by the caller picks the direction.
;exit with zr if the byte was found (di has stepped past it), nz otherwise.
test sp,sp ;force nz first: repne leaves the flags alone when cx=0.
repne scasb ;search for the character.
ret


scan_char_fold:
;Case-folding counterpart of scan_char_literal.
;enter with al=character to find, ds:bx->translate table (used by xlat),
; es:di->text, cx=number of characters to examine, dx=step added to di
; after each character (the callers pass 1 or -1).
;exit with zr if found (di has already stepped past the match), nz if not.
; al holds the translated search character; ah is clobbered.
xlat ;translate the character we're looking for.
mov ah,al ;keep the translated character in ah.
or sp,sp ;if cx=0, be sure to return nz.
jcxz scan_char_fold_2
shr cx,1 ;we unrolled the loop once.
jnc scan_char_fold_1 ;if even, start at the top.
inc cx ;otherwise, add one for the odd
jmp short scan_char_fold_3 ; iteration, and jump to it.
scan_char_fold_1:
mov al,es:[di] ;unroll this puppy once.
add di,dx
xlat
cmp al,ah ;compare them.
je scan_char_fold_2 ;if equal, we're done.
scan_char_fold_3:
mov al,es:[di] ;now do the second set.
add di,dx
xlat
cmp al,ah
loopne scan_char_fold_1 ;loopne keeps the flags from the cmp.
scan_char_fold_2:
mov al,ah ;get our (translated) character back.
ret


slowly:
;Try the compiled pattern in outpat at successive buffer positions.
;es:di -> first char to look at (search loads di before calling us).
;es:right_ptr -> after last char to look at.
;direction -> inc_di or dec_di; end_ptr = last position to try.
;return cy if no match,
; else nc, di->start of match, last_ptr->after end of match.
;If the pattern begins with a plain character that isn't CR or LF, control
; passes to forwards_0/backwards_0, which scan for that character first.
cmp di,topbot ;at topbot already?
jne slowly_0
mov di,bottop
slowly_0:
mov ax,which_chr ;does the pattern start with a CHR?
cmp ax,word ptr outpat
jne slowly_1 ;no.
cmp outpat+2,CR ;searching for literal CR?
je slowly_1 ;yes - don't optimize because of CRLFs.
cmp outpat+2,LF ;searching for literal LF?
je slowly_1 ;yes - don't optimize because of CRLFs.

mov scan_char,offset scan_char_literal
cmp ax,offset omatch_CHR ;Are we folding case?
je quickly_1 ;no.
mov scan_char,offset scan_char_fold
quickly_1:
cmp direction,offset inc_di ;Are we going forwards?
je forwards_0 ;yes.
if 0 ;disable optimization for now.
jmp slowly_1
endif
jmp backwards_0 ;no.

;The unoptimized loop: try the whole pattern at di, then step di with the
; direction routine until it has been tried at end_ptr too.
slowly_1:
mov si,offset outpat ;start at beginning of pattern.
mov bx,offset case_ignore_table
push di ;remember where we're starting.
call omatch ;now search.
pop di
jnc slowly_succeed ;we found a match
;not found, should we give up?
cmp di,end_ptr ;at the end yet?
je slowly_fail ;yes - not found.
;not found, we have to bump di.
call direction
jmp slowly_1
slowly_fail:
stc ;not found.
ret
;Also entered from forwards_0 and backwards_0 with di->start of the match.
slowly_succeed:
if 1 ;an attempt to make backwards regexp searches work right.
cmp direction,offset inc_di ;Are we going forwards?
je slowly_done ;yes - we're done now.
;Going backwards: keep stepping back while the pattern still matches and
; still ends at the same place, so that we return the earliest start.
slowly_backwards_again:
call dec_di ;move backwards.
push last_ptr ;remember the pointer to the end of it.
mov si,offset outpat ;start at beginning of pattern.
mov bx,offset case_ignore_table
push di
call omatch ;did it match?
pop di
pop ax ;ax = end of the previous match (pop leaves cy alone).
jc slowly_backwards_done ;no - we're done.
cmp ax,last_ptr ;did last_ptr change?
je slowly_backwards_again ;no, we can try again.
slowly_backwards_done:
mov last_ptr,ax ;restore the end of the match we're returning.
call inc_di ;point to the last match again.
slowly_done:
endif
clc
ret

public forwards_0
forwards_0:
;Optimized forwards search, used when the pattern starts with a plain
; character: scan for that character with scan_char, then let omatch check
; the rest of the pattern (outpat+3 skips the handler word and the char).
;enter with es:di->where to start, scan_char set up by slowly.
;exit through slowly_succeed (di->start of match) or slowly_fail (cy).
mov bx,offset case_ignore_table
mov al,outpat+2 ;get the character
cmp di,bottop ;are we in the bottom?
jae forwards_2 ;yes - don't search the top.

mov cx,topbot ;should we search to topbot
cmp cx,end_ptr ; or to end_ptr?
jbe forwards_3
mov cx,end_ptr ;just to end_ptr.
forwards_3:
sub cx,di ;compute the amount left in the top.
mov dx,1 ;step for scan_char_fold.
call scan_char ;scan for our character.
je forwards_1 ;we found it!

cmp di,end_ptr ;are we at the end?
jae slowly_fail ;yes - no match.

mov di,bottop ;carry on in the bottom piece.
forwards_2:
mov cx,end_ptr ;we only need search that far.
sub cx,di
mov dx,1
call scan_char ;scan for our character.
jne slowly_fail ;we didn't find it.
forwards_1:
;di is one past the character that scan_char found.
mov si,offset outpat+3 ;start just after the first pattern character.
push di ;remember where we're starting.
call omatch ;now search.
pop di
jnc forwards_4 ;we matched - return it.
cmp di,end_ptr ;are we at the end?
jb forwards_0 ;no - keep matching.
;backwards_0 also uses this label to reach slowly_fail.
slowly_fail_j_1:
jmp slowly_fail ;yes - no match.
forwards_4:
dec di ;remember that we actually started
jmp slowly_succeed ; one character into the pattern.


public backwards_0
backwards_0:
;Optimized backwards search, used when the pattern starts with a plain
; character: scan backwards for that character with scan_char (direction
; flag set, dx=-1), then let omatch check the rest of the pattern.
;enter with es:di->where to start, scan_char set up by slowly.
;exit through slowly_succeed (di->start of match) or slowly_fail (cy).
;NOTE(review): the bottom-piece path decrements di before scanning while the
; top-piece path scans from di itself. On re-entry from backwards_1, di is
; already one before the last character found, so the extra dec in the
; bottom piece looks like it skips a character -- confirm.
mov bx,offset case_ignore_table
mov al,outpat+2 ;get the character
cmp di,bottop ;are we in the top?
jb backwards_2 ;yes - don't search the bottom.
je backwards_5 ;exactly at bottop - nothing in the bottom to scan.

mov si,bottop ;should we search to bottop
cmp si,end_ptr ; or to end_ptr?
jae backwards_3
mov si,end_ptr ;just to end_ptr.
backwards_3:

dec di
mov cx,di ;compute the amount left in the bottom.
sub cx,si
inc cx ;be sure to look at where di points.
std
mov dx,-1
call scan_char ;scan for our character.
cld ;cld leaves zr/nz from the scan alone.
je backwards_1 ;we found it!

backwards_5:
cmp di,end_ptr ;are we at the end?
jbe slowly_fail_j_1 ;yes - no match.

mov di,topbot ;carry on at the last character of the top piece.
dec di
backwards_2:
mov cx,di ;we only search here if end_ptr is here.
sub cx,end_ptr
inc cx ;be sure to compare where di is.
std
mov dx,-1
call scan_char ;scan for our character.
cld
jne slowly_fail_j_1 ;we didn't find it.
backwards_1:
;di is one before the character that scan_char found.
mov si,offset outpat+3 ;start just after the first pattern character.
push di ;remember where we're starting.
add di,2 ;we post-decremented.
call omatch ;now search.
pop di
jnc backwards_4 ;we succeeded.
inc di
cmp di,end_ptr ;are we after the end?
jb slowly_fail_j_1 ;yes - no match.
dec di
jmp backwards_0
backwards_4:
inc di ;remember that we post-decremented,
jmp slowly_succeed ; so we're one character too far.

inc_di:
;Step di one position forwards in the buffer. A LINENEW pair counts as a
; single position, and the end of the top piece continues at bottop.
inc di
cmp di,topbot ;ran off the top piece?
je inc_di_gap ;yes - can't possibly be split over newline.
cmp es:[di-1].w,LINENEW ;did we land on the second half of a newline?
jne inc_di_done ;no.
inc di ;yes - step over the LF as well.
cmp di,topbot ;and now at the end of the top piece?
je inc_di_gap
inc_di_done:
ret
inc_di_gap:
mov di,bottop ;continue in the bottom piece.
ret

dec_di:
;Step di one position backwards in the buffer. The start of the bottom
; piece continues at the end of the top piece, and a LINENEW pair counts as
; a single position.
cmp di,bottop ;at the start of the bottom piece?
je dec_di_gap ;yes - continue from the top piece.
dec_di_back:
dec di ;back up to the previous character.
cmp es:[di-1].w,LINENEW ;are we on the second half of a newline?
jne dec_di_done ;no.
cmp di,bottop ;at the start of the bottom piece now?
je dec_di_done ;yes - can't possibly be split over newline.
dec di ;no - move to the beginning of the newline.
dec_di_done:
ret
dec_di_gap:
mov di,topbot ;load the end of the top piece.
jmp dec_di_back


omatch:
;Run the compiled pattern at ds:si against the text at es:di.
;Each pattern element starts with the offset of its omatch_XXX handler; the
; handlers are called in turn for as long as they return nc.
;return nc if we matched (omatch_EOS returns straight to our caller),
; cy as soon as a handler fails.
cmp di,topbot ;at the end of the top piece?
je omatch_gap ;yes - continue in the bottom piece.
omatch_step:
lodsw ;fetch the next handler,
call ax ; and let it look at the text.
jnc omatch ;it matched - on to the next element.
ret
omatch_gap:
mov di,bottop
jmp omatch_step


;each of the omatch_XXX routines operates under the following constraints
; on failure, return with cy set.
; on matching (only used by omatch_EOS right now), return to caller's caller
; with cy clear.
; on success, bump si as needed so that it points to the next omatch,
; bump di as needed (either zero or one), and return with cy clear.

public omatch_EOS
omatch_EOS:
;End of pattern: everything before us matched.
;Records di in last_ptr and returns nc directly to whoever called omatch.
add sp,2 ;drop the return address into omatch's loop.
mov last_ptr,di ;remember where the match ended.
clc ;reaching the end of the pattern
ret ; means that we matched.

public omatch_CLO
omatch_CLO:
;Closure ('*'): match the following pattern element as many times as
; possible, then back off one character at a time until the rest of the
; pattern matches.
;enter with ds:si->the element being closed, es:di->text.
;exit with nc returned directly to omatch's caller when the rest of the
; pattern matched (omatch_EOS has then set last_ptr), or cy to omatch.
push di ;save the first closure pattern.
mov CLO_si,si ;remember the pattern we're closing.
;Note that we don't have to worry about CLO_si being global because the
; next pattern can't be another closure.
;match as many as fit the next pattern
mov bx,offset case_ignore_table
omatch_CLO_1:
mov si,CLO_si ;get the pattern being closed.
cmp di,topbot ;at bottom of top?
jne omatch_CLO_5
mov di,bottop ;yes, go to top of bottom.
omatch_CLO_5:
lodsw
call ax
jnc omatch_CLO_1
;The handler failed. The failing paths in this file still step si over the
; element's operands (see omatch_CHR_skip and omatch_ccl_no), so si now
; points at the pattern that follows the closed element.
pop bx ;bx = text position where the closure started.
;match only as many as fit the pattern after the next pattern.
omatch_CLO_2:
push si
push di
push bx
mov bx,offset case_ignore_table
call omatch ;try to match rest of pattern.
pop bx
pop di
pop si ;the pops leave cy/nc from omatch alone.
jnc omatch_CLO_4 ;go if it matched.
cmp di,bottop ;backing up past the point?
jne omatch_CLO_3 ;no - just decrement.
mov di,topbot ;yes - get the bottom of the top.
omatch_CLO_3:
dec di ;point to the previous character.
cmp di,bx ;zero or more matches still?
jae omatch_CLO_2 ;yes.
stc ;no matches--return no match.
ret
omatch_CLO_4:
pop bx ;get rid of our return address.
ret ;return nc to omatch's caller.


omatch_OR:
;Alternation ("\|"). The handler word is followed by one parameter word
; that points at the next alternative (stored by stor).
;enter with ds:si->the parameter word, es:di->text.
;Tries the pattern that follows the parameter; if that fails, tries the
; pattern the parameter points to, from the same text position.
;exit with nc returned directly to omatch's caller if either matched,
; or cy returned to omatch if neither did.
add si,2 ;skip past our param.
push si
push di
mov bx,offset case_ignore_table
call omatch ;try to match rest of pattern.
jnc omatch_OR_1 ;go if it matched.
pop di ;back to where we started in the text.
pop si
push si
mov si,[si-2] ;point to the next or-clause.
push di
call omatch
jnc omatch_OR_1 ;go if it matched.
pop di ;(pop doesn't touch the flags.)
pop si
;guaranteed cy.
ret
omatch_OR_1:
add sp,6 ;get rid of si,di, and our return addr.
;guaranteed nc.
ret


public omatch_CHR
omatch_CHR:
;Match one literal character, case sensitive.
;enter with ds:si->the pattern character, es:di->text.
;exit with nc, si and di both advanced, if it matched;
; cy, si past the pattern character and di unchanged, if not.
;A CR,LF pair of pattern characters matches a LINENEW in the text.
;omatch_CHR_linenew, omatch_CHR_skip, omatch_CHR_no and omatch_yes are also
; jumped to from other handlers.
cmp di,right_ptr ;are we at the end?
je omatch_CHR_skip ;yes - we never match CHR
cmp es:[di].w,LINENEW
je omatch_CHR_linenew
cmpsb ;compare, advancing both si and di.
je omatch_yes ;if they're the same, match again.
dec di ;don't modify buffer pointer if no match.
stc
ret
omatch_CHR_linenew:
;The pattern must read: CR, <which_chr handler word>, LF.
cmp [si].b,CR ;got a LINENEW, are we looking for one?
jne omatch_CHR_skip ;no.
mov ax,which_chr ;is the next one another char?
cmp [si+1].w,ax
jne omatch_CHR_skip ;no - no match.
cmp [si+1+2].b,LF ;Are we really looking for a linenew?
jne omatch_CHR_skip ;no - no match.
add si,1+2+1 ;skip past the two of them.
add di,2 ;skip in the buffer also.
clc
ret
omatch_CHR_skip:
inc si ;skip the pattern character.
omatch_CHR_no:
stc
ret
omatch_yes:
clc
ret


public omatch_NCHR
omatch_NCHR:
;Match one literal character, ignoring case: both the pattern character
; and the text character go through the table at ds:bx before comparing.
;enter with ds:si->the pattern character, es:di->text.
;exit with nc, si and di both advanced, if it matched;
; cy, si past the pattern character and di unchanged, if not.
cmp di,right_ptr ;are we at the end?
je omatch_CHR_skip ;yes - we never match CHR
cmp es:[di].w,LINENEW
je omatch_CHR_linenew ;newlines are handled by omatch_CHR's code.
lodsb ;get the pattern character,
xlat ; translated.
mov ah,al
mov al,es:[di] ;get the text character,
xlat ; translated as well.
cmp ah,al
jne omatch_NCHR_no ;different - no match.
inc di ;same - step over the text character.
clc
ret
omatch_NCHR_no:
stc ;di was never moved.
ret


omatch_NL:
;Match a newline ("\n"): a LINENEW pair in the text.
;exit with nc and di past the newline, or cy with di unchanged.
cmp di,right_ptr ;are we at the end?
je omatch_NL_fail ;yes - we never match newline.
cmp es:[di].w,LINENEW ;is it newline?
je omatch_NL_hit ;yes.
omatch_NL_fail:
stc
ret
omatch_NL_hit:
add di,2 ;skip both bytes of the newline.
clc
ret


public omatch_BOB
omatch_BOB:
;Match the beginning of the buffer; no text is consumed.
;exit with nc if di is at toptop, cy otherwise.
cmp di,toptop ;are we at the beginning of the buffer?
jne omatch_BOB_fail ;no.
clc
ret
omatch_BOB_fail:
stc
ret


public omatch_BOL
omatch_BOL:
;Match the beginning of a line; no text is consumed.
;exit with nc if di is at the beginning of the buffer or just after a
; LINENEW, cy otherwise. di is preserved.
push di ;we might have to look at the top.
cmp di,bottop ;are we at the start of the bottom piece?
jne omatch_BOL_look ;no - look just before di.
mov di,topbot ;yes - the previous text ends the top piece.
omatch_BOL_look:
cmp di,toptop ;beginning of the buffer?
je omatch_BOL_hit ;yes - that counts as beginning of line.
cmp es:[di-2].w,LINENEW ;preceded by a newline?
je omatch_BOL_hit ;yes.
pop di
stc
ret
omatch_BOL_hit:
pop di
clc
ret


public omatch_ISW
omatch_ISW:
;Match one word character ("\w").
;exit with nc and di advanced by one, or cy with di unchanged.
cmp di,botbot ;nothing left in the buffer?
je omatch_ISW_fail
cmp es:[di].w,LINENEW ;a newline is never a word character.
je omatch_ISW_fail
call chars_around_di
test al,1 ;is the character at di a word character?
jnz omatch_ISW_hit ;yes.
omatch_ISW_fail:
stc
ret
omatch_ISW_hit:
inc di ;match the character.
clc
ret


public omatch_NOW
omatch_NOW:
;Match one character that is not a word character ("\W").
;exit with nc and di advanced by one, or cy with di unchanged.
cmp di,botbot ;nothing left in the buffer?
je omatch_NOW_fail
cmp es:[di].w,LINENEW ;newlines are not matched here.
je omatch_NOW_fail
call chars_around_di
test al,1 ;is the character at di a word character?
jz omatch_NOW_hit ;no - that's what we want.
omatch_NOW_fail:
stc
ret
omatch_NOW_hit:
inc di ;match the character.
clc
ret


public omatch_BOW
omatch_BOW:
;Match the beginning of a word ("\<"); no text is consumed.
;exit with nc if a non-word character (or nothing) precedes di and a word
; character follows, cy otherwise.
cmp di,botbot ;nothing left in the buffer?
je omatch_BOW_fail
cmp es:[di].w,LINENEW ;a word can't begin on a newline.
je omatch_BOW_fail
call chars_around_di
cmp al,1 ;whitespace before and word after?
je omatch_BOW_hit ;yes.
omatch_BOW_fail:
stc
ret
omatch_BOW_hit:
clc
ret


public omatch_EOW
omatch_EOW:
;Match the end of a word ("\>"); no text is consumed.
;exit with nc if a word character precedes di and none follows, else cy.
call chars_around_di
cmp al,2 ;word before and whitespace after?
je omatch_EOW_hit ;yes.
stc
ret
omatch_EOW_hit:
clc
ret


public omatch_WOR
omatch_WOR:
;Match a word boundary ("\b"), either the beginning or the end of a word;
; no text is consumed.
;exit with nc at a boundary, cy otherwise.
call chars_around_di
cmp al,1 ;whitespace before and word after?
je omatch_WOR_yes ;yes - match.
cmp al,2 ;word before and whitespace after?
jne omatch_WOR_fail ;no - not a boundary.
omatch_WOR_yes:
clc
ret
omatch_WOR_fail:
stc
ret


public omatch_NWR
omatch_NWR:
;Match anywhere that is not a word boundary ("\B"); no text is consumed.
;exit with nc when the characters on both sides of di are of the same
; kind, cy otherwise.
;omatch_NWR_yes is also used by omatch_EOB.
call chars_around_di
cmp al,3 ;word before and word after?
je omatch_NWR_yes ;yes - match.
cmp al,0 ;whitespace before and whitespace after?
jne omatch_NWR_fail ;no - we're on a boundary.
omatch_NWR_yes:
clc
ret
omatch_NWR_fail:
stc
ret


public omatch_EOB
omatch_EOB:
;Match the end of the buffer; no text is consumed.
;exit with nc if di is at botbot, cy otherwise.
cmp di,botbot ;are we at the end of the buffer?
jne omatch_EOB_fail ;no.
clc
ret
omatch_EOB_fail:
stc
ret


public omatch_EOL
omatch_EOL:
;Match the end of a line; no text is consumed.
;exit with nc if di is at the end of the buffer or at a LINENEW, else cy.
;omatch_no is the common "return cy" exit used by several other handlers.
cmp di,botbot ;are we at the end of the buffer?
je omatch_EOL_yes ;yes - that counts as end of line.
cmp es:[di].w,LINENEW ;sitting on a newline?
je omatch_EOL_yes ;yes.
omatch_no:
stc
ret
omatch_EOL_yes:
clc
ret


public omatch_ANY
omatch_ANY:
;Match any single character except a newline ('.').
;exit with nc and di advanced by one, or cy with di unchanged.
cmp di,right_ptr ;are we at the end?
je omatch_ANY_fail ;yes - we never match ANY
cmp es:[di].w,LINENEW ;we never match EOL.
jne omatch_ANY_hit
omatch_ANY_fail:
stc
ret
omatch_ANY_hit:
inc di
clc
ret


public omatch_CCL
omatch_CCL:
;match a character class.
;enter with ds:si->the class (a count byte followed by that many
; characters), es:di->text, ds:bx->case translate table.
;exit with nc and di past the matched text, or cy with di unchanged;
; either way si has been moved past the class.
cmp di,right_ptr ;are we at the end?
je omatch_ccl_no ;yes - we never match CCL
cmp es:[di].w,LINENEW ;a newline only matches if the class begins with one.
je omatch_ccl_newline
call locate ;see if it's in our set.
jnz omatch_no ;nope.
inc di
clc
ret
omatch_ccl_newline:
lea ax,[di+1] ;are we near the end?
cmp ax,right_ptr
je omatch_ccl_no ;yes - no match.
cmp ds:[si+1].w,LINENEW ;does the class begin with crlf?
jne omatch_ccl_no ;no - don't match it.
lodsb ;skip past this pattern.
xor ah,ah
add si,ax
add di,2 ;skip both bytes of the newline.
clc
ret


public omatch_NCCL
omatch_NCCL:
;match not in a character class.
;enter with ds:si->the class (a count byte followed by that many
; characters), es:di->text, ds:bx->case translate table.
;exit with nc and di advanced by one, or cy with di unchanged;
; either way si has been moved past the class.
cmp di,right_ptr ;are we at the end?
je omatch_ccl_no ;yes - we never match NCCL
cmp es:[di].w,LINENEW ;a negated class never matches a newline.
je omatch_ccl_no
call locate ;see if it's in our set.
jz omatch_ccl_no ;yes - we don't match.
inc di
clc
ret
;Common failure exit, also used by omatch_CCL.
;NOTE(review): when reached from the jz above, locate has already moved si
; past the class, so the skip below appears to run a second time -- confirm.
omatch_ccl_no:
lodsb ;skip past the pattern.
xor ah,ah
add si,ax
stc
ret


locate:
;See whether the text character is a member of a character class.
;es:di -> search string, bx -> case translate table.
;ds:si -> CCL (a count byte followed by that many characters).
;exit with zr if found, nz if not found, si -> after the pattern.
;clobbers ax; cx is preserved.
push cx
lodsb ;get the count.
mov cl,al
xor ch,ch
mov al,es:[di] ;get the character we're trying to match.
xlat ;case translate it.
mov ah,al ;keep it somewhere safe.
or sp,sp ;nz, in case the class is empty.
jcxz locate_3 ;empty class ("[]") - loopne would otherwise
 ; wrap cx and scan up to 64K bytes.
locate_2:
lodsb
xlat
cmp al,ah ;is this one it?
loopne locate_2
lahf ;remember whether or not we found it.
add si,cx ;skip whatever we didn't look at.
sahf
locate_3:
pop cx ;(pop doesn't touch the flags.)
ret


chars_around_di:
;Classify the characters on either side of es:di as word or non-word.
;return al bit 1=syntax of char to left of point.
; al bit 0=syntax of char to right of point.
;So al=0: neither is a word char, 1: only the right one, 2: only the left
; one, 3: both. A missing character (start or end of buffer) counts as
; non-word. di is preserved; ah is clobbered.
;NOTE(review): this relies on get_syntax preserving ah and di -- confirm.
push di ;get the character before point.
cmp di,bottop ;are we at the start of the bottom piece?
jne chars_around_di_1 ;no.
mov di,topbot ;yes - the previous char is in the top piece.
chars_around_di_1:
xor al,al ;if no character, it's whitespace.
cmp di,toptop
je chars_around_di_2
mov al,es:[di-1]
call get_syntax ;get the syntax for the char before point.
and al,1 ;isolate the 'word' bit.
chars_around_di_2:
shl al,1 ;the left character's bit goes in bit 1.
mov ah,al
pop di

xor al,al ;if no character, it's whitespace.
cmp di,botbot ;are we at the end?
je chars_around_di_3 ;yes - can't match beginning of word.
mov al,es:[di]
call get_syntax
and al,1
chars_around_di_3:
or al,ah ;include the syntax of the char to left of point.
ret


assume ds:data

public set_pattern
set_pattern:
;Compile a search string into outpat.
;enter with si, cx->pattern. dx<>0 if regular expression. di <> 0 if we
; want to fold case.
;A literal pattern is stored as one <which_chr handler word, character>
; entry per character, followed by an omatch_EOS word.
;exit with cy=1 if error; outpat then holds the empty pattern.
;clobbers ax, bp, cx, si, di.
call init_case
mov ax,offset omatch_CHR
or di,di
je set_pattern_0
mov ax,offset omatch_NCHR
set_pattern_0:
mov which_chr,ax ;remember which omatch_CHR to use.
or dx,dx
jne regexp_pat
mov di,offset outpat
jcxz set_pattern_1
;Every pass stores a 3-byte entry, and the 2-byte omatch_EOS must still fit
; after the last one, so di has to stay below outpat+OUTPATSIZE-4.
mov bp,offset outpat-4
add bp,OUTPATSIZE
set_pattern_2:
cmp di,bp ;do we have enough room?
jae set_pattern_3 ;no - quit now.
stosw ;store the appropriate comparison omatcher.
movsb
loop set_pattern_2
set_pattern_1:
mov ax,offset omatch_EOS ;store the end of string.
stosw
clc
ret
set_pattern_3:
mov word ptr outpat,offset omatch_EOS ;too long -- null it, as regexp_pat does.
stc
ret


public regexp_pat
regexp_pat:
;Compile a regular expression into outpat.
;enter with si, cx->pattern. A null is written just past the pattern's
; last character, so the caller's buffer needs room for it.
;exit with cy=1 if error; outpat then holds the empty pattern.
mov bx,cx
mov byte ptr [si+bx],0 ;store the terminating null.
call makepat
jc regexp_pat_bad
ret
regexp_pat_bad:
mov word ptr outpat,offset omatch_EOS ;uh-oh, bad pattern -- null it.
ret ;cy is still set: mov leaves the flags alone.


makepat:
;Compile the null-terminated regular expression at ds:si into outpat.
;si -> source pat (null terminated)
;While compiling: di -> next free dest byte, dx -> end of dest,
; this_pattern/last_pattern -> the current/previous pattern element, so
; that '*' can find the element it closes (bx is loaded from them).
;return cy=1 if error (pattern too long, or a '[' without its ']').
mov inpat_ptr,si
mov di,offset outpat
mov dx,OUTPATSIZE
add dx,di
mov last_pattern,-1 ;there is no previous pattern yet.
mov last_or,di ;the first alternative starts here.
makepat_1:
lodsb ;get the next character.
or al,al ;end of string?
je makepat_0 ;yes.

mov this_pattern,di ;remember where this pattern starts.

cmp al,'\' ;are we escaping something?
jne makepat_a
cmp byte ptr [si],0 ;is the '\' at the end?
je makepat_9 ;yes - just use \.
lodsb ;get the escaped char.
call escaped_char ;check for the special escapes.
jmp makepat_2
makepat_a:
cmp al,'.'
jne makepat_3
mov ax,offset omatch_ANY
call addset
jmp makepat_2
;this really belongs at the end of makepat, but the short jump can't get there.
makepat_0:
mov ax,offset omatch_EOS
call addset
cmp di,dx ;are we still inside the buffer?
jb makepat__0_1 ;yes.
stc ;no - it's full (or overrun): pattern too long.
ret
makepat__0_1:
clc
ret
makepat_3:
cmp al,'^'
jne makepat_7
lea ax,[si-1] ;get the buffer pointer.
cmp ax,inpat_ptr ;are we at the beginning?
jne makepat_6 ;no - this can't be it.
mov ax,offset omatch_BOL
call addset
jmp makepat_2
makepat_6:
mov al,'^'
call addchar
jmp makepat_2
makepat_7:
cmp al,'$'
jne makepat_8
cmp word ptr [si],'\' + '|'*256;is the '$' at the end of an alternation?
je makepat_7a ;yes - it means end of line.
cmp byte ptr [si],0 ;is the '$' at the end?
jne makepat_9 ;no - not special.
makepat_7a:
mov ax,offset omatch_EOL
call addset
jmp makepat_2
makepat_9:
call addchar
jmp makepat_2
makepat_8:
cmp al,'['
jne makepat_10
call getccl
jnc makepat_2
;Nothing was pushed in this routine, so return straight to our caller.
; (A pop here would throw away the return address and skip the caller's
; error handling.)
stc
ret
makepat_10:
cmp al,'*'
jne makepat_11
cmp last_pattern,-1 ;is there a previous pattern to close?
je makepat_12 ;no - not closure.
mov bx,last_pattern
mov ax,word ptr [bx]
cmp ax,offset omatch_CLO ;trying to close a closure?
je makepat_12 ;yes - not closure.
cmp ax,offset omatch_BOL ;trying to close a beginning of line?
je makepat_12 ;yes - not closure.
call stclos
mov this_pattern,bx ;remember where this one was.
jmp makepat_2
makepat_11:
;put more characters here.
makepat_12:
call addchar
makepat_2:
mov bx,this_pattern
mov last_pattern,bx
jmp makepat_1


escaped_char:
;Compile the character that follows a '\'.
;enter with al=the escaped character, si->the input after it, di->dest.
;The special escapes store their handler with addset, "\|" stores an "or"
; operator with stor, and anything else is stored as a literal character.
;clobbers ax, cx.
mov cx,offset omatch_BOB
cmp al,"`" ;beginning of buffer?
je escaped_1

mov cx,offset omatch_EOB
cmp al,"'" ;end of buffer?
je escaped_1

mov cx,offset omatch_NL
cmp al,"n" ;newline?
je escaped_1

mov cx,offset omatch_BOW
cmp al,"<" ;beginning of word?
je escaped_1

mov cx,offset omatch_EOW
cmp al,">" ;end of word?
je escaped_1

mov cx,offset omatch_WOR
cmp al,"b" ;beginning or end of word?
je escaped_1

mov cx,offset omatch_NWR
cmp al,"B" ;not beginning nor end of word?
je escaped_1

mov cx,offset omatch_ISW
cmp al,"w" ;word character?
je escaped_1

mov cx,offset omatch_NOW
cmp al,"W" ;not word character?
je escaped_1

cmp al,'|' ;is this an "or" operator?
jne addchar ;no - it's just a character.

mov inpat_ptr,si ;start a new regexp here...
jmp stor ;store a "or" operator, and return from there.
escaped_1:
mov ax,cx
jmp addset ;store the handler, and return from there.

addchar:
;Store a literal-character pattern element: the handler selected by
; which_chr followed by the character itself.
;al = CHR to put, di->dest, dx->end of dest.
push ax ;keep the character while ax carries the handler.
mov ax,which_chr ;use the right omatch_chr.
call addset
pop ax
jmp addbyte ;store the character, and return from there.


addset: ;only command chars call addset.
;Store the word in ax, low byte first, using addbyte for each half.
;ax = handler offset, di->dest, dx->end of dest. ax is preserved.
push ax
call addbyte ;low byte.
mov al,ah
call addbyte ;high byte.
pop ax
ret


addbyte:
;Append one byte to the compiled pattern if there is room for it.
;al = char to put, di->dest, dx->end of dest.
;exit with di advanced if the byte was stored. When di is at or beyond dx
; the byte is dropped; makepat detects the full buffer when it finishes.
cmp di,dx
jae addbyte_1 ;full, or already pushed past the end - store nothing.
mov [di],al
inc di
addbyte_1:
ret


stclos:
;Turn the most recent pattern element into a closure by moving it up two
; bytes and storing omatch_CLO in front of it.
;di->last set added + 1
;bx->start of the element being closed
;dx->end of dest
;exit with di advanced by two. If there is nothing to move or no room for
; the two extra bytes, nothing is written and di is set to dx, so makepat
; reports the pattern as too long. clobbers ax.
cmp di,bx ;is there an element to move?
jbe stclos_3 ;no - the copy loop below would never stop.
lea ax,[di+2] ;where the pattern will end afterwards.
cmp ax,dx
ja stclos_3 ;that's past the end of the buffer.
push di
stclos_1:
dec di ;copy from the top down, so that the
mov al,[di] ; overlapping move doesn't clobber itself.
mov [di+2],al
cmp di,bx
jne stclos_1
mov word ptr [bx],offset omatch_CLO
pop di
add di,2
ret
stclos_3:
mov di,dx ;mark the buffer as full.
ret


stor:
;Compile "\|": the alternative compiled so far (from last_or up to di)
; becomes
;   omatch_OR, <pointer to the next alternative>, <alternative>, omatch_EOS
; and compilation continues with the next alternative.
;di->last set added + 1, dx->end of dest.
;exit with di->start of the next alternative and last_or pointing there,
; so that a later "\|" wraps only that alternative and the pointer stored
; here stays valid. this_pattern is set to -1 so that a '*' right after
; "\|" is not taken as a closure. If there is no room, nothing is written
; and di is set to dx, so makepat reports the pattern as too long.
;clobbers ax, bx.
mov bx,last_or
mov this_pattern,-1 ;there is nothing for a following '*' to close.
lea ax,[di+6] ;room for OR, its pointer, and EOS?
cmp ax,dx
ja stor_3 ;no - don't write past the end.
push di
cmp di,bx ;is the alternative empty?
je stor_2 ;yes - nothing to move (the loop would never stop).
stor_1:
dec di ;copy from the top down, so that the
mov al,[di] ; overlapping move doesn't clobber itself.
mov [di+4],al
cmp di,bx
jne stor_1
stor_2:
pop di ;get the new last set.
add di,4
mov ax,offset omatch_EOS ;store the end of string.
stosw
mov word ptr [bx],offset omatch_OR
mov [bx+2],di ;remember where the next starts.
mov last_or,di ;the next "or" goes in front of the next alternative.
ret
stor_3:
mov di,dx ;mark the buffer as full.
ret


getccl:
;Compile a character class; the '[' has already been read.
;si -> source (null terminated)
;di -> dest, dx -> end of dest
;Stores omatch_NCCL (if the class starts with '^') or omatch_CCL, a count
; byte, and the members of the class as expanded by dodash.
;return cy=1 if error (no closing ']'), with si -> the offending character.
cmp byte ptr [si],'^' ;negated class?
mov ax,offset omatch_CCL ;(mov leaves the flags alone.)
jne getccl_set ;no.
inc si ;yes - step over the '^'.
mov ax,offset omatch_NCCL
getccl_set:
call addset
push bx
mov bx,di ;bx -> the count byte.
call addbyte ;leave room for count

call dodash ;store the members of the class.
mov ax,di ;count = bytes stored after the count byte.
sub ax,bx
dec al
mov [bx],al
pop bx
lodsb
cmp al,']' ;now make sure that we end in ']'.
jne getccl_bad ;nope.
clc
ret
getccl_bad:
dec si ;make si -> the null.
stc
ret


dodash:
;Copy the members of a character class to the destination, expanding
; ranges such as a-z into every character they cover.
;si -> source pattern (null terminated)
;di -> destination pattern
;dx -> end of destination pattern
;exit with si -> the ']' or null that ended the class. bx is preserved;
; ax is clobbered.
push bx
mov bx,si ;bx -> first character of the class.
dodash_1:
lodsb
or al,al
je dodash_2
cmp al,']'
je dodash_2
cmp al,'-'
je dodash_4
call addbyte
jmp dodash_1
dodash_4:
;NOTE(review): lodsb has already moved si past the '-', so si can't equal
; bx here, and a class normally ends in ']' rather than a null. Those cases
; appear to be caught by the alphanumeric tests at dodash_6 instead.
cmp si,bx ;'-' at beginning?
je dodash_5
cmp [si].b,0 ;or '-' at end?
jne dodash_6
dodash_5:
mov al,'-' ;if at beginning or at end, just a '-'
call addbyte
jmp dodash_1
dodash_6:
;[si-2] is the character before the '-', [si] the one after it.
mov al,[si-2] ;in increasing alphabetic order?
cmp al,[si]
ja dodash_5 ;no - forget it.
call alphanumeric ;left char alphanumeric?
jnc dodash_5 ;no - forget it.
mov al,[si]
call alphanumeric ;right char alphanumeric?
jnc dodash_5 ;no - forget it.
mov al,[si-2]
dodash_7:
inc al ;pre-increment -- the first one's there.
cmp al,[si]
ja dodash_9
call addbyte
jmp dodash_7
dodash_9:
inc si ;skip the character that ended the range.
jmp dodash_1
dodash_2:
dec si ;back up to the ']' or null.
pop bx
ret


alphanumeric:
;Test whether al is a digit or an unaccented letter.
;return cy=1 if al is alphanumeric ('0'-'9', 'A'-'Z', 'a'-'z'), cy=0 if not.
;al is preserved.
cmp al,'z'
ja alphanumeric_no ;above the lowercase letters.
cmp al,'a'
jae alphanumeric_yes ;'a'..'z'
cmp al,'Z'
ja alphanumeric_no ;between 'Z' and 'a'.
cmp al,'A'
jae alphanumeric_yes ;'A'..'Z'
cmp al,'9'
ja alphanumeric_no ;between '9' and 'A'.
cmp al,'0'
jae alphanumeric_yes ;'0'..'9'
alphanumeric_no:
clc
ret
alphanumeric_yes:
stc
ret


init_case_table:
;Build case_ignore_table: every byte maps to itself, except that 'a'..'z'
; map to 'A'..'Z'. Reached through the init_case pointer, which is then
; aimed at init_case_2 so that later calls return at once.
;bx is preserved; al is clobbered.
push bx
xor bx,bx
init_case_fill:
mov case_ignore_table[bx],bl ;identity mapping for all 256 entries.
inc bl
jnz init_case_fill ;until bl wraps around to zero.
;now translate 'a' to 'A'.
mov bx,'a'
init_case_upper:
mov al,bl
and al,0dfh ;clear bit 5: 'a'..'z' become 'A'..'Z'.
mov case_ignore_table[bx],al
inc bx
cmp bx,'z'
jbe init_case_upper
mov init_case,offset init_case_2 ;the table only needs building once.
pop bx
init_case_2:
ret


code ends

end



  3 Responses to “Category : Word Processors
Archive   : FMAC16AS.ZIP
Filename : SEARCH.ASM

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 It’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/