Category : C Source Code
Archive   : WNSRC173.ZIP
Filename : FPU087.ASM

 
Output of file : FPU087.ASM contained in archive : WNSRC173.ZIP
TITLE fpu087.asm (C) 1989, Mark C. Peterson, CompuServe [70441,3353]
SUBTTL All rights reserved.
;
; Code may be used in any program provided the author is credited
; either during program execution or in the documentation. Source
; code may be distributed only in combination with public domain or
; shareware source code. Source code may be modified provided the
; copyright notice and this message is left unchanged and all
; modifications are clearly documented.
;
; I would appreciate a copy of any work which incorporates this code.
;
; Mark C. Peterson
; 405-C Queen St., Suite #181
; Southington, CT 06489
; (203) 276-9721
;
; References:
; The VNR Concise Encyclopedia of Mathematics
; by W. Gellert, H. Hustner, M. Hellwich, and H. Kastner
; Published by Van Nostrand Reinhold Comp, 1975
;
; 80386/80286 Assembly Language Programming
; by William H. Murray, III and Chris H. Pappas
; Published by Osborne McGraw-Hill, 1986
;
; History since Fractint 16.3:
; CJLT changed e2x and Log086 algorithms for more speed
; CJLT corrected SinCos086 for -ve angles in 2nd and 4th quadrants
; CJLT speeded up SinCos086 for angles >45 degrees in any quadrant
; (See comments containing the string `CJLT')
; 14 Aug 91 CJLT removed r16Mul - not called from anywhere
; 21 Aug 91 CJLT corrected Table[1] from 6 to b
; improved bx factors in Log086 for more accuracy
; corrected Exp086 overflow detection to 15 from 16 bits.
;
; CJLT= Chris J Lusby Taylor
; 32 Turnpike Road
; Newbury, England (where's that?)
; Contactable via Compuserve user Stan Chelchowski [100016,351]
; or Tel 011 44 635 33270 (home)
;
;
;PROCs in this module:
;FPUcplxmul PROC x:word, y:word, z:word
;FPUcplxdiv PROC x:word, y:word, z:word
;FPUcplxlog PROC x:word, z:word
;FPUsinhcosh PROC x:word, sinh:word, cosh:word
;FPUsincos PROC x:word, sinx:word, cosx:word
;r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
;RegFloat2Fg PROC x1:word, x2:word, Fudge:word
;RegFg2Float PROC x1:word, x2:word, FudgeFact:byte
;ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word
;LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word
;LogFloat14 PROC x1:word, x2:word
;RegSftFloat PROC x1:word, x2:word, Shift:byte
;RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word
;SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \
;_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
;Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
;SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
;Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD


IFDEF ??version
MASM51
QUIRKS
EMUL
ENDIF

.model medium, c

extrn cos:far
extrn _Loaded387sincos:far
extrn compiled_by_turboc:word


.data

extrn cpu:WORD

PUBLIC TrigLimit, TrigOverflow

PiFg13 dw 6487h
InvPiFg33 dd 0a2f9836eh
InvPiFg16 dw 517ch
Ln2Fg16 dw 0b172h ;ln(2) * 2^16 . True value is b172.18
Ln2Fg15 dw 058b9h ;used by e2xLT. True value is 58b9.0C
TrigOverflow dw 0
TrigLimit dd 0
;
;Table of 2^(n/16) for n=0 to 15. All entries fg15.
;Used by e2xLT
;
Table dw 08000h,085abh,08b96h,091c4h
dw 09838h,09ef5h,0a5ffh,0ad58h
dw 0b505h,0bd09h,0c567h,0ce25h
dw 0d745h,0e0cdh,0eac1h,0f525h
one dw ?
expSign dw ?
a dw ?
SinNeg dw ? ;CJLT - not now needed by SinCos086, but by
CosNeg dw ? ; ArcTan086
Ans dq ?
fake_es dw ? ; Windows can't use ES for storage

TaylorTerm MACRO
LOCAL Ratio
add Factorial, one
jnc SHORT Ratio

rcr Factorial, 1
shr Num, 1
shr one, 1

Ratio:
mul Num
div Factorial
ENDM



_4_ dq 4.0
_2_ dq 2.0
_1_ dq 1.0
PointFive dq 0.5
temp dq ?
Sign dw ?

extrn fpu:word

.code


FPUcplxmul PROC x:word, y:word, z:word
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
mov bx, z
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
fsubr ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
fmulp st(3), st ; y.x, x.y, x.x*y.y
fmul ; y.x*x.y, x.x*y.y
fadd ; y.x*x.y + x.x*y.y
fstp QWORD PTR [bx+8]
ret
FPUcplxmul ENDP





FPUcplxdiv PROC x:word, y:word, z:word
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.y*y.y, y.y, y.x, x.y, x.x
fld st(2) ; y.x, y.y*y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.x*y.x, y.y*y.y, y.y, y.x, x.y, x.x
fadd ; mod, y.y, y.x, x.y, x.x
fdiv st(1), st ; mod, y.y=y.y/mod, y.x, x.y, x.x
fdivp st(2), st ; y.y, y.x=y.x/mod, x.y, x.x
mov bx, z
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
fadd ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
fmulp st(3), st ; y.x, x.y, x.x*y.y
fmul ; y.x*x.y, x.x*y.y
fsubr ; y.x*x.y + x.x*y.y
fstp QWORD PTR [bx+8]
ret
FPUcplxdiv ENDP



FPUcplxlog PROC x:word, z:word
LOCAL Status:word
mov bx, x
fld QWORD PTR [bx+8] ; x.y
fld QWORD PTR [bx] ; x.x, x.y
mov bx, z
fldln2 ; ln2, x.x, x.y
fdiv _2_ ; ln2/2, x.x, x.y
fld st(2) ; x.y, ln2/2, x.x, x.y
fmul st, st ; sqr(x.y), ln2/2, x.x, x.y
fld st(2) ; x.x, sqr(x.y), ln2/2, x.x, x.y
fmul st, st ; sqr(x.x), sqr(x.y), ln2/2, x.x, x.y
fadd ; mod, ln2/2, x.x, x.y
fyl2x ; z.x, x.x, x.y
fstp QWORD PTR [bx] ; x.x, x.y
cmp fpu, 387
jne Restricted

fpatan
jmp StoreZX

Restricted:
mov bx, x
mov dh, BYTE PTR [bx+7]
or dh, dh
jns ChkYSign

fchs ; |x.x|, x.y

ChkYSign:
mov dl, BYTE PTR [bx+8+7]
or dl, dl
jns ChkMagnitudes

fxch ; x.y, |x.x|
fchs ; |x.y|, |x.x|
fxch ; |x.x|, |x.y|

ChkMagnitudes:
fcom st(1) ; x.x, x.y
fstsw Status ; x.x, x.y
test Status, 4500h
jz XisGTY

test Status, 4000h
jz XneY

fstp st ; x.y
fstp st ;
fldpi ; Pi
fdiv _4_ ; Pi/4
jmp ChkSignZ

XneY:
fxch ; x.y, x.x
fpatan ; Pi/2 - Angle
fldpi ; Pi, Pi/2 - Angle
fdiv _2_ ; Pi/2, Pi/2 - Angle
fsubr ; Angle
jmp ChkSignZ

XisGTY:
fpatan

ChkSignZ:
or dh, dh
js NegX

or dl, dl
jns StoreZX

fchs
jmp StoreZX

NegX:
or dl, dl
js QuadIII

fldpi
fsubr
jmp StoreZX

QuadIII:
fldpi
fsubr
fchs

StoreZX:
mov bx, z
fstp QWORD PTR [bx+8] ;
ret
FPUcplxlog ENDP




FPUsinhcosh PROC x:word, sinh:word, cosh:word
LOCAL Control:word
fstcw Control
push Control ; Save control word on the stack
or Control, 0000110000000000b
fldcw Control ; Set control to round towards zero

mov Sign, 0 ; Assume the sign is positive
mov bx, x

fldln2 ; ln(2)
fdivr QWORD PTR [bx] ; x/ln(2)

cmp BYTE PTR [bx+7], 0
jns DuplicateX

fchs ; x = |x|

DuplicateX:
fld st ; x/ln(2), x/ln(2)
frndint ; int = integer(|x|/ln(2)), x/ln(2)
fxch ; x/ln(2), int
fsub st, st(1) ; rem < 1.0, int
fdiv _2_ ; rem/2 < 0.5, int
f2xm1 ; (2**rem/2)-1, int
fadd _1_ ; 2**rem/2, int
fmul st, st ; 2**rem, int
fscale ; e**|x|, int
fstp st(1) ; e**|x|

cmp BYTE PTR [bx+7], 0
jns ExitFexp

fdivr _1_ ; e**x

ExitFexp:
fld st ; e**x, e**x
fdivr PointFive ; e**-x/2, e**x
fld st ; e**-x/2, e**-x/2, e**x
fxch st(2) ; e**x, e**-x/2, e**-x/2
fdiv _2_ ; e**x/2, e**-x/2, e**-x/2
fadd st(2), st ; e**x/2, e**-x/2, cosh(x)
fsubr ; sinh(x), cosh(x)

mov bx, sinh ; sinh, cosh
fstp QWORD PTR [bx] ; cosh
mov bx, cosh
fstp QWORD PTR [bx] ;

pop Control
fldcw Control ; Restore control word
ret
FPUsinhcosh ENDP


FPUsincos PROC x:word, sinx:word, cosx:word
LOCAL Status:word
mov bx, x
fld QWORD PTR [bx] ; x

cmp fpu, 387
jne Use387FPUsincos

call _Loaded387sincos ; cos(x), sin(x)
mov bx, cosx
fstp QWORD PTR [bx] ; sin(x)
mov bx, sinx
fstp QWORD PTR [bx] ;
ret

Use387FPUsincos:

sub sp, 8 ; save 'x' on the CPU stack
mov bx, sp
fstp QWORD PTR [bx] ; FPU stack:

call cos

add sp, 8 ; take 'cos(x)' off the CPU stack
mov bx, ax
cmp compiled_by_turboc,0
jne turbo_c1

fld QWORD PTR [bx] ; FPU stack: cos(x)

turbo_c1:
fld st ; FPU stack: cos(x), cos(x)
fmul st, st ; cos(x)**2, cos(x)
fsubr _1_ ; sin(x)**2, cos(x)
fsqrt ; +/-sin(x), cos(x)

mov bx, x
fld QWORD PTR [bx] ; x, +/-sin(x), cos(x)
fldpi ; Pi, x, +/-sin(x), cos(x)
fadd st, st ; 2Pi, x, +/-sin(x), cos(x)
fxch ; |x|, 2Pi, +/-sin(x), cos(x)
fprem ; Angle, 2Pi, +/-sin(x), cos(x)
fstp st(1) ; Angle, +/-sin(x), cos(x)
fldpi ; Pi, Angle, +/-sin(x), cos(x)

cmp BYTE PTR [bx+7], 0
jns SignAlignedPi

fchs ; -Pi, Angle, +/-sin(x), cos(x)

SignAlignedPi:
fcompp ; +/-sin(x), cos(x)
fstsw Status ; +/-sin(x), cos(x)

mov ax, Status
and ah, 1
jz StoreSinCos ; Angle <= Pi

fchs ; sin(x), cos(x)

StoreSinCos:
mov bx, sinx
fstp QWORD PTR [bx] ; cos(x)
mov bx, cosx
fstp QWORD PTR [bx] ;
ret
FPUsincos ENDP


PUBLIC r16Mul
r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
mov si, x1
mov bx, x2
mov di, y1
mov cx, y2

xor ax, ax
shl bx, 1
jz Exitr16Mult ; Destination is zero

rcl ah, 1
shl cx, 1
jnz Chkr16Exp
xor bx, bx ; Source is zero
xor si, si
jmp Exitr16Mult

Chkr16Exp:
rcl al, 1
xor ah, al ; Resulting sign in ah
stc ; Put 'one' bit back into number
rcr bl, 1
stc
rcr cl, 1

sub ch, 127 ; Determine resulting exponent
add bh, ch
mov al, bh
mov fake_es, ax ; es has the resulting exponent and sign

mov ax, di
mov al, ah
mov ah, cl

mov dx, si
mov dl, dh
mov dh, bl

mul dx
mov cx, fake_es

shl ax, 1
rcl dx, 1
jnc Remr16MulOneBit ; 'One' bit is the next bit over

inc cl ; 'One' bit removed with previous shift
jmp Afterr16MulNorm

Remr16MulOneBit:
shl ax, 1
rcl dx, 1

Afterr16MulNorm:
mov bl, dh ; Perform remaining 8 bit shift
mov dh, dl
mov dl, ah
mov si, dx
mov bh, cl ; Put in the exponent
rcr ch, 1 ; Get the sign
rcr bx, 1 ; Normalize the result
rcr si, 1
Exitr16Mult:
mov ax, si
mov dx, bx
ret
r16Mul ENDP


PUBLIC RegFloat2Fg
RegFloat2Fg PROC x1:word, x2:word, Fudge:word
mov ax, WORD PTR x1
mov dx, WORD PTR x2
mov bx, ax
or bx, dx
jz ExitRegFloat2Fg

xor bx, bx
mov cx, bx

shl ax, 1
rcl dx, 1
rcl bx, 1 ; bx contains the sign

xchg cl, dh ; cx contains the exponent

stc ; Put in the One bit
rcr dl, 1
rcr ax, 1

sub cx, 127 + 23
add cx, Fudge
jz ChkFgSign
jns ShiftFgLeft

neg cx
ShiftFgRight:
shr dx, 1
rcr ax, 1
loop ShiftFgRight
jmp ChkFgSign

ShiftFgLeft:
shl ax, 1
rcl dx, 1
loop ShiftFgLeft

ChkFgSign:
or bx, bx
jz ExitRegFloat2Fg

not ax
not dx
add ax, 1
adc dx, 0

ExitRegFloat2Fg:
ret
RegFloat2Fg ENDP



PUBLIC RegFg2Float
RegFg2Float PROC x1:word, x2:word, FudgeFact:byte
mov ax, x1
mov dx, x2

mov cx, ax
or cx, dx
jz ExitFudgedToRegFloat

mov ch, 127 + 32
sub ch, FudgeFact
xor cl, cl
shl ax, 1 ; Get the sign bit
rcl dx, 1
jnc FindOneBit

inc cl ; Fudged < 0, convert to postive
not ax
not dx
add ax, 1
adc dx, 0

FindOneBit:
shl ax, 1
rcl dx, 1
dec ch
jnc FindOneBit
dec ch

mov al, ah
mov ah, dl
mov dl, dh
mov dh, ch

shr cl, 1 ; Put sign bit in
rcr dx, 1
rcr ax, 1

ExitFudgedToRegFloat:
ret
RegFg2Float ENDP


PUBLIC ExpFudged
ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word
LOCAL exp:WORD
xor ax, ax
mov WORD PTR Ans, ax
mov WORD PTR Ans + 2, ax
mov ax, WORD PTR x_low
mov dx, WORD PTR x_high
or dx, dx
js NegativeExp

div Ln2Fg16
mov exp, ax
or dx, dx
jz Raiseexp

mov ax, dx
mov si, dx
mov bx, 1

PosExpLoop:
add WORD PTR Ans, ax
adc WORD PTR Ans + 2, 0
inc bx
mul si
mov ax, dx
xor dx, dx
div bx
or ax, ax
jnz PosExpLoop

Raiseexp:
inc WORD PTR Ans + 2
mov ax, WORD PTR Ans
mov dx, WORD PTR Ans + 2
mov cx, -16
add cx, Fudge
add cx, exp
or cx, cx
jz ExitExpFudged
jns LeftShift
neg cx

RightShift:
shr dx, 1
rcr ax, 1
loop RightShift
jmp ExitExpFudged

NegativeExp:
not ax
not dx
add ax, 1
adc dx, 0
div Ln2Fg16
neg ax
mov exp, ax

or dx, dx
jz Raiseexp

mov ax, dx
mov si, dx
mov bx, 1

NegExpLoop:
sub WORD PTR Ans, ax
sbb WORD PTR Ans + 2, 0
inc bx
mul si
mov ax, dx
xor dx, dx
div bx
or ax, ax
jz Raiseexp

add WORD PTR Ans, ax
adc WORD PTR Ans + 2, 0
inc bx
mul si
mov ax, dx
xor dx, dx
div bx
or ax, ax
jnz NegExpLoop
jmp Raiseexp

LeftShift:
shl ax, 1
rcl dx, 1
loop LeftShift

ExitExpFudged:
ret
ExpFudged ENDP



PUBLIC LogFudged
LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word
LOCAL exp:WORD
xor bx, bx
mov cx, 16
sub cx, Fudge
mov ax, x_low
mov dx, x_high

or dx, dx
jz ChkLowWord

Incexp:
shr dx, 1
jz DetermineOper
rcr ax, 1
inc cx
jmp Incexp

ChkLowWord:
or ax, ax
jnz Decexp
jmp ExitLogFudged

Decexp:
dec cx ; Determine power of two
shl ax, 1
jnc Decexp

DetermineOper:
mov exp, cx
mov si, ax ; si =: x + 1
shr si, 1
stc
rcr si, 1
mov dx, ax
xor ax, ax
shr dx, 1
rcr ax, 1
shr dx, 1
rcr ax, 1 ; dx:ax = x - 1
div si

mov bx, ax ; ax, Fudged 16, max of 0.3333333
shl ax, 1 ; x = (x - 1) / (x + 1), Fudged 16
mul ax
shl ax, 1
rcl dx, 1
mov ax, dx ; dx:ax, Fudged 35, max = 0.1111111
mov si, ax ; si = (ax * ax), Fudged 19

mov ax, bx
; bx is the accumulator, First term is x
mul si ; dx:ax, Fudged 35, max of 0.037037
mov fake_es, dx ; Save high word, Fudged (35 - 16) = 19
mov di, 0c000h ; di, 3 Fudged 14
div di ; ax, Fudged (36 - 14) = 21
or ax, ax
jz Addexp

mov cl, 5
shr ax, cl
add bx, ax ; bx, max of 0.345679
; x = x + x**3/3

mov ax, fake_es ; ax, Fudged 19
mul si ; dx:ax, Fudged 38, max of 0.004115
mov fake_es, dx ; Save high word, Fudged (38 - 16) = 22
mov di, 0a000h ; di, 5 Fudged 13
div di ; ax, Fudged (38 - 13) = 25
or ax, ax
jz Addexp

mov cl, 9
shr ax, cl
add bx, ax
; x = x + x**3/3 + x**5/5

mov ax, fake_es ; ax, Fudged 22
mul si ; dx:ax, Fudged 41, max of 0.0004572
mov di, 0e000h ; di, 7 Fudged 13
div di ; ax, Fudged (41 - 13) = 28
mov cl, 12
shr ax, cl
add bx, ax

Addexp:
shl bx, 1 ; bx *= 2, Fudged 16, max of 0.693147
; x = 2 * (x + x**3/3 + x**5/5 + x**7/7)
mov cx, exp
mov ax, Ln2Fg16 ; Answer += exp * Ln2Fg16
or cx, cx
js SubFromAns

mul cx
add ax, bx
adc dx, 0
jmp ExitLogFudged

SubFromAns:
neg cx
mul cx
xor cx, cx
xchg cx, dx
xchg bx, ax
sub ax, bx
sbb dx, cx

ExitLogFudged:
; x = 2 * (x + x**3/3 + x**5/5 + x**7/7) + (exp * Ln2Fg16)
ret
LogFudged ENDP




PUBLIC LogFloat14
LogFloat14 PROC x1:word, x2:word
mov ax, WORD PTR x1
mov dx, WORD PTR x2
shl ax, 1
rcl dx, 1
xor cx, cx
xchg cl, dh

stc
rcr dl, 1
rcr ax, 1

sub cx, 127 + 23
neg cx
push cx
push dx
push ax
call LogFudged
add sp, 6
ret
LogFloat14 ENDP


PUBLIC RegSftFloat
RegSftFloat PROC x1:word, x2:word, Shift:byte
mov ax, x1
mov dx, x2

shl dx, 1
rcl cl, 1

add dh, Shift

shr cl, 1
rcr dx, 1

ret
RegSftFloat ENDP




PUBLIC RegDivFloat
RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word
mov si, x1
mov bx, x2
mov di, y1
mov cx, y2

xor ax, ax
shl bx, 1
jnz ChkOtherOp
jmp ExitRegDiv ; Destination is zero

ChkOtherOp:
rcl ah, 1
shl cx, 1
jnz ChkDivExp
xor bx, bx ; Source is zero
xor si, si
jmp ExitRegDiv

ChkDivExp:
rcl al, 1
xor ah, al ; Resulting sign in ah
stc ; Put 'one' bit back into number
rcr bl, 1
stc
rcr cl, 1

sub ch, 127 ; Determine resulting exponent
sub bh, ch
mov al, bh
mov fake_es, ax ; es has the resulting exponent and sign

mov ax, si ; 8 bit shift, bx:si moved to dx:ax
mov dh, bl
mov dl, ah
mov ah, al
xor al, al

mov bh, cl ; 8 bit shift, cx:di moved to bx:cx
mov cx, di
mov bl, ch
mov ch, cl

xor cl, cl

shr dx, 1
rcr ax, 1

div bx
mov si, dx ; Save (and shift) remainder
mov dx, cx ; Save the quess
mov cx, ax
mul dx ; Mult quess times low word
xor di, di
sub di, ax ; Determine remainder
sbb si, dx
mov ax, di
mov dx, si
jc RemainderNeg

xor di, di
jmp GetNextDigit

RemainderNeg:
mov di, 1 ; Flag digit as negative
not ax ; Convert remainder to positive
not dx
add ax, 1
adc dx, 0

GetNextDigit:
shr dx, 1
rcr ax, 1
div bx
xor bx, bx
shl dx, 1
rcl ax, 1
rcl bl, 1 ; Save high bit

mov dx, cx ; Retrieve first digit
or di, di
jz RemoveDivOneBit

neg ax ; Digit was negative
neg bx
dec dx

RemoveDivOneBit:
add dx, bx
mov cx, fake_es
shl ax, 1
rcl dx, 1
jc AfterDivNorm

dec cl
shl ax, 1
rcl dx, 1

AfterDivNorm:
mov bl, dh ; Perform remaining 8 bit shift
mov dh, dl
mov dl, ah
mov si, dx
mov bh, cl ; Put in the exponent
shr ch, 1 ; Get the sign
rcr bx, 1 ; Normalize the result
rcr si, 1

ExitRegDiv:
mov ax, si
mov dx, bx
ret
RegDivFloat ENDP



Term equ
Num equ
Factorial equ
Sin equ
Cos equ
e equ
Inve equ

SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \
CosAddr:WORD
mov ax, LoNum
mov dx, HiNum

xor cx, cx
; mov SinNeg, cx ;CJLT - not needed now
; mov CosNeg, cx ;CJLT - not needed now
mov a, cx ;CJLT - Not needed by the original code, but it
; is now!
or dx, dx
jns AnglePositive

not ax
not dx
add ax, 1
adc dx, cx ;conveniently zero
mov a,8 ;a now has 4 bits: Sign+quadrant+octant

AnglePositive:
mov si, ax
mov di, dx
mul WORD PTR InvPiFg33
mov bx, dx
mov ax, di
mul WORD PTR InvPiFg33
add bx, ax
adc cx, dx
mov ax, si
mul WORD PTR InvPiFg33 + 2
add bx, ax
adc cx, dx
mov ax, di
mul WORD PTR InvPiFg33 + 2
add ax, cx
adc dx, 0

and dx, 3 ;Remove multiples of 2 pi
shl dx, 1 ;move over to make space for octant number
;
;CJLT - new code to reduce angle to: 0 <= angle <= 45
;
or ax, ax
jns Lessthan45
inc dx ;octant number
not ax ;angle=90-angle if it was >45 degrees
Lessthan45:
add a, dx ;remember octant and Sign in a
mov Num, ax ;ax=Term, now
;
; We do the Taylor series with all numbers scaled by pi/2
; so InvPiFg33 represents one. Truly.
;
mov Factorial, WORD PTR InvPiFg33 + 2
mov one, Factorial
mov Cos, Factorial ; Cos = 1
mov Sin, Num ; Sin = Num

LoopIntSinCos:
TaylorTerm ; ax=Term = Num * (Num/2) * (Num/3) * . . .
sub Cos, Term ; Cos = 1 - Num*(Num/2) + (Num**4)/4! - . . .
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos

TaylorTerm
sub Sin, Term ; Sin = Num - Num*(Num/2)*(Num/3) + (Num**5)/5! - . . .
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos

TaylorTerm
add Cos, Term
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos

TaylorTerm ; Term = Num * (x/2) * (x/3) * . . .
add Sin, Term
cmp Term, WORD PTR TrigLimit
jnbe LoopIntSinCos

ExitIntSinCos:
xor ax, ax
mov cx, ax
cmp Cos, WORD PTR InvPiFg33 + 2
jb CosDivide ; Cos < 1.0

inc cx ; Cos == 1.0
jmp StoreCos

CosDivide:
mov dx, Cos
div WORD PTR InvPiFg33 + 2

StoreCos:
mov Cos, ax ; cx:Cos

xor ax, ax
mov bx, ax
cmp Sin, WORD PTR InvPiFg33 + 2
jb SinDivide ; Sin < 1.0

inc bx ; Sin == 1.0
jmp StoreSin

SinDivide:
mov dx, Sin
div WORD PTR InvPiFg33 + 2

StoreSin:
mov Sin, ax ; bx:Sin
; CJLT again. New tests are needed to correct signs and exchange sin/cos values
mov ax, a
inc al ;forces bit 1 to xor of previous bits 0 and 1
test al, 2
jz ChkNegCos

xchg cx, bx
xchg Sin, Cos
; mov ax, SinNeg commented out by CJLT. This was a bug.
; xchg ax, CosNeg
; mov CosNeg, ax and this was meant to be mov SinNeg,ax

ChkNegCos:
inc al ;forces bit 2 to xor of original bits 1 and 2
test al, 4
jz ChkNegSin

not Cos ;negate cos if quadrant 2 or 3
not cx
add Cos, 1
adc cx, 0

ChkNegSin:
inc al
inc al ;forces bit 3 to xor of original bits 2 and 3
test al, 8
jz CorrectQuad

not Sin
not bx
add Sin, 1
adc bx, 0

CorrectQuad:

CosPolarized:
mov dx, bx
mov bx, CosAddr
mov WORD PTR [bx], Cos
mov WORD PTR [bx+2], cx

SinPolarized:
mov bx, SinAddr
mov WORD PTR [bx], Sin
mov WORD PTR [bx+2], dx
ret
SinCos086 ENDP


PUBLIC ArcTan086
;
;Used to calculate logs of complex numbers, since log(R,theta)=log(R)+i.theta
; in polar coordinates.
;
;In C we need the prototype:
;long ArcTan086(long, long)

;The parameters are x and y, the returned value arctan(y/x) in the range 0..2pi.
ArcTan086 PROC uses si di, x1:word, x2:word, y1:word, y2:word
xor ax, ax ;ax=0
mov si, x1 ;Lo
mov bx, x2 ;Hi
or bx, bx ;Sign set ?
jns xplus
inc ax
not si
not bx
add bx, 1
adc si, 0 ;We have abs(x) now
xplus:
mov di, y1 ;Lo
mov cx, y2 ;Hi
or cx, cx ;Sign set?
jns yplus
inc ax
inc ax ;Set bit 1 of ax
shl ax, 1 ; and shift it all left
not di
not cx
add di, 1
adc cx, 0 ;We have abs(y) now
yplus:
cmp bx, cx ;y>x?
jl no_xchg
jg xchg_xy
cmp si, di ;Hi words zero. What about lo words?
jle no_xchg
xchg_xy: ;Exchange them
inc ax ;Flag the exchange
xchg si, di
xchg bx, cx
no_xchg:
mov SinNeg, ax ;Remember signs of x and y, and whether exchanged
or cx, cx ;y=1.0 ?
jz ynorm ;no, so continue
normy: ;yes, normalise by reducing y to 16 bits max.
rcr cx, 1 ; (We don't really lose any precision)
rcr di, 1
clc
rcr bx, 1
rcr si, 1
or cx, cx
jnz normy
ynorm:

ret
ArcTan086 ENDP

;
;There now follows Chris Lusby Taylor's novel (such modesty!) method
;of calculating exp(x). Originally, I was going to do it by decomposing x
;into a sum of terms of the form:
;
; th -i
; i term=ln(1+2 )
; -i
;If x>term[i] we subtract term[i] from x and multiply the answer by 1 + 2
;
;We can multiply by that factor by shifting and adding. Neat eh!
;
;Unfortunately, however, for 16 bit accuracy, that method turns out to be
;slower than what follows, which is also novel. Here, I first divide x not
;by ln(2) but ln(2)/16 so that we can look up an initial answer in a table of
;2^(n/16). This leaves the remainder so small that the polynomial Taylor series
;converges in just 1+x+x^2/2 which is calculated as 1+x(1+x/2).
;
_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
or dx, dx
jns CalcExpLT

not ax ; if negative, calc exp(abs(x))
not dx
add ax, 1
adc dx, 0

CalcExpLT:
shl ax, 1
rcl dx, 1
shl ax, 1
rcl dx, 1
shl ax, 1
rcl dx, 1 ;x is now in fg19 form for accuracy
; so, relative to fg16, we have:
div Ln2Fg15 ; 8x==(a * Ln(2)/2) + Rem
; x=(a * Ln(2)/16) + Rem/8
;so exp(x)=exp((a * Ln(2)/16) + Rem/8)
; =exp(a/16 * Ln(2)) * exp(Rem/8)
; =2^(a/16) * exp(Rem)
;a mod 16 will give us an initial estimate
mov cl, 4
mov di, ax ;Remember original
shr ax, cl
mov a, ax ;a/16 will give us a bit shift
mov ax, di
and ax, 0000fh ;a mod 16
shl ax, 1 ;used as an index into 2 byte per element Table
mov di, ax
dec cx ;3, please
add dx, 4 ;to control rounding up/down
shr dx, cl ;Num=Rem/8 (convert back to fg16)
;
mov ax, dx
shr ax, 1 ;Num/2 fg 16
inc ax ;rounding control
stc
rcr ax, 1 ;1+Num/2 fg15
mul dx ;dx:ax=Num(1+Num/2) fg31, so dx alone is fg15
shl ax, 1 ;more rounding control
adc dx, 8000H ;dx=1+Num(1+Num/2) fg15
mov ax, Table[di] ;Get table entry fg15
mul dx ;Only two multiplys used! fg14, now (15+15-16)
shl ax, 1
rcl dx, 1 ;fg15
mov e, dx
ret ; return e=e**x * (2**15), 1 < e**x < 2
_e2xLT ENDP ; a= bitshift needed



Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
mov ax, LoNum
mov dx, HiNum
mov TrigOverflow, 0

call _e2xLT ;Use Chris Lusby Taylor's e2x

cmp a, 15 ;CJLT - was 16
jae Overflow

; cmp expSign, 0
; jnz NegNumber
cmp HiNum, 0 ;CJLT - we don't really need expSign
jl NegNumber

mov ax, e
mov dx, ax
inc a
mov cx, 16
sub cx, a
shr dx, cl
mov cx, a
shl ax, cl
jmp ExitExp086

Overflow:
xor ax, ax
xor dx, dx
mov TrigOverflow, 1
jmp ExitExp086

NegNumber:
cmp e, 8000h
jne DivideE

mov ax, e
dec a
jmp ShiftE

DivideE:
xor ax, ax
mov dx, ax
stc
rcr dx, 1
div e

ShiftE:
xor dx, dx
mov cx, a
shr ax, cl

ExitExp086:
ret
Exp086 ENDP



SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
CoshAddr:WORD
mov ax, LoNum
mov dx, HiNum

call _e2xLT ;calculate exp(|x|) fg 15
;answer is e*2^a
cmp e, 8000h ;e==1 ?
jne InvertE ; e > 1, so we can invert it.

mov dx, 1
xor ax, ax
cmp a, 0
jne Shiftone

mov e, ax
mov cx, ax
jmp ChkSinhSign

Shiftone:
mov cx, a
shl dx, cl
dec cx
shr e, cl
shr dx, 1
shr e, 1
mov cx, dx
sub ax, e
sbb dx, 0
xchg ax, e
xchg dx, cx
jmp ChkSinhSign

InvertE:
xor ax, ax ; calc 1/e
mov dx, 8000h
div e

mov Inve, ax

ShiftE:
mov cx, a
shr Inve, cl
inc cl
mov dx, e
shl e, cl
neg cl
add cl, 16
shr dx, cl
mov cx, dx ; cx:e == e**Exp

mov ax, e ; dx:e == e**Exp
add ax, Inve
adc dx, 0
shr dx, 1
rcr ax, 1 ; cosh(Num) = (e**Exp + 1/e**Exp) / 2

sub e, Inve
sbb cx, 0
sar cx, 1
rcr e, 1

ChkSinhSign:
or HiNum, 0
jns StoreHyperbolics

not e
not cx
add e, 1
adc cx, 0

StoreHyperbolics:
mov bx, CoshAddr
mov WORD PTR [bx], ax
mov WORD PTR [bx+2], dx

mov bx, SinhAddr
mov WORD PTR [bx], e
mov WORD PTR [bx+2], cx

ret
SinhCosh086 ENDP



Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD
LOCAL Exp:WORD, Accum:WORD, LoAns:WORD, HiAns:WORD
;NOTE: CJLT's method does not need LoAns, HiAns, but he hasn't yet
;taken them out
xor bx, bx
mov cx, Fudge
mov ax, LoNum
mov dx, HiNum
mov TrigOverflow, 0

or dx, dx
js Overflow
jnz ShiftUp

or ax, ax
jnz ShiftUp

Overflow:
mov TrigOverflow, 1
jmp ExitLog086

ShiftUp:
inc cx ; cx = Exp
shl ax, 1
rcl dx, 1
or dx, dx
jns ShiftUp ;shift until dx in fg15 form

neg cx
add cx, 31
mov Exp, cx
;CJLT method starts here. We try to reduce x to make it very close to 1
;store LoAns in bx for now (fg 16; initially 0)
mov cl,2 ;shift count
redoterm2:
cmp dx, 0AAABH ;x > 4/3 ?
jb doterm3
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/4)
add bx, 49a5H ;ln(4/3) fg 15
jmp redoterm2
doterm3:
inc cl ;count=3
redoterm3:
cmp dx, 9249H ;x > 8/7 ?
jb doterm4
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/8)
add bx, 222eH ;ln(8/7)
jmp redoterm3
doterm4:
inc cl ;count=4
cmp dx, 8889H ;x > 16/15 ?
jb skipterm4
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/16)
add bx, 1085h ;ln(16/15)
;No need to retry term4 as error is acceptably low if we ignore it
skipterm4:
;Now we have reduced x to the range 1 <=x <1.072
;
;Now we continue with the conventional series, but x is so small we
;can ignore all terms except the first!
;i.e.:
;ln(x)=2(x-1)/(x+1)
sub dx, 8000h ; dx= x-1, fg 15
mov cx, dx
stc
rcr cx, 1 ; cx = 1 + (x-1)/2 fg 15
; = 1+x fg 14
mov ax,4000H ;rounding control (Trust me)
div cx ;ax=ln(x)
add bx, ax ;so add it to the rest of the Ans. No carry
MultExp:
mov cx, Exp
mov ax, Ln2Fg16
or cx, cx
js SubFromAns

mul cx ; cx = Exp * Lg2Fg16, fg 16
add ax, bx ;add bx part of answer
adc dx, 0
jmp ExitLog086

SubFromAns:
inc bx ;Somewhat artificial, but we need to add 1 somewhere
neg cx
mul cx
not ax
not dx
add ax, bx
adc dx, 0

ExitLog086:
ret
Log086 ENDP


END


  3 Responses to “Category : C Source Code
Archive   : WNSRC173.ZIP
Filename : FPU087.ASM

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/