; Fast long integer square root routine.
; Callable from Turbo C.
; declaration:
; unsigned int near isqrt (unsigned long int);
; Program by Bruce D. Feist
; Released to the Public Domain on 1/21/88 -
; Feel free to use, modify, or modify this routine.
; I do, however, request the following -
; 1) Please don't remove my name (Bruce D. Feist) from
; this source code, even if you modify it.
; 2) If you DO modify it, please indicate in a comment
; that you did so, and that it is no longer solely my work.
; Timings on an 8 megahertz 80286 (PCs Limited 286/8):
; 100,000 iterations took roughly 11 seconds
; Compare to Turbo C's sqrt() function without an 80287:
; 10,000 iterations took roughly 7.5 seconds
; We're talking about a speedup factor of roughly 7 times.

.MODEL SMALL
.CODE
assume cs:_text,ds:dgroup,ss:dgroup
public _isqrt

_isqrt proc near
push si
push di
push bp
mov bp,sp

; Register Usage:
; AX : work; low-order portion of square
; BX : low-order portion of square
; CX : high-order portion of square
; DX : work; high-order portion of square
; SI : root
; DI : temp (square/root)

; We approximate the root by noting that it is roughly half
; the digits of the square.

; Save square in CX:BX

mov cx,word ptr [bp+10]
mov bx,word ptr [bp+8]

; If we're finding the root of 0, let's stop right now!

mov di,cx
or di,bx
jnz not0

xor ax,ax ; ax = 0
jmp return

not0:

; If we're real big, don't try to scale - we'll overflow.

cmp cx,4000h
ja realbig

; temp_square = square

mov dx,cx
mov ax,bx

; root = 1

mov si,1

scale:

; temp_square >>= 2

shr dx,1
rcr ax,1
shr dx,1
rcr ax,1

; root <<= 1

shl si,1
chk_scale:
mov di,ax
or di,dx
jne scale

jmp newtonize

realbig:

; If we're >= \$FFFF0000 division will fail; hard code it!
; Since we must do this, may as well hard code for any square
; of \$FFFF.

cmp cx,0FFFEh ; \$FFFF squared is \$FFFE0001
jbe nottoobig
mov ax,0FFFFh
jmp return

nottoobig:
mov si,0FFFFh ; this is an upper bound for the root

newtonize:
; temp := square / root

mov dx,cx
mov ax,bx
div si ; root
mov di,ax

; if (root < temp) then swap them

cmp si,di ; root,temp
ja noswap
xchg si,di
noswap:

; if (root - temp) > 1

mov ax,si
sub ax,di
cmp ax,1
ja not_done

; return temp

mov ax,di
jmp short return

not_done:
; root = (root + temp) / 2

rcr si,1 ; rotate in case si+di caused carry

jmp short newtonize

return:
; mov sp,bp
pop bp
pop di
pop si
ret
_isqrt endp

;_text ends
end
