Category : C Source Code
Archive   : PGP20SRC.ZIP
Filename : VAX.MAR

 
Output of file : VAX.MAR contained in archive : PGP20SRC.ZIP
;Last Modified: 16-APR-1992 09:06:30.46
.title fprims Fast Multiple Precision Primitives
.ident /V1.7B/
;+
; **-FPRIMS-Fast Multiple Precision Primitives
;
; Facility: PGP
;
; Language: Macro-32
;
; Functional Description:
;
; This module contains fast multiple precision routines for operating on arrays
; of long words. Error checking is minimised at the expense of speed.
;
; Restrictions:
;
; This code is shareable but NOT reentrant as written because of static data.
; A reentrant version of this module could be written but it would be slower!
;
; Version: 1
;
; Original: 00A Date: 17-Sep-1991 Author: Hugh A.J. Kennedy
;
; Based on FPRIMS.ASM written by Zhahai Stewart for the Intel 8086
; architecture.
;
; Modification: 02A Date: 27-Sep-1991 Author: Hugh A.J. Kennedy.
;
; Add fast multiply routine, P_SMUL.
; Re-organise code slightly.
; Ammend/clarify copyright and license statement.
; Add checking for maximum precision exceeded, display a warning message
; and bomb!
;
; Modification: 03A Date: 16-Mar-1992 Author: Hugh A.J. Kennedy.
;
; Sniff for MSB in P_SMUL. In this way, avoid multiplies by leading zeroes
; (not efficient).
;
; Modification: 05A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy.
;
; Encode entire double precision multiply in VAX assembler.
; Correct some minor problems with handling embedded zeroes.
;
; Modification: 06A Date: 17-Mar-1992 Author: Hugh A.J. Kennedy
;
; Align everything for speed. VAXen like stuff on 64-bit, or at least 32-bit
; boundaries. Therefore, we align the add, subtract and rotate tables and then
; we align the multiply loops. The extra NOPs used to pad these loops are of
; negligable cost because they already exist in the memory buffer. When the
; following instruction is aligned, it executes MUCH faster.
;
; Modification: 07A Date: 24-Mar-1991 Author: Hugh A.J. Kennedy.
;
; Implement fast compare.
;-

.sbttl Copyright Notice And License To Use
;
; Copyright (c) 1991-1992, All Rights Reserved by
; Hugh A.J. Kennedy.
;
; A license to use and adapt this software without payment is hereby
; granted subject to the following conditions:
;
; 1) It may only be copied with the inclusion of this copyright
; notice in the program source with these associated conditions.
;
; 2) No title to or ownership of this software is hereby
; transferred.
;
; 3) The information in this software is subject to change
; without notice and should not be construed as a commitment by
; Hugh Kennedy.
;
; 4) The author assumes no liability for any damages arising from the
; use of this software, even if said damages arises from defects in
; this software.
;
; 5) No warranty as to merchantability or fitness of purpose is
; expressed or implied.
;
; 6) Any modifications to this source must be clearly identified as
; such and added to the modification history.
;
; 7) These routines may not be incorporated in a commercial cryptographic
; product.
;
; If you can not comply with these conditions, you *must* contact the author
; and obtain permission other wise you are in violation of copyright.

.sbttl Misc Macros & Definitions
;
; Assembly Parameters
;
max_unit_prec = 72 ; Maximum unit precision
supersniffer = 1 ; Enable bit msb locator.
;
; The following parameter is dependent on the kind of VAX you are running on
; and should be defined if the execution time of the SOBGTR loop control
; instruction and the appropriate operation (ADWC or SBWC) from cache is much
; less than the execution time in main memory. If you have a slow VAX you
; should comment the following line out to use a vector of instructions.
;
novector = 1 ; Use loops rather than vectors.

.macro ascid .string
;+
; *-ASCID-Build An ASCII String Referenced By Descriptor
;
; Functional Description:
;
; This macro is a little like the system supplied .ASCID directive
; but it uses a separate program section to store the ASCII data.
;
; Arguments:
;
; STRING String to create
;-
.nocross

.save_psect

.psect puret

$$$t0 = .
.ascii @.string@
$$$t1 = .-$$$t0

.restore_psect

.word $$$t1
.byte dsc$k_dtype_t
.byte dsc$k_class_s
.address -
$$$t0
.cross

.endm ascid

.sbttl Misc Data Areas
;
; Misc. Data Areas
;
.psect impurd,con,lcl,noshr,exe,rd,wrt,long

;
; This data is static and is used to hold the current precision established
; by P_SETP for other calls to this library.
;
.if not_defined novector

addoff: ; Offset into add table.
.blkl 1 ; also for sub and rot.
.endc

precis: ; Precision in longwords.
.blkl 1

.psect pure,con,rel,shr,exe,rd,nowrt,quad

.align quad

.ifndef novector

prectoobig:
ascid

.endc

.sbttl Start of Code

.sbttl P_CMP Compare two very long integers
;+
; **-P_COMP-Compare two very long integers
;
; Functional Description:
;
; This procedure is invoked to compare two extended precision unsigned
; integers.
;
; Calling Sequence
;
; short P_CMP ( r1, r2)
;
; Parameters:
;
; R1 -> Extended Precision Integer 1
; R2 -> Extended Precision Integer 2
;
; Implicit Inputs:
;
; PRECIS lr*r Precision expresses in longs.
;
; Returns:
;
; -1 if r1 < r2
; 0 if r1 = r2
; +1 if r1 > r2
;

;-

.align long

.entry p_cmp,^m

movl 4(ap),r1 ; R1 -> Sum.
movl 8(ap),r2 ; R2 -> Addend.
movl precis,r0 ; R0 = Precision.
moval (r1)[r0],r1 ; Get MS longwords.
moval (r2)[r0],r2 ; Get MS longwords.
.align long 1 ; Align loop with NOPS.
10$: cmpl -(r1),-(r2) ; Compare.
bnequ 20$ ; If ne, then exit loop.
sobgtr r0,10$ ; Loop until done.
ret ; R0 = zero so R1 = R2.
20$:
bgtru 30$ ; If R1 > R2 then branch.
movw #-1,r0 ; Flag <.
ret
30$:
movw #1,r0 ; Flag >.
ret

.sbttl P_ADDC Add two very long integers with carry
;+
; **-P_ADDC-Add very long integers
;
; Functional Description:
;
; This procedure is invoked to add two very long integers with carry. Each
; integer is represented as an array of longwords, least significant first.
;
; Calling Sequence:
;
; P_ADDC sum,addend,carry
;
; Parameters:
;
; sum lm*r Sum.
; addend lr*r Addend.
; carry lr*v Carry bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-

.align long

.entry p_addc,^m

movl 4(ap),r1 ; R1 -> Sum.
movl 8(ap),r2 ; R2 -> Addend.

.if defined novector

movl precis,r3 ; R3 = Precision.
subl3 12(ap),#0,r0 ; Set carry bit.
.align quad,1 ; Align loop with NOPs
10$: adwc (r2)+,(r1)+ ; Add with carry one longword.
.align quad,1 ; Align next instruction.
sobgtr r3,10$ ; Loop until done.

.iff ; novector

moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 12(ap),#0,r0 ; Set carry bit.
jmp (r3)

.align quad

10$:
.rept max_unit_prec
$$$ = .
adwc (r2)+,(r1)+ ; Add with carry one longword.
nop
addsiz = .-$$$
.endr

.endc ; novector

clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret

.sbttl P_SUBB Subtract very long integers with borrow
;+
; **-P_SUBB-Subtract very long integers
;
; Functional Description:
;
; This procedure is invoked to add subtract very long integers with carry. Each
; integer is represented as an array of longwords, least significant first.
;
; Calling Sequence:
;
; P_SUBB diff,sub,borrow
;
; Parameters:
;
; diff lm*r Difference
; sub lr*r Subtrahend.
; borrow lr*v Borrow bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-

.align long

.entry p_subb,^m

movl 4(ap),r1 ; R1 -> Difference.
movl 8(ap),r2 ; R2 -> Minuend.

.if defined novector

movl precis,r3 ; R3 = No. of longs.
subl3 12(ap),#0,r0 ; Set borrow bit.
.align quad,1 ; Align loop with NOPs.
10$: sbwc (r2)+,(r1)+ ; Subtract with borrow one long.
.align quad,1 ; Align with NOPs.
sobgtr r3,10$ ; Loop through.

.iff ; novector

moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 12(ap),#0,r0 ; Set borrow bit.
jmp (r3)

.align quad
10$:
.rept max_unit_prec
sbwc (r2)+,(r1)+ ; Subtract w/carry one longword.
nop
.endr

.endc ; novector

clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret

.sbttl P_ROTL Rotate left a very long integer with carry.
;+
; **-P_ROTL-Rotate left one bit very long integers
;
; Functional Description:
;
; This procedure is invoked to rotate left one bit (e.g. divide by 2) very
; long integers with carry. Each integer is represented as an array of
; longwords, least significant first. Note that we use the add with carry
; instruction here because the VAX (unlike the dear old PDP-11) lacks a
; rotate instruction that includes the carry bit.

;
; Calling Sequence:
;
; P_ROTL num,carry
;
; Parameters:
;
; num lm*r Number to be shifted
; carry lr*v Carry bit.
;
; Implicit Inputs:
;
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; R0 Resulting carry bit.
;-

.align long

.entry p_rotl,^m

movl 4(ap),r1 ; R1 -> Sum.

.if defined novector

movl precis,r3 ; R3 = No. of longwords.
subl3 8(ap),#0,r0 ; Set carry bit.
.align quad,1 ; Align loop with NOPs
10$: adwc (r1),(r1)+ ; Add to itself with carry.
.align quad,1 ; Align with NOPs.
sobgtr r3,10$ ; Loop until done.

.iff ; novector

moval 10$,r3
addl2 addoff,r3 ; Jump into table.
subl3 8(ap),#0,r0 ; Set carry bit.
jmp (r3)

.align quad
10$:
.rept max_unit_prec
adwc (r1),(r1)+ ; *2+carry.
nop
.endr

.endc ; novector
clrl r0 ; Assume carry clear.
bcc 20$ ; Carry set?
incl r0 ; Flag carry was set.
20$: ret

.sbttl P_DMUL Extended Multiple Precision Multiply
;*
; **-P_DMUL-Extended Multiple Precision Multiply
;
; Functional Description:
;
; This procedure multiplies an unsigned single precision multiplier by a
; single precision multiplicand. The product register is double precision.
; It is expected that the length of the single precision multiplier and
; multiplicand has been previously set by a call to P_SETP. Note that the
; entire length of the product register is zeroed - so it must be a full
; double precision size.
;
; Calling Sequence:
;
; P_DMUL prod, multiplicand, multiplier
;
; Parameters:
;
; prod lw*r Product.
; multuplicand lr*r Multiplicand
; multiplier lr*r Multiplier
;
; Implicit Inputs:
;
; PRECIS lr*r Precision expresses in longs.
;
; Status Returns:
;
; None.
;-

.align long

.entry p_dmul,^m

movl 4(ap),r8 ; R8 -> Product.
beql 49$ ; If eq, not specified.
movl precis,r10 ; R10 = Precision (longs)
ashl #3,r10,r2 ; R0 = No. of bytes to zero.
movc5 #0,#0,#0,r2,(r8) ; Zero product buffer.
movl 8(ap),r3 ; R3 -> Multiplicand.
beql 49$ ; If eq, not specified.
pushl r3 ; Save for posterity.
movl 12(ap),r11 ; R11 -> Multiplier.
beql 49$ ; If eq, not specified.
movl r10,r12 ; R12 = Multiplicand prec.

.if defined SUPERSNIFFER

;
; Here we calculate the effective maximum precision for the multiply by
; locating the long containing the most significant bit of the multiplier
; and the multiplicand.
;
moval (r11)[r10],r0 ; Supersniffer...
.align quad,1 ; Align with nops
45$: tstl -(r0) ; Examine next long.
bneq 50$ ; If ne, then we found msb.
sobgtr r10,45$ ; Loop until done.
49$: ret ; Multiplier = 0!
50$:
moval (r3)[r12],r0 ; Supersniffer...
.align quad,1 ; Align with nops
55$: tstl -(r0) ; Examine next long.
bneq 200$ ; If ne, then we found msb.
sobgtr r12,55$ ; Loop until done.
ret ; Multiplicand = 0!
.iff

brb 200$
49$:
ret

.endc ; SUPERSNIFFER

;
; Multiplier Loop
;
; R12 = Count of multiplicand longs to process.
; R11 -> Next long of multiplier.
; R10 = Count of multiplier longs to process.
; R8 -> Next long of product.
;
.align quad,1 ; Align with nops
200$: movl r12,r5 ; Multiplicand precision.
moval (r8)+,r4 ; R4 -> Next long of product.
movl (sp),r3 ; R3 -> 1st multiplier long.
movl (r4),r0 ; R0,R1 = Partial Sum.
movl 4(r4),r1
clrl r7 ; Zero look-ahead carry.
;
; Perform an extended multiply of two unsigned numbers. This means that
; we have to compensate the hi-order product because either the multiplier
; or the multiplicand may be apparently a negative number. EMUL is a signed
; multiply - so we must be careful. Also, the EMUL longword addend is sign
; extended before adding into the product so we have to add the hard way.
;
; R6 = Current Multiplicand
; R2 = Multiplier
; R4 -> Current quadword of partial product.
; R0,R1 = Partial sum to which product is added
; R7 = Lookahead carry. This gets set if we try to carry after adding
; the partial product to the partial sum. This gets a little more
; complicated because here we are setting the high-order long of
; the next quadword to be operated on.
;
; Essentially the algorithm is as follows:
;
; 0) R0,R1 = (R4) ; Save current partial sum.
; 1) R6 = Next longword of multiplicand.
; 2) (R4) = R6 * R2 ; quad result compensating for negative numbers)
; 3) (R4) = (R4) + R0,R1 ; add back partial sum.
; 4) R7 = Carry bit.
; 5) R4 = R4 + 4 ; Point to next long.
; 6) R1 = 4(R4) + R7 ; Propagate carry to high order of next partial
; ; sum.
; 7) Loop back to step 1 until multiplicand completely processed.
;
movl (r11)+,r2 ; R2 = Multiplier.
beql 999$ ; If eq, not specified.
blss 1500$ ; This unfolds the compensation
; test out of the loop.
;
; This version of the multiply loop is entered when the multiplier is positive
; saving three instructions per unit of precision.
;
.align quad,1 ; Align with NOPs.
500$: movl (r3)+,r6 ; R6 = Current multplicand.
emul r2,r6,#0,(r4) ; Multiply (64-bit result).
;
; Because we have removed leading zeroes, multiplication by zero is very
; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
; the EMUL (looking at the zero product) that the multiplicand was zero so we
; don't need any special case logic later to adjust the product pointer.
;
beql 550$ ; If result eq, skip.
tstl r6 ; Was multiplicand negative?
bgeq 550$ ; No, skip.
addl2 r2,4(r4) ; Yes, compensate.
550$: addl2 r0,(r4)+ ; Accumulate.
adwc r1,(r4)+
movl (r4),r1 ; R1 = Next hi-end partial sum.
adwc r7,r1 ; Add carry if needed.
clrl r7 ; Reset lookahead register.
adwc #0,r7 ; Save lookahead carry.
movl -(r4),r0 ; R0 = Next lo-end partial.
sobgtr r5,500$ ; More units?
999$: sobgtr r10,200$ ; Nope, go get next multiplier
ret
;
; This version of the above multiply loop is entered when the multiplier is
; negative - and we must compensate by adding the multiplicand to the hi-order
; product. This saves a test and a conditional branch per unit of precision.
;
.align quad,1 ; Align with NOPs.
1500$:
movl (r3)+,r6 ; R6 = Current multplicand.
emul r2,r6,#0,(r4) ; Multiply (64-bit result).
;
; Because we have removed leading zeroes, multiplication by zero is very
; unlikely, 1 in 2^32 or so. It is therefore easier to perform the test after
; the EMUL (looking at the zero product) that the multiplicand was zero so we
; don't need any special case logic later to adjust the product pointer.
;
beql 1560$ ; If result eq, skip.
tstl r6 ; Was multiplicand negative?
bgeq 1550$ ; No, skip.
addl2 r2,4(r4) ; Yes, compensate.
1550$:
; As documented above, we unfolded the following to save instructions
; tstl r2 ; Multiplier negative?
; bgeq 1560$ ; No, skip.
addl2 r6,4(r4) ; Yes, compensate.
1560$: addl2 r0,(r4)+ ; Accumulate.
adwc r1,(r4)+ ; R1 = High-end partial sum.
movl (r4),r1 ; R1 = Next hi-end partial sum.
adwc r7,r1 ; Add carry if needed.
clrl r7 ; Reset lookahead register.
adwc #0,r7 ; Save lookahead carry.
movl -(r4),r0 ; R0 = Next lo-end partial.
sobgtr r5,1500$ ; More units?
sobgtr r10,200$ ; Nope, go get next multiplier
ret

.sbttl P_SETP Set Precison.
;+
; **-P_SETP-Set Precision
;
; Functional Description:
;
; This procedure is invoked to set the operating precision of the package.
;
; Calling Sequence:
;
; P_SETP nbits
;
; Parameters:
;
; nbits rw*v Number of bits in number.
;
; Implicit Outputs:
;
; Precis Set to the number of longwords required to implement
; the requested precision.
; Addoff This is used as an offset into the various tables
; of adds, subtracts and rotates to implement the
; operation to the requested precsion.
;
; Status Returns:
;
; None.
;
; Side Effects:
;
; If the maximum precision set in 32-bit units by the assembly
; parameter "max_unit_prec" is exceeded, a message to that effect will
; be displayed and the program will terminate with a fatal error.
;-

.entry p_setp,^m<>

movzwl 4(ap),r1 ; R1 = No. of bits.
addl2 #31,r1 ; Round up to next long word.
ashl #-5,r1,r1 ; R1 = No. of 32 bit words.
movl r1,precis ; Save precision.

.if not_defined novector

subl3 r1,#max_unit_prec,r0 ; R0 = Number of steps reqd.
blss 10$ ; If > 0 then exit.
mull3 #addsiz,r0,addoff ; Get add table offset.

.iftf ; novector

ret

.ift ; novector

10$: ; Table size exceeded!
movab -80(sp),sp ; Output buffer.
pushab (sp) ; Build descriptor
movzwl #80,-(sp)
clrl -(sp) ; Receive return length.
pushl #max_unit_prec ; Compiled max table size.
pushl r1 ; Requested table size.
pushaq 8+4(sp) ; -> Output buffer descriptor.
pushaw 12(sp) ; -> Returned length.
pushaq prectoobig ; -> FAO control string.
calls #5,g^sys$fao ; Format output string.
movl (sp)+,(sp) ; Set actual buffer size.
pushaq (sp) ; -> Output buffer descr.
calls #1,g^lib$put_output ; Output message.
$exit_s - ; Exit with severe error.
code=#4

.endc ; novector

.end


  3 Responses to “Category : C Source Code
Archive   : PGP20SRC.ZIP
Filename : VAX.MAR

  1. Very nice! Thank you for this wonderful archive. I wonder why I found it only now. Long live the BBS file archives!

  2. This is so awesome! 😀 I’d be cool if you could download an entire archive of this at once, though.

  3. But one thing that puzzles me is the “mtswslnkmcjklsdlsbdmMICROSOFT” string. There is an article about it here. It is definitely worth a read: http://www.os2museum.com/wp/mtswslnk/