draconisplusplus/subprojects/openssl-3.0.8/generated-config/archs/linux64-s390x/asm/crypto/bn/s390x-mont.S

177 lines
3 KiB
ArmAsm
Raw Normal View History

2025-02-20 14:49:18 -05:00
# -----------------------------------------------------------------------
# bn_mul_mont -- Montgomery multiplication for s390x (see the
# identification string at the end of this file).
#
# Arguments as the code below uses them. The C prototype is not part of
# this file; presumably int bn_mul_mont(rp, ap, bp, np, n0, num) --
# TODO confirm against the C declaration.
#   %r2   rp   result words, written by .Lsub and .Lcopy
#   %r3   ap   multiplicand, read as 64-bit words
#   %r4   bp   multiplier, read as 64-bit words
#   %r5   np   modulus words
#   %r6   address of n0; only its first 64-bit word is loaded
#   num   word count, loaded from 164(%r15) as a signed 32-bit value
#
# Returns in %r2: 0 when num*8 is below 16 or above 96 (nothing is
# computed on that path; only the slot at 16(%r15) has been written),
# otherwise 1 after rp has been filled in.
#
# Register roles inside the loops:
#   %r0        m1 = (low word of the current column) * n0, low 64 bits
#   %r1        num: scaled to bytes during setup, num-1 inside the loops
#   %r2        bp[i] while multiplying (rp is reloaded at the end)
#   %r7        j as a byte offset
#   %r8:%r9    ap[j]*bp[i]   (high:low)
#   %r10:%r11  np[j]*m1      (high:low)
#   %r12       carry word of the ap*bp chain, later the select mask
#   %r13       carry word of the np*m1 chain, later the inverse mask
#   %r14       loop counter (the return address is saved by stmg)
#
# Stack frame after the alloca: back chain at 0(%r15), scratch vector
# tp[0..num] starting at 160(%r15); the incoming frame then begins at
# 168+num*8(%r15), which is where the 184/200/216 offsets below point.
#
# NOTE(review): the carry and borrow chains rely on lghi, la, lg, stg,
# lr/lgr, mlgr, msgr and brct leaving the condition code unchanged
# between algr/alcgr/slbgr. Keep the instruction order as is; confirm
# against the z/Architecture Principles of Operation before editing.
# -----------------------------------------------------------------------
.text
.globl bn_mul_mont
.type bn_mul_mont,@function
bn_mul_mont:
lgf %r1,164(%r15) # pull %r1
sla %r1,3 # %r1 to enumerate bytes
la %r4,0(%r1,%r4) # %r4 = &bp[num]; saved by stmg below as the outer loop end marker
stg %r2,2*8(%r15) # stash rp in the incoming frame; reloaded after the outer loop
cghi %r1,16 #
lghi %r2,0 #
blr %r14 # if(%r1<16) return 0;
cghi %r1,96 #
bhr %r14 # if(%r1>96) return 0;
stmg %r3,%r15,3*8(%r15) # save %r3..%r15 starting at 24(%r15) of the incoming frame
# Build the new frame: 160 bytes + num*8 bytes of tp + 8 bytes for tp[num].
lghi %r2,-160-8 # leave room for carry bit
lcgr %r7,%r1 # -%r1
lgr %r0,%r15 # keep the incoming stack pointer for the back chain
la %r2,0(%r2,%r15) # incoming sp - 168
la %r15,0(%r7,%r2) # alloca
stg %r0,0(%r15) # back chain
sra %r1,3 # restore %r1
la %r4,0(%r7,%r4) # restore %r4
ahi %r1,-1 # adjust %r1 for inner loop
lg %r6,0(%r6) # pull n0
# First pass (i = 0): tp = (ap*bp[0] + np*m1) shifted down by one word.
lg %r2,0(%r4) # bp[0]
lg %r9,0(%r3) # ap[0]
mlgr %r8,%r2 # ap[0]*bp[0]
lgr %r12,%r8 # high word starts the ap*bp carry chain
lgr %r0,%r9 # "tp[0]"*n0
msgr %r0,%r6 # %r0 = m1 (low 64 bits of the product)
lg %r11,0(%r5) #
mlgr %r10,%r0 # np[0]*m1
algr %r11,%r9 # +="tp[0]"
# The low-word sum in %r11 is never stored; only its carry is used.
lghi %r13,0
alcgr %r13,%r10 # %r13 = high word + carry, starts the np*m1 carry chain
la %r7,8 # j=1
lr %r14,%r1 # loop count = num-1
.align 16
.L1st:
lg %r9,0(%r7,%r3) # ap[j]
mlgr %r8,%r2 # ap[j]*bp[0]
algr %r9,%r12 # low word + previous ap*bp carry word
lghi %r12,0
alcgr %r12,%r8 # new ap*bp carry word = high word + carry
lg %r11,0(%r7,%r5) # np[j]
mlgr %r10,%r0 # np[j]*m1
algr %r11,%r13 # low word + previous np*m1 carry word
lghi %r13,0
alcgr %r10,%r13 # +="tp[j]"
algr %r11,%r9 # add the ap*bp column word
alcgr %r13,%r10 # new np*m1 carry word = high word + carry
stg %r11,160-8(%r7,%r15) # tp[j-1]=
la %r7,8(%r7) # j++
brct %r14,.L1st
# Here %r7 = num*8. Fold both carry words into tp[num-1] and tp[num].
algr %r13,%r12
lghi %r12,0
alcgr %r12,%r12 # upmost overflow bit
stg %r13,160-8(%r7,%r15) # tp[num-1]
stg %r12,160(%r7,%r15) # tp[num]
la %r4,8(%r4) # bp++
# Remaining passes: tp = (tp + ap*bp[i] + np*m1) shifted down by one word,
# repeated until %r4 reaches the saved &bp[num].
.Louter:
lg %r2,0(%r4) # bp[i]
lg %r9,0(%r3) # ap[0]
mlgr %r8,%r2 # ap[0]*bp[i]
alg %r9,160(%r15) # +=tp[0]
lghi %r12,0
alcgr %r12,%r8 # ap*bp carry word = high word + carry
lgr %r0,%r9
msgr %r0,%r6 # tp[0]*n0
lg %r11,0(%r5) # np[0]
mlgr %r10,%r0 # np[0]*m1
algr %r11,%r9 # +="tp[0]"
# As in the first pass, the low-word sum is dropped and only the carry kept.
lghi %r13,0
alcgr %r13,%r10 # np*m1 carry word = high word + carry
la %r7,8 # j=1
lr %r14,%r1 # loop count = num-1
.align 16
.Linner:
lg %r9,0(%r7,%r3) # ap[j]
mlgr %r8,%r2 # ap[j]*bp[i]
algr %r9,%r12 # low word + previous ap*bp carry word
lghi %r12,0
alcgr %r8,%r12 # fold that carry into the high word
alg %r9,160(%r7,%r15)# +=tp[j]
alcgr %r12,%r8 # new ap*bp carry word = high word + carry
lg %r11,0(%r7,%r5) # np[j]
mlgr %r10,%r0 # np[j]*m1
algr %r11,%r13 # low word + previous np*m1 carry word
lghi %r13,0
alcgr %r10,%r13 # fold that carry into the high word
algr %r11,%r9 # +="tp[j]"
alcgr %r13,%r10 # new np*m1 carry word = high word + carry
stg %r11,160-8(%r7,%r15) # tp[j-1]=
la %r7,8(%r7) # j++
brct %r14,.Linner
# Here %r7 = num*8 again, so 160(%r7,%r15) addresses tp[num].
algr %r13,%r12
lghi %r12,0
alcgr %r12,%r12 # carry out of the sum of the two carry words
alg %r13,160(%r7,%r15)# accumulate previous upmost overflow bit
lghi %r8,0
alcgr %r12,%r8 # new upmost overflow bit
stg %r13,160-8(%r7,%r15) # tp[num-1]
stg %r12,160(%r7,%r15) # tp[num]
la %r4,8(%r4) # bp++
clg %r4,200(%r7,%r15) # compare to &bp[num]
jne .Louter
# Final reduction: rp = tp - np with borrow propagation, then a mask-based
# select between that difference and tp. No branch depends on the mask.
lg %r2,184(%r7,%r15) # reincarnate rp
la %r3,160(%r15) # %r3 now points at tp[0]
ahi %r1,1 # restore %r1, incidentally clears "borrow"
la %r7,0 # j=0
lr %r14,%r1 # loop count = num
.Lsub: lg %r9,0(%r7,%r3) # tp[j]
lg %r11,0(%r7,%r5) # np[j]
slbgr %r9,%r11 # tp[j] - np[j] - borrow
stg %r9,0(%r7,%r2) # rp[j] = difference
la %r7,8(%r7)
brct %r14,.Lsub
lghi %r8,0
slbgr %r12,%r8 # handle upmost carry
# %r12 = overflow word minus the borrow out of .Lsub. With %r12 = 0 the
# mask in %r13 is all ones and rp keeps tp-np; with %r12 = all ones %r13
# is zero and rp receives tp unchanged.
lghi %r13,-1
xgr %r13,%r12 # %r13 = bitwise complement of %r12
la %r7,0 # j=0
lgr %r14,%r1 # loop count = num
.Lcopy: lg %r8,160(%r7,%r15) # conditional copy
lg %r9,0(%r7,%r2) # rp[j] as left by .Lsub
ngr %r8,%r12 # keep tp[j] under mask %r12
ngr %r9,%r13 # keep rp[j] under mask %r13
ogr %r9,%r8 # merge the two masked words
stg %r7,160(%r7,%r15) # zap tp
# The zap overwrites tp[j] with the byte offset j, so the scratch value
# does not stay on the stack.
stg %r9,0(%r7,%r2) # rp[j] = selected word
la %r7,8(%r7)
brct %r14,.Lcopy
# %r7 = num*8 here, so 216(%r7,%r15) is the saved %r6 slot of the incoming
# frame; lmg reloads %r6..%r15, which also restores the stack pointer.
la %r1,216(%r7,%r15)
lmg %r6,%r15,0(%r1)
lghi %r2,1 # signal "processed"
br %r14
.size bn_mul_mont,.-bn_mul_mont
.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro@openssl.org>"