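# File-viewer metadata: 1790 lines, 23 KiB. The viewer labelled this listing
# "ArmAsm", but the code below is 64-bit PowerPC assembly (GNU as syntax,
# ELF ABI v2 -- see the .abiversion directive).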
|
# Assembler set-up.  The stand-alone "|" / "||" separator lines that the
# listing carried between these directives were extraction residue, not
# assembler input, and have been dropped.
.machine "any"		# accept instructions from any PowerPC variant
.abiversion 2		# 64-bit ELF ABI v2
.text
|
||
|
|
||
|
# ----------------------------------------------------------------------
# bn_mul_mont_int -- word-serial Montgomery multiplication, 64-bit words.
#
# Inputs, as the code below uses them (the names are the customary OpenSSL
# bn_mul_mont ones -- NOTE(review): confirm against the C prototype):
#   r3  result vector             (copied to r9; r3 is reused as carry word)
#   r4  first operand   "ap"
#   r5  second operand  "bp"      (one word consumed per outer pass)
#   r6  modulus         "np"
#   r7  pointer to the reduction constant "n0" (dereferenced once)
#   r8  number of 64-bit words; r8-2 is used as a bdnz count, so r8 >= 3
# Output: r3 = 1.
#
# Working registers:
#   r20 outer byte offset into bp      r21 inner byte offset into ap/np
#   r22 cursor into the scratch vector that starts at 64(r1)
#   r23 current bp word                r24 multiplier for the modulus
#   r25:r26 lo:hi of the ap-side sum   r27:r28 lo:hi of the np-side sum
#   r29:r30 product ap[j]*r23          r31:r0  product np[j]*r24
# r20-r31 are saved below the caller's stack pointer and restored on exit.
#
# The stand-alone "|" / "||" lines interleaved with the instructions in the
# extracted listing were not assembler input and have been removed; the
# instruction sequence itself is unchanged.
# ----------------------------------------------------------------------
.globl bn_mul_mont_int
.type bn_mul_mont_int,@function
.align 5
bn_mul_mont_int:
.localentry bn_mul_mont_int,0

	mr 9,3			# r9 = result pointer
	li 3,0
	slwi 8,8,3		# word count -> byte count
	li 12,-4096
	addi 3,8,352		# scratch vector + 352 bytes of fixed frame
	subf 3,3,1		# r3 = r1 - frame size
	and 3,3,12		# round down to a 4096-byte boundary
	subf 3,1,3		# r3 = displacement from r1 to the new frame
	mr 12,1			# r12 = caller's stack pointer
	srwi 8,8,3		# back to a word count
	stdux 1,1,3		# allocate; old r1 is stored at 0(new r1)

	# Save r20-r31 just below the caller's stack pointer.
	std 20,-96(12)
	std 21,-88(12)
	std 22,-80(12)
	std 23,-72(12)
	std 24,-64(12)
	std 25,-56(12)
	std 26,-48(12)
	std 27,-40(12)
	std 28,-32(12)
	std 29,-24(12)
	std 30,-16(12)
	std 31,-8(12)

	ld 7,0(7)		# r7 = the constant itself
	addi 8,8,-2		# loop count for the word loops (words 2..num-1)

	# ---- first pass: scratch = (ap[] * bp[0] + np[] * r24) >> 64 ----
	ld 23,0(5)		# bp[0]
	ld 10,0(4)		# ap[0]
	addi 22,1,64		# scratch cursor
	mulld 25,10,23		# r25:r26 = ap[0]*bp[0]
	mulhdu 26,10,23

	ld 10,8(4)		# ap[1]
	ld 11,0(6)		# np[0]

	mulld 24,25,7		# r24 = low word * n0

	mulld 29,10,23		# r29:r30 = ap[1]*bp[0]
	mulhdu 30,10,23

	mulld 27,11,24		# r27:r28 = np[0]*r24
	mulhdu 28,11,24
	ld 11,8(6)		# np[1]
	addc 27,27,25		# low words are added only for the carry;
	addze 28,28		# the sum in r27 is never stored

	mulld 31,11,24		# r31:r0 = np[1]*r24
	mulhdu 0,11,24

	mtctr 8
	li 21,16		# byte offset of word 2
	.align 4
.L1st:
	ldx 10,4,21		# next ap word
	addc 25,29,26
	ldx 11,6,21		# next np word
	addze 26,30
	mulld 29,10,23
	addc 27,31,28
	mulhdu 30,10,23
	addze 28,0
	mulld 31,11,24
	addc 27,27,25
	mulhdu 0,11,24
	addze 28,28
	std 27,0(22)		# scratch word for the previous column

	addi 21,21,8
	addi 22,22,8
	bdnz .L1st

	# Drain the products still in flight from the last iteration.
	addc 25,29,26
	addze 26,30

	addc 27,31,28
	addze 28,0
	addc 27,27,25
	addze 28,28
	std 27,0(22)

	li 3,0
	addc 28,28,26
	addze 3,3		# r3 = carry out of the top word (0 or 1)
	std 28,8(22)		# top scratch word

	li 20,8			# byte offset of bp[1]
	.align 4
.Louter:
	# ---- one pass per remaining bp word:
	#      scratch = (scratch + ap[] * bp[i] + np[] * r24) >> 64 ----
	ldx 23,5,20		# bp[i]
	ld 10,0(4)		# ap[0]
	addi 22,1,64		# rewind scratch cursor
	ld 12,64(1)		# scratch[0]
	mulld 25,10,23
	mulhdu 26,10,23
	ld 10,8(4)		# ap[1]
	ld 11,0(6)		# np[0]
	addc 25,25,12
	mulld 29,10,23
	addze 26,26
	mulld 24,25,7		# r24 = low word * n0
	mulhdu 30,10,23
	mulld 27,11,24
	mulhdu 28,11,24
	ld 11,8(6)		# np[1]
	addc 27,27,25		# carry only; r27 is discarded
	mulld 31,11,24
	addze 28,28
	mulhdu 0,11,24

	mtctr 8
	li 21,16
	.align 4
.Linner:
	ldx 10,4,21		# next ap word
	addc 25,29,26
	ld 12,8(22)		# next scratch word
	addze 26,30
	ldx 11,6,21		# next np word
	addc 27,31,28
	mulld 29,10,23
	addze 28,0
	mulhdu 30,10,23
	addc 25,25,12
	mulld 31,11,24
	addze 26,26
	mulhdu 0,11,24
	addc 27,27,25
	addi 21,21,8
	addze 28,28
	std 27,0(22)
	addi 22,22,8
	bdnz .Linner

	# Drain the last column.
	ld 12,8(22)
	addc 25,29,26
	addze 26,30
	addc 25,25,12
	addze 26,26

	addc 27,31,28
	addze 28,0
	addc 27,27,25
	addze 28,28
	std 27,0(22)

	addic 3,3,-1		# CA = (r3 != 0): re-inject last pass's carry
	li 3,0
	adde 28,28,26
	addze 3,3		# new carry word
	std 28,8(22)

	slwi 12,8,3		# 8*(num-2)
	cmpld 20,12
	addi 20,20,8
	ble .Louter		# repeat until the word at 8*(num-1) is done

	# ---- final step: result = scratch - np, kept only if it did not borrow ----
	addi 8,8,2		# restore the full word count
	subfc 21,21,21		# r21 = 0 and CA = 1 (no borrow pending)
	addi 22,1,64
	mtctr 8

	.align 4
.Lsub:
	ldx 12,22,21
	ldx 11,6,21
	subfe 10,11,12		# scratch[j] - np[j] - borrow
	stdx 10,9,21		# tentatively store the difference
	addi 21,21,8
	bdnz .Lsub

	li 21,0
	mtctr 8
	subfe 3,21,3		# r3 = carry word - borrow: all-ones or zero mask

	.align 4
.Lcopy:
	# Select without branching: mask set -> keep scratch, clear -> keep difference.
	ldx 12,22,21
	ldx 10,9,21
	and 12,12,3
	andc 10,10,3
	stdx 21,22,21		# overwrite the scratch word (with its own offset)
	or 10,10,12
	stdx 10,9,21
	addi 21,21,8
	bdnz .Lcopy

	ld 12,0(1)		# caller's stack pointer (back chain)
	li 3,1			# return value
	ld 20,-96(12)
	ld 21,-88(12)
	ld 22,-80(12)
	ld 23,-72(12)
	ld 24,-64(12)
	ld 25,-56(12)
	ld 26,-48(12)
	ld 27,-40(12)
	ld 28,-32(12)
	ld 29,-24(12)
	ld 30,-16(12)
	ld 31,-8(12)
	mr 1,12			# release the frame
	blr
	# Data words emitted after the return; never executed.
	.long 0
	.byte 0,12,4,0,0x80,12,6,0
	.long 0
.size bn_mul_mont_int,.-bn_mul_mont_int
|
||
|
# ----------------------------------------------------------------------
# bn_mul4x_mont_int -- Montgomery multiplication processed four 64-bit
# words at a time.
#
# Inputs (same positions as bn_mul_mont_int; the names are the customary
# OpenSSL ones -- NOTE(review): confirm against the C prototype):
#   r3 result, r4 "ap", r5 "bp", r6 "np", r7 pointer to "n0", r8 word count.
# The loops advance in 4-word steps, so r8 is presumably a multiple of 4
# (caller's responsibility -- nothing here checks it).
# When r8 is a multiple of 8 and r4 == r5 the routine hands over to the
# squaring code at .Lsqr8x_do instead.
# Output: r3 = 1.
#
# Working registers:
#   r9-r12  four ap words            r18-r21 four np words
#   r22-r26 accumulator window       r14-r17 product temporaries
#   r27 current bp word              r28 per-step modulus multiplier
#   r29 scratch cursor               r30 end marker for ap
#   r31 byte counter, cycles 8,16,24,0
#   r3  carry word                   r0  constant zero
# Frame slots: 8*6(r1) = result pointer - 8, 8*7(r1) = bp + 8*num - 32,
#   8*8(r1).. = multipliers put aside / running result words.
# r14-r31 are saved below the caller's stack pointer and restored on exit.
#
# The stand-alone "|" / "||" lines interleaved with the instructions in the
# extracted listing were not assembler input and have been removed; the
# instruction sequence itself is unchanged.
# ----------------------------------------------------------------------
.globl bn_mul4x_mont_int
.type bn_mul4x_mont_int,@function
.align 5
bn_mul4x_mont_int:
.localentry bn_mul4x_mont_int,0

	andi. 0,8,7		# word count a multiple of 8 ...
	bne .Lmul4x_do
	cmpld 4,5		# ... and both operands the same vector?
	bne .Lmul4x_do
	b .Lsqr8x_do		# yes: use the squaring routine
.Lmul4x_do:
	slwi 8,8,3		# r8 = operand length in bytes from here on
	mr 9,1			# r9 = caller's stack pointer
	li 10,-32*8
	sub 10,10,8		# frame = 32 words + one operand length
	stdux 1,1,10		# allocate; back chain stored at 0(r1)

	std 14,-8*18(9)
	std 15,-8*17(9)
	std 16,-8*16(9)
	std 17,-8*15(9)
	std 18,-8*14(9)
	std 19,-8*13(9)
	std 20,-8*12(9)
	std 21,-8*11(9)
	std 22,-8*10(9)
	std 23,-8*9(9)
	std 24,-8*8(9)
	std 25,-8*7(9)
	std 26,-8*6(9)
	std 27,-8*5(9)
	std 28,-8*4(9)
	std 29,-8*3(9)
	std 30,-8*2(9)
	std 31,-8*1(9)

	# Bias the pointers by one word so update-form loads/stores can be used.
	subi 4,4,8
	subi 6,6,8
	subi 3,3,8
	ld 7,0(7)		# r7 = the constant itself

	add 14,5,8
	add 30,4,8		# r30 = biased end of ap
	subi 14,14,8*4		# r14 = address of the last 4-word group of bp

	ld 27,8*0(5)		# bp[0]
	li 22,0
	ld 9,8*1(4)		# ap[0..3]
	li 23,0
	ld 10,8*2(4)
	li 24,0
	ld 11,8*3(4)
	li 25,0
	ldu 12,8*4(4)
	ld 18,8*1(6)		# np[0..3]
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)

	std 3,8*6(1)		# park result pointer
	std 14,8*7(1)		# park bp end marker
	li 3,0			# carry word
	addic 29,1,8*7		# scratch cursor; also clears CA
	li 31,0
	li 0,0
	b .Loop_mul4x_1st_reduction

	.align 5
.Loop_mul4x_1st_reduction:
	# Four rounds: accumulate ap[0..3]*bp[k], derive r28 = r22*n0, add
	# np[0..3]*r28 and shift the window down one word.
	mulld 14,9,27
	addze 3,3		# fold in the carry left by the previous round
	mulld 15,10,27
	addi 31,31,8
	mulld 16,11,27
	andi. 31,31,8*4-1	# sets CR0 for the loop branch at the bottom
	mulld 17,12,27
	addc 22,22,14
	mulhdu 14,9,27
	adde 23,23,15
	mulhdu 15,10,27
	adde 24,24,16
	mulld 28,22,7		# modulus multiplier for this round
	adde 25,25,17
	mulhdu 16,11,27
	addze 26,0
	mulhdu 17,12,27
	ldx 27,5,31		# next bp word of this group
	addc 23,23,14

	stdu 28,8(29)		# put the multiplier aside for the tail loop
	adde 24,24,15
	mulld 15,19,28
	adde 25,25,16
	mulld 16,20,28
	adde 26,26,17
	mulld 17,21,28

	# No low product for np[0] (r18) is formed: r22 is overwritten by the
	# next adde, so only the carry matters, and addic produces it
	# (CA = 1 exactly when r22 != 0).
	addic 22,22,-1
	mulhdu 14,18,28
	adde 22,23,15
	mulhdu 15,19,28
	adde 23,24,16
	mulhdu 16,20,28
	adde 24,25,17
	mulhdu 17,21,28
	adde 25,26,3
	addze 3,0
	addc 22,22,14
	adde 23,23,15
	adde 24,24,16
	adde 25,25,17

	bne .Loop_mul4x_1st_reduction

	cmpld 30,4		# operand only four words long?
	beq .Lmul4x4_post_condition

	ld 9,8*1(4)		# next four ap words
	ld 10,8*2(4)
	ld 11,8*3(4)
	ldu 12,8*4(4)
	ld 28,8*8(1)		# first multiplier put aside above
	ld 18,8*1(6)		# next four np words
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)
	b .Loop_mul4x_1st_tail

	.align 5
.Loop_mul4x_1st_tail:
	# Same four bp words and saved multipliers applied to the next
	# four ap/np words; one result word is stored per round.
	mulld 14,9,27
	addze 3,3
	mulld 15,10,27
	addi 31,31,8
	mulld 16,11,27
	andi. 31,31,8*4-1
	mulld 17,12,27
	addc 22,22,14
	mulhdu 14,9,27
	adde 23,23,15
	mulhdu 15,10,27
	adde 24,24,16
	mulhdu 16,11,27
	adde 25,25,17
	mulhdu 17,12,27
	addze 26,0
	ldx 27,5,31		# next bp word
	addc 23,23,14
	mulld 14,18,28
	adde 24,24,15
	mulld 15,19,28
	adde 25,25,16
	mulld 16,20,28
	adde 26,26,17
	mulld 17,21,28
	addc 22,22,14
	mulhdu 14,18,28
	adde 23,23,15
	mulhdu 15,19,28
	adde 24,24,16
	mulhdu 16,20,28
	adde 25,25,17
	adde 26,26,3
	mulhdu 17,21,28
	addze 3,0
	addi 28,1,8*8
	ldx 28,28,31		# next saved multiplier
	stdu 22,8(29)		# finished result word
	addc 22,23,14
	adde 23,24,15
	adde 24,25,16
	adde 25,26,17

	bne .Loop_mul4x_1st_tail

	sub 15,30,8		# r15 = ap rewound to its (biased) start
	cmpld 30,4		# all ap words consumed?
	beq .Lmul4x_proceed

	ld 9,8*1(4)
	ld 10,8*2(4)
	ld 11,8*3(4)
	ldu 12,8*4(4)
	ld 18,8*1(6)
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)
	b .Loop_mul4x_1st_tail

	.align 5
.Lmul4x_proceed:
	# First four bp words are done: flush the window, rewind ap/np and
	# step to the next group of four bp words.
	ldu 27,8*4(5)
	addze 3,3		# top-most carry
	ld 9,8*1(15)
	ld 10,8*2(15)
	ld 11,8*3(15)
	ld 12,8*4(15)
	addi 4,15,8*4
	sub 6,6,8		# rewind np

	std 22,8*1(29)
	std 23,8*2(29)
	std 24,8*3(29)
	std 25,8*4(29)
	std 3,8*5(29)		# carry word goes right after the result
	ld 22,8*12(1)		# reload the lowest four result words
	ld 23,8*13(1)
	ld 24,8*14(1)
	ld 25,8*15(1)

	ld 18,8*1(6)
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)
	addic 29,1,8*7		# rewind scratch cursor; clears CA
	li 3,0
	b .Loop_mul4x_reduction

	.align 5
.Loop_mul4x_reduction:
	# Same shape as .Loop_mul4x_1st_reduction, but the window starts from
	# the previously stored result instead of zero.
	mulld 14,9,27
	addze 3,3
	mulld 15,10,27
	addi 31,31,8
	mulld 16,11,27
	andi. 31,31,8*4-1
	mulld 17,12,27
	addc 22,22,14
	mulhdu 14,9,27
	adde 23,23,15
	mulhdu 15,10,27
	adde 24,24,16
	mulld 28,22,7		# modulus multiplier for this round
	adde 25,25,17
	mulhdu 16,11,27
	addze 26,0
	mulhdu 17,12,27
	ldx 27,5,31
	addc 23,23,14

	stdu 28,8(29)		# put the multiplier aside for the tail loop
	adde 24,24,15
	mulld 15,19,28
	adde 25,25,16
	mulld 16,20,28
	adde 26,26,17
	mulld 17,21,28

	addic 22,22,-1		# carry only: CA = (r22 != 0); r22 is overwritten next
	mulhdu 14,18,28
	adde 22,23,15
	mulhdu 15,19,28
	adde 23,24,16
	mulhdu 16,20,28
	adde 24,25,17
	mulhdu 17,21,28
	adde 25,26,3
	addze 3,0
	addc 22,22,14
	adde 23,23,15
	adde 24,24,16
	adde 25,25,17

	bne .Loop_mul4x_reduction

	ld 14,8*5(29)		# next four stored result words
	addze 3,3
	ld 15,8*6(29)
	ld 16,8*7(29)
	ld 17,8*8(29)
	ld 9,8*1(4)
	ld 10,8*2(4)
	ld 11,8*3(4)
	ldu 12,8*4(4)
	addc 22,22,14
	adde 23,23,15
	adde 24,24,16
	adde 25,25,17

	ld 28,8*8(1)		# first saved multiplier
	ld 18,8*1(6)
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)
	b .Loop_mul4x_tail

	.align 5
.Loop_mul4x_tail:
	mulld 14,9,27
	addze 3,3
	mulld 15,10,27
	addi 31,31,8
	mulld 16,11,27
	andi. 31,31,8*4-1
	mulld 17,12,27
	addc 22,22,14
	mulhdu 14,9,27
	adde 23,23,15
	mulhdu 15,10,27
	adde 24,24,16
	mulhdu 16,11,27
	adde 25,25,17
	mulhdu 17,12,27
	addze 26,0
	ldx 27,5,31
	addc 23,23,14
	mulld 14,18,28
	adde 24,24,15
	mulld 15,19,28
	adde 25,25,16
	mulld 16,20,28
	adde 26,26,17
	mulld 17,21,28
	addc 22,22,14
	mulhdu 14,18,28
	adde 23,23,15
	mulhdu 15,19,28
	adde 24,24,16
	mulhdu 16,20,28
	adde 25,25,17
	mulhdu 17,21,28
	adde 26,26,3
	addi 28,1,8*8
	ldx 28,28,31		# next saved multiplier
	addze 3,0
	stdu 22,8(29)		# finished result word
	addc 22,23,14
	adde 23,24,15
	adde 24,25,16
	adde 25,26,17

	bne .Loop_mul4x_tail

	ld 14,8*5(29)		# next stored word, or the parked carry word
	sub 15,6,8		# r15 = np rewound to its (biased) start
	addze 3,3
	cmpld 30,4		# all ap words consumed?
	beq .Loop_mul4x_break

	ld 15,8*6(29)
	ld 16,8*7(29)
	ld 17,8*8(29)
	ld 9,8*1(4)
	ld 10,8*2(4)
	ld 11,8*3(4)
	ldu 12,8*4(4)
	addc 22,22,14
	adde 23,23,15
	adde 24,24,16
	adde 25,25,17

	ld 18,8*1(6)
	ld 19,8*2(6)
	ld 20,8*3(6)
	ldu 21,8*4(6)
	b .Loop_mul4x_tail

	.align 5
.Loop_mul4x_break:
	ld 16,8*6(1)		# parked result pointer
	ld 17,8*7(1)		# parked bp end marker
	addc 9,22,14		# add the parked carry word into the top window
	ld 22,8*12(1)
	addze 10,23
	ld 23,8*13(1)
	addze 11,24
	ld 24,8*14(1)
	addze 12,25
	ld 25,8*15(1)
	addze 3,3
	std 9,8*1(29)
	sub 4,30,8		# rewind ap
	std 10,8*2(29)
	std 11,8*3(29)
	std 12,8*4(29)
	std 3,8*5(29)

	ld 18,8*1(15)		# np[0..3] again
	ld 19,8*2(15)
	ld 20,8*3(15)
	ld 21,8*4(15)
	addi 6,15,8*4
	cmpld 5,17		# last group of bp processed?
	beq .Lmul4x_post

	ldu 27,8*4(5)		# next group of bp
	ld 9,8*1(4)
	ld 10,8*2(4)
	ld 11,8*3(4)
	ldu 12,8*4(4)
	li 3,0
	addic 29,1,8*7
	b .Loop_mul4x_reduction

	.align 5
.Lmul4x_post:
	# Final step: store (result - np) to the output, then replace it with
	# the unsubtracted result if the subtraction borrowed.
	srwi 31,8,5		# byte length / 32 = number of 4-word groups
	mr 5,16			# r5  = output cursor (biased)
	subi 31,31,1
	mr 30,16		# r30 = second copy of the output pointer
	subfc 14,18,22
	addi 29,1,8*15
	subfe 15,19,23

	mtctr 31
.Lmul4x_sub:
	ld 18,8*1(6)
	ld 22,8*1(29)
	subfe 16,20,24
	ld 19,8*2(6)
	ld 23,8*2(29)
	subfe 17,21,25
	ld 20,8*3(6)
	ld 24,8*3(29)
	ldu 21,8*4(6)
	ldu 25,8*4(29)
	std 14,8*1(5)
	std 15,8*2(5)
	subfe 14,18,22
	std 16,8*3(5)
	stdu 17,8*4(5)
	subfe 15,19,23
	bdnz .Lmul4x_sub

	ld 9,8*1(30)		# start re-reading the differences just stored
	std 14,8*1(5)
	ld 14,8*12(1)		# ... and the unsubtracted words
	subfe 16,20,24
	ld 10,8*2(30)
	std 15,8*2(5)
	ld 15,8*13(1)
	subfe 17,21,25
	subfe 3,0,3		# r3 = carry word - borrow: all-ones or zero mask
	addi 29,1,8*12
	ld 11,8*3(30)
	std 16,8*3(5)
	ld 16,8*14(1)
	ld 12,8*4(30)
	std 17,8*4(5)
	ld 17,8*15(1)

	mtctr 31
.Lmul4x_cond_copy:
	# Branch-free select per word; the scratch words are zeroed as they
	# are consumed.
	and 14,14,3
	andc 9,9,3
	std 0,8*0(29)
	and 15,15,3
	andc 10,10,3
	std 0,8*1(29)
	and 16,16,3
	andc 11,11,3
	std 0,8*2(29)
	and 17,17,3
	andc 12,12,3
	std 0,8*3(29)
	or 22,14,9
	ld 9,8*5(30)
	ld 14,8*4(29)
	or 23,15,10
	ld 10,8*6(30)
	ld 15,8*5(29)
	or 24,16,11
	ld 11,8*7(30)
	ld 16,8*6(29)
	or 25,17,12
	ld 12,8*8(30)
	ld 17,8*7(29)
	addi 29,29,8*4
	std 22,8*1(30)
	std 23,8*2(30)
	std 24,8*3(30)
	stdu 25,8*4(30)
	bdnz .Lmul4x_cond_copy

	ld 5,0(1)		# caller's stack pointer (back chain)
	and 14,14,3
	andc 9,9,3
	std 0,8*0(29)
	and 15,15,3
	andc 10,10,3
	std 0,8*1(29)
	and 16,16,3
	andc 11,11,3
	std 0,8*2(29)
	and 17,17,3
	andc 12,12,3
	std 0,8*3(29)
	or 22,14,9
	or 23,15,10
	std 0,8*4(29)
	or 24,16,11
	or 25,17,12
	std 22,8*1(30)
	std 23,8*2(30)
	std 24,8*3(30)
	std 25,8*4(30)

	b .Lmul4x_done

	.align 4
.Lmul4x4_post_condition:
	# Four-word operand: the whole result is still in r22-r25 (+ r3).
	ld 4,8*6(1)		# parked result pointer
	ld 5,0(1)		# caller's stack pointer
	addze 3,3

	subfc 9,18,22		# result - np
	subfe 10,19,23
	subfe 11,20,24
	subfe 12,21,25
	subfe 3,0,3		# all-ones if that borrowed, else zero

	and 18,18,3		# add np back only if it borrowed
	and 19,19,3
	addc 9,9,18
	and 20,20,3
	adde 10,10,19
	and 21,21,3
	adde 11,11,20
	adde 12,12,21

	std 9,8*1(4)
	std 10,8*2(4)
	std 11,8*3(4)
	std 12,8*4(4)

.Lmul4x_done:
	std 0,8*8(1)		# zero the four saved-multiplier slots
	std 0,8*9(1)
	std 0,8*10(1)
	std 0,8*11(1)
	li 3,1			# return value
	ld 14,-8*18(5)
	ld 15,-8*17(5)
	ld 16,-8*16(5)
	ld 17,-8*15(5)
	ld 18,-8*14(5)
	ld 19,-8*13(5)
	ld 20,-8*12(5)
	ld 21,-8*11(5)
	ld 22,-8*10(5)
	ld 23,-8*9(5)
	ld 24,-8*8(5)
	ld 25,-8*7(5)
	ld 26,-8*6(5)
	ld 27,-8*5(5)
	ld 28,-8*4(5)
	ld 29,-8*3(5)
	ld 30,-8*2(5)
	ld 31,-8*1(5)
	mr 1,5			# release the frame
	blr
	# Data words emitted after the return; never executed.
	.long 0
	.byte 0,12,4,0x20,0x80,18,6,0
	.long 0
.size bn_mul4x_mont_int,.-bn_mul4x_mont_int
|
||
|
# ----------------------------------------------------------------------
# __bn_sqr8x_mont -- Montgomery squaring, eight 64-bit words at a time.
#
# Not exported.  It is entered by the branch to .Lsqr8x_do at the top of
# bn_mul4x_mont_int, taken when the word count is a multiple of 8 and the
# two operand pointers are equal; it builds its own frame and returns
# straight to that routine's caller with r3 = 1.
#
# Inputs at .Lsqr8x_do: r3 result, r4 operand, r6 modulus, r7 pointer to the
# reduction constant, r8 word count (r5 is not read; it is reused as the
# scratch cursor).
#
# Phases:
#   1. .Lsqr8x_outer_loop / .Lsqr8x_mul : products of distinct operand
#      words, accumulated into a 2*num-word scratch vector on the stack.
#   2. .Lsqr4x_shift_n_add : double that vector and add the squares of the
#      individual words.
#   3. .Lsqr8x_reduction / .Lsqr8x_tail : reduction, eight words per round.
#   4. .Lsqr8x_sub / .Lsqr4x_cond_copy (or .Lsqr8x8_post_condition for an
#      8-word operand): subtract the modulus and keep the difference only
#      if it did not borrow.
#
# Working registers: r9-r12,r14-r17 eight operand/modulus words,
#   r22-r29 accumulator window, r18-r21 temporaries, r30 counter,
#   r31 carry word / reduction multiplier, r0 constant zero.
# Frame slots: 8*6(r1) result pointer - 8, 8*7(r1) modulus pointer - 8,
#   8*8(r1) reduction constant, 8*9(r1) end of scratch, 8*10(r1) top-most
#   carry, 8*12(r1).. scratch vector.
#
# The stand-alone "|" / "||" lines interleaved with the instructions in the
# extracted listing were not assembler input and have been removed; the
# instruction sequence itself is unchanged.
# ----------------------------------------------------------------------
.align 5
__bn_sqr8x_mont:
.Lsqr8x_do:
	mr 9,1			# r9 = caller's stack pointer
	slwi 10,8,4		# 16 bytes per operand word: room for 2*num words
	li 11,-32*8
	sub 10,11,10
	slwi 8,8,3		# r8 = operand length in bytes from here on
	stdux 1,1,10		# allocate; back chain stored at 0(r1)

	std 14,-8*18(9)
	std 15,-8*17(9)
	std 16,-8*16(9)
	std 17,-8*15(9)
	std 18,-8*14(9)
	std 19,-8*13(9)
	std 20,-8*12(9)
	std 21,-8*11(9)
	std 22,-8*10(9)
	std 23,-8*9(9)
	std 24,-8*8(9)
	std 25,-8*7(9)
	std 26,-8*6(9)
	std 27,-8*5(9)
	std 28,-8*4(9)
	std 29,-8*3(9)
	std 30,-8*2(9)
	std 31,-8*1(9)

	# Bias the pointers by one word so update-form loads/stores can be used.
	subi 4,4,8
	subi 18,6,8
	subi 3,3,8
	ld 7,0(7)		# r7 = the constant itself
	li 0,0

	add 6,4,8		# r6 = biased end of the operand
	ld 9,8*1(4)		# first eight operand words

	ld 10,8*2(4)
	li 23,0
	ld 11,8*3(4)
	li 24,0
	ld 12,8*4(4)
	li 25,0
	ld 14,8*5(4)
	li 26,0
	ld 15,8*6(4)
	li 27,0
	ld 16,8*7(4)
	li 28,0
	ldu 17,8*8(4)
	li 29,0

	addi 5,1,8*11		# scratch cursor (biased)
	subic. 30,8,8*8
	b .Lsqr8x_zero_start

	.align 5
.Lsqr8x_zero:
	# Clear the scratch vector, 16 words per iteration.
	subic. 30,30,8*8
	std 0,8*1(5)
	std 0,8*2(5)
	std 0,8*3(5)
	std 0,8*4(5)
	std 0,8*5(5)
	std 0,8*6(5)
	std 0,8*7(5)
	std 0,8*8(5)
.Lsqr8x_zero_start:
	std 0,8*9(5)
	std 0,8*10(5)
	std 0,8*11(5)
	std 0,8*12(5)
	std 0,8*13(5)
	std 0,8*14(5)
	std 0,8*15(5)
	stdu 0,8*16(5)
	bne .Lsqr8x_zero

	std 3,8*6(1)		# park result pointer
	std 18,8*7(1)		# park modulus pointer
	std 7,8*8(1)		# park the constant
	std 5,8*9(1)		# park end-of-scratch pointer
	std 0,8*10(1)		# top-most carry starts at zero
	addi 5,1,8*11

	.align 5
.Lsqr8x_outer_loop:
	# Products of every pair of distinct words among the eight in
	# r9..r17 (w0..w7), added into the window r22..r29.  Finished
	# window words are stored to the scratch vector as they fall out.
	# NOTE(review): on the first pass r22 has not been initialised here,
	# so the first scratch word stored is not meaningful; it appears to
	# be overwritten in .Lsqr4x_shift_n_add -- confirm.
	mulld 18,10,9		# low halves of w1..w7 * w0
	mulld 19,11,9
	mulld 20,12,9
	mulld 21,14,9
	addc 23,23,18
	mulld 18,15,9
	adde 24,24,19
	mulld 19,16,9
	adde 25,25,20
	mulld 20,17,9
	adde 26,26,21
	mulhdu 21,10,9		# high halves of w1..w7 * w0
	adde 27,27,18
	mulhdu 18,11,9
	adde 28,28,19
	mulhdu 19,12,9
	adde 29,29,20
	mulhdu 20,14,9
	std 22,8*1(5)
	addze 22,0
	std 23,8*2(5)
	addc 24,24,21
	mulhdu 21,15,9
	adde 25,25,18
	mulhdu 18,16,9
	adde 26,26,19
	mulhdu 19,17,9
	adde 27,27,20
	mulld 20,11,10		# low halves of w2..w7 * w1
	adde 28,28,21
	mulld 21,12,10
	adde 29,29,18
	mulld 18,14,10
	adde 22,22,19

	mulld 19,15,10
	addc 25,25,20
	mulld 20,16,10
	adde 26,26,21
	mulld 21,17,10
	adde 27,27,18
	mulhdu 18,11,10		# high halves of w2..w7 * w1
	adde 28,28,19
	mulhdu 19,12,10
	adde 29,29,20
	mulhdu 20,14,10
	adde 22,22,21
	mulhdu 21,15,10
	std 24,8*3(5)
	addze 23,0
	std 25,8*4(5)
	addc 26,26,18
	mulhdu 18,16,10
	adde 27,27,19
	mulhdu 19,17,10
	adde 28,28,20
	mulld 20,12,11		# low halves of w3..w7 * w2
	adde 29,29,21
	mulld 21,14,11
	adde 22,22,18
	mulld 18,15,11
	adde 23,23,19

	mulld 19,16,11
	addc 27,27,20
	mulld 20,17,11
	adde 28,28,21
	mulhdu 21,12,11		# high halves of w3..w7 * w2
	adde 29,29,18
	mulhdu 18,14,11
	adde 22,22,19
	mulhdu 19,15,11
	adde 23,23,20
	mulhdu 20,16,11
	std 26,8*5(5)
	addze 24,0
	std 27,8*6(5)
	addc 28,28,21
	mulhdu 21,17,11
	adde 29,29,18
	mulld 18,14,12		# low halves of w4..w7 * w3
	adde 22,22,19
	mulld 19,15,12
	adde 23,23,20
	mulld 20,16,12
	adde 24,24,21

	mulld 21,17,12
	addc 29,29,18
	mulhdu 18,14,12		# high halves of w4..w7 * w3
	adde 22,22,19
	mulhdu 19,15,12
	adde 23,23,20
	mulhdu 20,16,12
	adde 24,24,21
	mulhdu 21,17,12
	std 28,8*7(5)
	addze 25,0
	stdu 29,8*8(5)
	addc 22,22,18
	mulld 18,15,14		# low halves of w5..w7 * w4
	adde 23,23,19
	mulld 19,16,14
	adde 24,24,20
	mulld 20,17,14
	adde 25,25,21

	mulhdu 21,15,14		# high halves of w5..w7 * w4
	addc 23,23,18
	mulhdu 18,16,14
	adde 24,24,19
	mulhdu 19,17,14
	adde 25,25,20
	mulld 20,16,15		# low halves of w6..w7 * w5
	addze 26,0
	addc 24,24,21
	mulld 21,17,15
	adde 25,25,18
	mulhdu 18,16,15		# high halves of w6..w7 * w5
	adde 26,26,19

	mulhdu 19,17,15
	addc 25,25,20
	mulld 20,17,16		# w7 * w6
	adde 26,26,21
	mulhdu 21,17,16
	addze 27,0
	addc 26,26,18
	cmpld 6,4		# were these the operand's last eight words?
	adde 27,27,19

	addc 27,27,20
	sub 18,6,8		# r18 = operand pointer rewound to its biased start
	addze 28,0
	add 28,28,21

	beq .Lsqr8x_outer_break

	# More words follow: multiply them by the eight words just handled.
	mr 7,9			# first multiplier = w0
	ld 9,8*1(5)		# add the stored scratch words into the window
	ld 10,8*2(5)
	ld 11,8*3(5)
	ld 12,8*4(5)
	ld 14,8*5(5)
	ld 15,8*6(5)
	ld 16,8*7(5)
	ld 17,8*8(5)
	addc 22,22,9
	ld 9,8*1(4)		# ... while loading the next eight operand words
	adde 23,23,10
	ld 10,8*2(4)
	adde 24,24,11
	ld 11,8*3(4)
	adde 25,25,12
	ld 12,8*4(4)
	adde 26,26,14
	ld 14,8*5(4)
	adde 27,27,15
	ld 15,8*6(4)
	adde 28,28,16
	ld 16,8*7(4)
	subi 3,4,8*7		# r3 = base for fetching the multipliers
	addze 29,17
	ldu 17,8*8(4)

	li 30,0
	b .Lsqr8x_mul

	.align 5
.Lsqr8x_mul:
	# Eight rounds: window += (r9..r17) * r7, one word stored per round,
	# next multiplier fetched from r3 + r30.
	mulld 18,9,7
	addze 31,0		# carry word; picks up the carry left by the previous round
	mulld 19,10,7
	addi 30,30,8
	mulld 20,11,7
	andi. 30,30,8*8-1	# sets CR0 for the loop branch at the bottom
	mulld 21,12,7
	addc 22,22,18
	mulld 18,14,7
	adde 23,23,19
	mulld 19,15,7
	adde 24,24,20
	mulld 20,16,7
	adde 25,25,21
	mulld 21,17,7
	adde 26,26,18
	mulhdu 18,9,7
	adde 27,27,19
	mulhdu 19,10,7
	adde 28,28,20
	mulhdu 20,11,7
	adde 29,29,21
	mulhdu 21,12,7
	addze 31,31
	stdu 22,8(5)
	addc 22,23,18
	mulhdu 18,14,7
	adde 23,24,19
	mulhdu 19,15,7
	adde 24,25,20
	mulhdu 20,16,7
	adde 25,26,21
	mulhdu 21,17,7
	ldx 7,3,30		# next multiplier
	adde 26,27,18
	adde 27,28,19
	adde 28,29,20
	adde 29,31,21

	bne .Lsqr8x_mul

	cmpld 4,6		# reached the end of the operand?
	beq .Lsqr8x_break

	ld 9,8*1(5)		# add the next stored scratch words ...
	ld 10,8*2(5)
	ld 11,8*3(5)
	ld 12,8*4(5)
	ld 14,8*5(5)
	ld 15,8*6(5)
	ld 16,8*7(5)
	ld 17,8*8(5)
	addc 22,22,9
	ld 9,8*1(4)		# ... and load the next eight operand words
	adde 23,23,10
	ld 10,8*2(4)
	adde 24,24,11
	ld 11,8*3(4)
	adde 25,25,12
	ld 12,8*4(4)
	adde 26,26,14
	ld 14,8*5(4)
	adde 27,27,15
	ld 15,8*6(4)
	adde 28,28,16
	ld 16,8*7(4)
	adde 29,29,17
	ldu 17,8*8(4)

	b .Lsqr8x_mul

	.align 5
.Lsqr8x_break:
	# Step the outer loop to the eight words that follow the multipliers.
	ld 9,8*8(3)
	addi 4,3,8*15
	ld 10,8*9(3)
	sub. 18,6,4		# bytes left after this group; zero on the last one
	ld 11,8*10(3)
	sub 19,5,18		# scratch cursor rewound by the same amount
	ld 12,8*11(3)
	ld 14,8*12(3)
	ld 15,8*13(3)
	ld 16,8*14(3)
	ld 17,8*15(3)
	beq .Lsqr8x_outer_loop

	std 22,8*1(5)		# flush the window and reload it at the new position
	ld 22,8*1(19)
	std 23,8*2(5)
	ld 23,8*2(19)
	std 24,8*3(5)
	ld 24,8*3(19)
	std 25,8*4(5)
	ld 25,8*4(19)
	std 26,8*5(5)
	ld 26,8*5(19)
	std 27,8*6(5)
	ld 27,8*6(19)
	std 28,8*7(5)
	ld 28,8*7(19)
	std 29,8*8(5)
	ld 29,8*8(19)
	mr 5,19
	b .Lsqr8x_outer_loop

	.align 5
.Lsqr8x_outer_break:
	# Phase 2 set-up.  r18 still holds the rewound operand pointer.
	ld 10,8*1(18)		# operand words 0..3
	ld 12,8*2(18)
	ld 15,8*3(18)
	ld 17,8*4(18)
	addi 4,18,8*4

	ld 19,8*13(1)		# scratch words 1..4
	ld 20,8*14(1)
	ld 21,8*15(1)
	ld 18,8*16(1)

	std 22,8*1(5)		# flush the last window (r29 is not stored)
	srwi 30,8,5		# byte length / 32
	std 23,8*2(5)
	subi 30,30,1
	std 24,8*3(5)
	std 25,8*4(5)
	std 26,8*5(5)
	std 27,8*6(5)
	std 28,8*7(5)

	addi 5,1,8*11		# rewind scratch cursor
	mulld 22,10,10		# square of word 0
	mulhdu 10,10,10
	add 23,19,19		# scratch word * 2 ...
	srdi 19,19,64-1		# ... keeping the bit shifted out
	mulld 11,12,12
	mulhdu 12,12,12
	addc 23,23,10
	add 24,20,20
	srdi 20,20,64-1
	add 25,21,21
	srdi 21,21,64-1
	or 24,24,19

	mtctr 30
.Lsqr4x_shift_n_add:
	# Per iteration: eight scratch words are doubled (the bit shifted out
	# of each word is or-ed into the next) and four word squares are added.
	mulld 14,15,15
	mulhdu 15,15,15
	ld 19,8*6(5)
	ld 10,8*1(4)
	adde 24,24,11
	add 26,18,18
	srdi 18,18,64-1
	or 25,25,20
	ld 20,8*7(5)
	adde 25,25,12
	ld 12,8*2(4)
	add 27,19,19
	srdi 19,19,64-1
	or 26,26,21
	ld 21,8*8(5)
	mulld 16,17,17
	mulhdu 17,17,17
	adde 26,26,14
	add 28,20,20
	srdi 20,20,64-1
	or 27,27,18
	ld 18,8*9(5)
	adde 27,27,15
	ld 15,8*3(4)
	add 29,21,21
	srdi 21,21,64-1
	or 28,28,19
	ld 19,8*10(5)
	mulld 9,10,10
	mulhdu 10,10,10
	adde 28,28,16
	std 22,8*1(5)
	add 22,18,18
	srdi 18,18,64-1
	or 29,29,20
	ld 20,8*11(5)
	adde 29,29,17
	ldu 17,8*4(4)
	std 23,8*2(5)
	add 23,19,19
	srdi 19,19,64-1
	or 22,22,21
	ld 21,8*12(5)
	mulld 11,12,12
	mulhdu 12,12,12
	adde 22,22,9
	std 24,8*3(5)
	add 24,20,20
	srdi 20,20,64-1
	or 23,23,18
	ld 18,8*13(5)
	adde 23,23,10
	std 25,8*4(5)
	std 26,8*5(5)
	std 27,8*6(5)
	std 28,8*7(5)
	stdu 29,8*8(5)
	add 25,21,21
	srdi 21,21,64-1
	or 24,24,19
	bdnz .Lsqr4x_shift_n_add
	ld 4,8*7(1)		# r4 now walks the modulus (parked pointer)
	ld 7,8*8(1)		# the reduction constant

	# Last half-iteration of the doubling, interleaved with the set-up
	# for the reduction.
	mulld 14,15,15
	mulhdu 15,15,15
	std 22,8*1(5)
	ld 22,8*12(1)		# reload scratch word 0
	ld 19,8*6(5)
	adde 24,24,11
	add 26,18,18
	srdi 18,18,64-1
	or 25,25,20
	ld 20,8*7(5)
	adde 25,25,12
	add 27,19,19
	srdi 19,19,64-1
	or 26,26,21
	mulld 16,17,17
	mulhdu 17,17,17
	adde 26,26,14
	add 28,20,20
	srdi 20,20,64-1
	or 27,27,18
	std 23,8*2(5)
	ld 23,8*13(1)
	adde 27,27,15
	or 28,28,19
	ld 9,8*1(4)		# first eight modulus words
	ld 10,8*2(4)
	adde 28,28,16
	ld 11,8*3(4)
	ld 12,8*4(4)
	adde 29,17,20
	ld 14,8*5(4)
	ld 15,8*6(4)

	# ---- phase 3: reduction, eight words per round ----
	mulld 31,7,22		# r31 = lowest window word * constant
	li 30,8
	ld 16,8*7(4)
	add 6,4,8		# r6 = biased end of the modulus
	ldu 17,8*8(4)
	std 24,8*3(5)
	ld 24,8*14(1)
	std 25,8*4(5)
	ld 25,8*15(1)
	std 26,8*5(5)
	ld 26,8*16(1)
	std 27,8*6(5)
	ld 27,8*17(1)
	std 28,8*7(5)
	ld 28,8*18(1)
	std 29,8*8(5)
	ld 29,8*19(1)
	addi 5,1,8*11
	mtctr 30
	b .Lsqr8x_reduction

	.align 5
.Lsqr8x_reduction:
	# Eight rounds: window = (window + (r9..r17) * r31) >> 64.
	# No low product for r9 is formed: r22 is overwritten by the next
	# adde, so only the carry matters, and addic produces it
	# (CA = 1 exactly when r22 != 0).
	mulld 19,10,31
	mulld 20,11,31
	stdu 31,8(5)		# put the multiplier aside for the tail loop
	mulld 21,12,31

	addic 22,22,-1
	mulld 18,14,31
	adde 22,23,19
	mulld 19,15,31
	adde 23,24,20
	mulld 20,16,31
	adde 24,25,21
	mulld 21,17,31
	adde 25,26,18
	mulhdu 18,9,31
	adde 26,27,19
	mulhdu 19,10,31
	adde 27,28,20
	mulhdu 20,11,31
	adde 28,29,21
	mulhdu 21,12,31
	addze 29,0
	addc 22,22,18
	mulhdu 18,14,31
	adde 23,23,19
	mulhdu 19,15,31
	adde 24,24,20
	mulhdu 20,16,31
	adde 25,25,21
	mulhdu 21,17,31
	mulld 31,7,22		# multiplier for the next round
	adde 26,26,18
	adde 27,27,19
	adde 28,28,20
	adde 29,29,21
	bdnz .Lsqr8x_reduction

	ld 18,8*1(5)		# add the next eight scratch words
	ld 19,8*2(5)
	ld 20,8*3(5)
	ld 21,8*4(5)
	subi 3,5,8*7		# r3 = base of the multipliers just stored
	cmpld 6,4		# modulus only eight words long?
	addc 22,22,18
	ld 18,8*5(5)
	adde 23,23,19
	ld 19,8*6(5)
	adde 24,24,20
	ld 20,8*7(5)
	adde 25,25,21
	ld 21,8*8(5)
	adde 26,26,18
	adde 27,27,19
	adde 28,28,20
	adde 29,29,21

	beq .Lsqr8x8_post_condition

	ld 7,8*0(3)		# first saved multiplier
	ld 9,8*1(4)		# next eight modulus words
	ld 10,8*2(4)
	ld 11,8*3(4)
	ld 12,8*4(4)
	ld 14,8*5(4)
	ld 15,8*6(4)
	ld 16,8*7(4)
	ldu 17,8*8(4)
	li 30,0

	.align 5
.Lsqr8x_tail:
	# Same shape as .Lsqr8x_mul: window += (r9..r17) * r7 for each of the
	# eight saved multipliers.
	mulld 18,9,7
	addze 31,0
	mulld 19,10,7
	addi 30,30,8
	mulld 20,11,7
	andi. 30,30,8*8-1
	mulld 21,12,7
	addc 22,22,18
	mulld 18,14,7
	adde 23,23,19
	mulld 19,15,7
	adde 24,24,20
	mulld 20,16,7
	adde 25,25,21
	mulld 21,17,7
	adde 26,26,18
	mulhdu 18,9,7
	adde 27,27,19
	mulhdu 19,10,7
	adde 28,28,20
	mulhdu 20,11,7
	adde 29,29,21
	mulhdu 21,12,7
	addze 31,31
	stdu 22,8(5)
	addc 22,23,18
	mulhdu 18,14,7
	adde 23,24,19
	mulhdu 19,15,7
	adde 24,25,20
	mulhdu 20,16,7
	adde 25,26,21
	mulhdu 21,17,7
	ldx 7,3,30		# next saved multiplier
	adde 26,27,18
	adde 27,28,19
	adde 28,29,20
	adde 29,31,21

	bne .Lsqr8x_tail

	ld 9,8*1(5)
	ld 31,8*10(1)		# parked top-most carry, needed if we break out
	cmpld 6,4		# reached the end of the modulus?
	ld 10,8*2(5)
	sub 20,6,8		# r20 = modulus pointer rewound to its biased start
	ld 11,8*3(5)
	ld 12,8*4(5)
	ld 14,8*5(5)
	ld 15,8*6(5)
	ld 16,8*7(5)
	ld 17,8*8(5)
	beq .Lsqr8x_tail_break

	addc 22,22,9
	ld 9,8*1(4)
	adde 23,23,10
	ld 10,8*2(4)
	adde 24,24,11
	ld 11,8*3(4)
	adde 25,25,12
	ld 12,8*4(4)
	adde 26,26,14
	ld 14,8*5(4)
	adde 27,27,15
	ld 15,8*6(4)
	adde 28,28,16
	ld 16,8*7(4)
	adde 29,29,17
	ldu 17,8*8(4)

	b .Lsqr8x_tail

	.align 5
.Lsqr8x_tail_break:
	ld 7,8*8(1)		# the reduction constant
	ld 21,8*9(1)		# end-of-scratch pointer
	addi 30,5,8*8		# end of the current window

	addic 31,31,-1		# CA = (parked carry != 0)
	adde 18,22,9
	ld 22,8*8(3)		# window for the next reduction round
	ld 9,8*1(20)		# modulus words 0..7 again
	adde 19,23,10
	ld 23,8*9(3)
	ld 10,8*2(20)
	adde 24,24,11
	ld 11,8*3(20)
	adde 25,25,12
	ld 12,8*4(20)
	adde 26,26,14
	ld 14,8*5(20)
	adde 27,27,15
	ld 15,8*6(20)
	adde 28,28,16
	ld 16,8*7(20)
	adde 29,29,17
	ld 17,8*8(20)
	addi 4,20,8*8
	addze 20,0		# new top-most carry
	mulld 31,7,22
	std 18,8*1(5)
	cmpld 30,21		# did the window reach the end of the scratch vector?
	std 19,8*2(5)
	li 30,8
	std 24,8*3(5)
	ld 24,8*10(3)
	std 25,8*4(5)
	ld 25,8*11(3)
	std 26,8*5(5)
	ld 26,8*12(3)
	std 27,8*6(5)
	ld 27,8*13(3)
	std 28,8*7(5)
	ld 28,8*14(3)
	std 29,8*8(5)
	ld 29,8*15(3)
	std 20,8*10(1)		# park the carry again
	addi 5,3,8*7		# slide the window
	mtctr 30
	bne .Lsqr8x_reduction

	# ---- phase 4: subtract the modulus, then select ----
	ld 3,8*6(1)		# parked result pointer
	srwi 30,8,6		# byte length / 64 = number of 8-word groups
	mr 7,5			# keep the scratch position for the copy loop
	addi 5,5,8*8
	subi 30,30,1
	subfc 18,9,22
	subfe 19,10,23
	mr 31,20		# top-most carry
	mr 6,3			# second copy of the result pointer

	mtctr 30
	b .Lsqr8x_sub

	.align 5
.Lsqr8x_sub:
	ld 9,8*1(4)
	ld 22,8*1(5)
	ld 10,8*2(4)
	ld 23,8*2(5)
	subfe 20,11,24
	ld 11,8*3(4)
	ld 24,8*3(5)
	subfe 21,12,25
	ld 12,8*4(4)
	ld 25,8*4(5)
	std 18,8*1(3)
	subfe 18,14,26
	ld 14,8*5(4)
	ld 26,8*5(5)
	std 19,8*2(3)
	subfe 19,15,27
	ld 15,8*6(4)
	ld 27,8*6(5)
	std 20,8*3(3)
	subfe 20,16,28
	ld 16,8*7(4)
	ld 28,8*7(5)
	std 21,8*4(3)
	subfe 21,17,29
	ldu 17,8*8(4)
	ldu 29,8*8(5)
	std 18,8*5(3)
	subfe 18,9,22
	std 19,8*6(3)
	subfe 19,10,23
	std 20,8*7(3)
	stdu 21,8*8(3)
	bdnz .Lsqr8x_sub

	srwi 30,8,5		# the copy loop works in 4-word groups
	ld 9,8*1(6)		# differences already stored
	ld 22,8*1(7)		# unsubtracted words
	subi 30,30,1
	ld 10,8*2(6)
	ld 23,8*2(7)
	subfe 20,11,24
	ld 11,8*3(6)
	ld 24,8*3(7)
	subfe 21,12,25
	ld 12,8*4(6)
	ldu 25,8*4(7)
	std 18,8*1(3)
	subfe 18,14,26
	std 19,8*2(3)
	subfe 19,15,27
	std 20,8*3(3)
	subfe 20,16,28
	std 21,8*4(3)
	subfe 21,17,29
	std 18,8*5(3)
	subfe 31,0,31		# r31 = carry - borrow: all-ones or zero mask
	std 19,8*6(3)
	std 20,8*7(3)
	std 21,8*8(3)

	addi 5,1,8*11
	mtctr 30

.Lsqr4x_cond_copy:
	# Branch-free select per word; scratch words are zeroed on the way.
	andc 9,9,31
	std 0,-8*3(7)
	and 22,22,31
	std 0,-8*2(7)
	andc 10,10,31
	std 0,-8*1(7)
	and 23,23,31
	std 0,-8*0(7)
	andc 11,11,31
	std 0,8*1(5)
	and 24,24,31
	std 0,8*2(5)
	andc 12,12,31
	std 0,8*3(5)
	and 25,25,31
	stdu 0,8*4(5)
	or 18,9,22
	ld 9,8*5(6)
	ld 22,8*1(7)
	or 19,10,23
	ld 10,8*6(6)
	ld 23,8*2(7)
	or 20,11,24
	ld 11,8*7(6)
	ld 24,8*3(7)
	or 21,12,25
	ld 12,8*8(6)
	ldu 25,8*4(7)
	std 18,8*1(6)
	std 19,8*2(6)
	std 20,8*3(6)
	stdu 21,8*4(6)
	bdnz .Lsqr4x_cond_copy

	ld 4,0(1)		# caller's stack pointer (back chain)
	andc 9,9,31
	and 22,22,31
	andc 10,10,31
	and 23,23,31
	andc 11,11,31
	and 24,24,31
	andc 12,12,31
	and 25,25,31
	or 18,9,22
	or 19,10,23
	or 20,11,24
	or 21,12,25
	std 18,8*1(6)
	std 19,8*2(6)
	std 20,8*3(6)
	std 21,8*4(6)

	b .Lsqr8x_done

	.align 5
.Lsqr8x8_post_condition:
	# Eight-word operand: the whole result is still in r22-r29 (+ carry).
	ld 3,8*6(1)		# parked result pointer
	ld 4,0(1)		# caller's stack pointer
	addze 31,0

	subfc 22,9,22		# result - modulus, zeroing scratch in between
	subfe 23,10,23
	std 0,8*12(1)
	std 0,8*13(1)
	subfe 24,11,24
	std 0,8*14(1)
	std 0,8*15(1)
	subfe 25,12,25
	std 0,8*16(1)
	std 0,8*17(1)
	subfe 26,14,26
	std 0,8*18(1)
	std 0,8*19(1)
	subfe 27,15,27
	std 0,8*20(1)
	std 0,8*21(1)
	subfe 28,16,28
	std 0,8*22(1)
	std 0,8*23(1)
	subfe 29,17,29
	std 0,8*24(1)
	std 0,8*25(1)
	subfe 31,0,31		# all-ones if that borrowed, else zero
	std 0,8*26(1)
	std 0,8*27(1)

	and 9,9,31		# add the modulus back only if it borrowed
	and 10,10,31
	addc 22,22,9
	and 11,11,31
	adde 23,23,10
	and 12,12,31
	adde 24,24,11
	and 14,14,31
	adde 25,25,12
	and 15,15,31
	adde 26,26,14
	and 16,16,31
	adde 27,27,15
	and 17,17,31
	adde 28,28,16
	adde 29,29,17
	std 22,8*1(3)
	std 23,8*2(3)
	std 24,8*3(3)
	std 25,8*4(3)
	std 26,8*5(3)
	std 27,8*6(3)
	std 28,8*7(3)
	std 29,8*8(3)

.Lsqr8x_done:
	std 0,8*8(1)		# zero the parked constant and carry slots
	std 0,8*10(1)

	ld 14,-8*18(4)
	li 3,1			# return value
	ld 15,-8*17(4)
	ld 16,-8*16(4)
	ld 17,-8*15(4)
	ld 18,-8*14(4)
	ld 19,-8*13(4)
	ld 20,-8*12(4)
	ld 21,-8*11(4)
	ld 22,-8*10(4)
	ld 23,-8*9(4)
	ld 24,-8*8(4)
	ld 25,-8*7(4)
	ld 26,-8*6(4)
	ld 27,-8*5(4)
	ld 28,-8*4(4)
	ld 29,-8*3(4)
	ld 30,-8*2(4)
	ld 31,-8*1(4)
	mr 1,4			# release the frame
	blr
	# Data words emitted after the return; never executed.
	.long 0
	.byte 0,12,4,0x20,0x80,18,6,0
	.long 0
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
|
||
|
# NUL-terminated ASCII identification string:
# "Montgomery Multiplication for PPC,CRYPTOGAMS by <appro@openssl.org>"
# (The stand-alone "|" / "||" separator lines from the extracted listing
# have been removed here as well.)
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
|