- // +build arm64,!noasm
- #include "textflag.h"
- TEXT ·fp751ConditionalSwap(SB), NOSPLIT, $0-17
- MOVD x+0(FP), R0
- MOVD y+8(FP), R1
- MOVB choice+16(FP), R2
- // Set flags
- // If choice is not 0 or 1, this implementation will swap completely
- CMP $0, R2
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 0(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 0(R1)
- LDP 16(R0), (R3, R4)
- LDP 16(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 16(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 16(R1)
- LDP 32(R0), (R3, R4)
- LDP 32(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 32(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 32(R1)
- LDP 48(R0), (R3, R4)
- LDP 48(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 48(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 48(R1)
- LDP 64(R0), (R3, R4)
- LDP 64(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 64(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 64(R1)
- LDP 80(R0), (R3, R4)
- LDP 80(R1), (R5, R6)
- CSEL EQ, R3, R5, R7
- CSEL EQ, R4, R6, R8
- STP (R7, R8), 80(R0)
- CSEL NE, R3, R5, R9
- CSEL NE, R4, R6, R10
- STP (R9, R10), 80(R1)
- RET
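For orientation, the flag-free selection this routine performs can be sketched in Go with an XOR mask derived from `choice` (expected to be 0 or 1, as in the assembly's contract). The 12-limb layout and the names `numWords`/`condSwap` are illustrative assumptions, not the package's API:

```go
package main

import "fmt"

const numWords = 12 // a 751-bit element as 12 little-endian 64-bit limbs (assumed layout)

// condSwap swaps x and y when choice is 1 and leaves them unchanged when
// choice is 0, without branching on choice. The assembly achieves the same
// effect with CSEL on the flags set by CMP; here an all-ones/all-zeros mask
// drives an XOR swap.
func condSwap(x, y *[numWords]uint64, choice uint8) {
	mask := uint64(0) - uint64(choice&1) // 0xFFFF... if choice == 1, 0 otherwise
	for i := 0; i < numWords; i++ {
		t := mask & (x[i] ^ y[i])
		x[i] ^= t
		y[i] ^= t
	}
}

func main() {
	a := [numWords]uint64{1, 2, 3}
	b := [numWords]uint64{7, 8, 9}
	condSwap(&a, &b, 1)
	fmt.Println(a[0], b[0]) // 7 1
}
```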
- TEXT ·fp751AddReduced(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- // Load first summand into R3-R14
- // Add first summand and second summand and store result in R3-R14
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R15, R16)
- LDP 16(R0), (R5, R6)
- LDP 16(R1), (R17, R19)
- ADDS R15, R3
- ADCS R16, R4
- ADCS R17, R5
- ADCS R19, R6
- LDP 32(R0), (R7, R8)
- LDP 32(R1), (R15, R16)
- LDP 48(R0), (R9, R10)
- LDP 48(R1), (R17, R19)
- ADCS R15, R7
- ADCS R16, R8
- ADCS R17, R9
- ADCS R19, R10
- LDP 64(R0), (R11, R12)
- LDP 64(R1), (R15, R16)
- LDP 80(R0), (R13, R14)
- LDP 80(R1), (R17, R19)
- ADCS R15, R11
- ADCS R16, R12
- ADCS R17, R13
- ADC R19, R14
- // Subtract 2 * p751 in R15-R24 from the result in R3-R14
- LDP ·p751x2+0(SB), (R15, R16)
- SUBS R15, R3
- SBCS R16, R4
- LDP ·p751x2+40(SB), (R17, R19)
- SBCS R16, R5
- SBCS R16, R6
- SBCS R16, R7
- LDP ·p751x2+56(SB), (R20, R21)
- SBCS R17, R8
- SBCS R19, R9
- LDP ·p751x2+72(SB), (R22, R23)
- SBCS R20, R10
- SBCS R21, R11
- MOVD ·p751x2+88(SB), R24
- SBCS R22, R12
- SBCS R23, R13
- SBCS R24, R14
- SBC ZR, ZR, R25
- // If x + y - 2 * p751 < 0, R25 is all ones (a borrow mask) and 2 * p751 should be added back
- AND R25, R15
- AND R25, R16
- AND R25, R17
- AND R25, R19
- AND R25, R20
- AND R25, R21
- AND R25, R22
- AND R25, R23
- AND R25, R24
- ADDS R15, R3
- ADCS R16, R4
- STP (R3, R4), 0(R2)
- ADCS R16, R5
- ADCS R16, R6
- STP (R5, R6), 16(R2)
- ADCS R16, R7
- ADCS R17, R8
- STP (R7, R8), 32(R2)
- ADCS R19, R9
- ADCS R20, R10
- STP (R9, R10), 48(R2)
- ADCS R21, R11
- ADCS R22, R12
- STP (R11, R12), 64(R2)
- ADCS R23, R13
- ADC R24, R14
- STP (R13, R14), 80(R2)
- RET
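The routine computes z = x + y mod 2·p751 without branching on secret data: add, tentatively subtract 2·p751, then add 2·p751 back under a mask built from the final borrow. A minimal Go sketch of that flow, assuming a 12-limb little-endian layout (names are illustrative, not the package's API):

```go
package fpsketch

import "math/bits"

// addReduced computes z = x + y mod 2*p751 for inputs in [0, 2*p751).
// p751x2 holds 2*p751 as 12 little-endian limbs.
func addReduced(z, x, y, p751x2 *[12]uint64) {
	var carry, borrow uint64
	for i := 0; i < 12; i++ { // x + y; cannot overflow 12 limbs for valid inputs
		z[i], carry = bits.Add64(x[i], y[i], carry)
	}
	for i := 0; i < 12; i++ { // tentatively subtract 2*p751
		z[i], borrow = bits.Sub64(z[i], p751x2[i], borrow)
	}
	mask := uint64(0) - borrow // all-ones iff x + y < 2*p751
	carry = 0
	for i := 0; i < 12; i++ { // add 2*p751 back only when the subtraction borrowed
		z[i], carry = bits.Add64(z[i], p751x2[i]&mask, carry)
	}
}
```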
- TEXT ·fp751SubReduced(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- // Load x into R3-R14
- // Subtract y from x and store result in R3-R14
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R15, R16)
- LDP 16(R0), (R5, R6)
- LDP 16(R1), (R17, R19)
- SUBS R15, R3
- SBCS R16, R4
- SBCS R17, R5
- SBCS R19, R6
- LDP 32(R0), (R7, R8)
- LDP 32(R1), (R15, R16)
- LDP 48(R0), (R9, R10)
- LDP 48(R1), (R17, R19)
- SBCS R15, R7
- SBCS R16, R8
- SBCS R17, R9
- SBCS R19, R10
- LDP 64(R0), (R11, R12)
- LDP 64(R1), (R15, R16)
- LDP 80(R0), (R13, R14)
- LDP 80(R1), (R17, R19)
- SBCS R15, R11
- SBCS R16, R12
- SBCS R17, R13
- SBCS R19, R14
- SBC ZR, ZR, R15
- // If x - y < 0, R15 is all ones (a borrow mask) and 2 * p751 should be added back
- LDP ·p751x2+0(SB), (R16, R17)
- AND R15, R16
- AND R15, R17
- LDP ·p751x2+40(SB), (R19, R20)
- AND R15, R19
- AND R15, R20
- ADDS R16, R3
- ADCS R17, R4
- STP (R3, R4), 0(R2)
- ADCS R17, R5
- ADCS R17, R6
- STP (R5, R6), 16(R2)
- ADCS R17, R7
- ADCS R19, R8
- STP (R7, R8), 32(R2)
- ADCS R20, R9
- LDP ·p751x2+56(SB), (R16, R17)
- AND R15, R16
- AND R15, R17
- LDP ·p751x2+72(SB), (R19, R20)
- AND R15, R19
- AND R15, R20
- ADCS R16, R10
- STP (R9, R10), 48(R2)
- ADCS R17, R11
- ADCS R19, R12
- STP (R11, R12), 64(R2)
- ADCS R20, R13
- MOVD ·p751x2+88(SB), R16
- AND R15, R16
- ADC R16, R14
- STP (R13, R14), 80(R2)
- RET
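Likewise, z = x - y mod 2·p751 is a multi-limb subtraction followed by a masked add-back of 2·p751. A hedged Go sketch under the same assumed 12-limb layout:

```go
package fpsketch

import "math/bits"

// subReduced computes z = x - y mod 2*p751 for inputs in [0, 2*p751):
// subtract, then add 2*p751 back masked by the borrow.
func subReduced(z, x, y, p751x2 *[12]uint64) {
	var borrow, carry uint64
	for i := 0; i < 12; i++ {
		z[i], borrow = bits.Sub64(x[i], y[i], borrow)
	}
	mask := uint64(0) - borrow // all-ones iff x < y
	for i := 0; i < 12; i++ {
		z[i], carry = bits.Add64(z[i], p751x2[i]&mask, carry)
	}
}
```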
- TEXT ·fp751AddLazy(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- // Load first summand into R3-R14
- // Add first summand and second summand and store result in R3-R14
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R15, R16)
- LDP 16(R0), (R5, R6)
- LDP 16(R1), (R17, R19)
- ADDS R15, R3
- ADCS R16, R4
- STP (R3, R4), 0(R2)
- ADCS R17, R5
- ADCS R19, R6
- STP (R5, R6), 16(R2)
- LDP 32(R0), (R7, R8)
- LDP 32(R1), (R15, R16)
- LDP 48(R0), (R9, R10)
- LDP 48(R1), (R17, R19)
- ADCS R15, R7
- ADCS R16, R8
- STP (R7, R8), 32(R2)
- ADCS R17, R9
- ADCS R19, R10
- STP (R9, R10), 48(R2)
- LDP 64(R0), (R11, R12)
- LDP 64(R1), (R15, R16)
- LDP 80(R0), (R13, R14)
- LDP 80(R1), (R17, R19)
- ADCS R15, R11
- ADCS R16, R12
- STP (R11, R12), 64(R2)
- ADCS R17, R13
- ADC R19, R14
- STP (R13, R14), 80(R2)
- RET
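This is a plain unreduced addition; fp751X2AddLazy below runs the same carry chain over 24 limbs for double-width values. A minimal sketch of the 12-limb case (illustrative names, assumed layout):

```go
package fpsketch

import "math/bits"

// addLazy computes z = x + y with no modular reduction; the caller must
// guarantee the sum still fits in 12 limbs.
func addLazy(z, x, y *[12]uint64) {
	var carry uint64
	for i := 0; i < 12; i++ {
		z[i], carry = bits.Add64(x[i], y[i], carry)
	}
}
```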
- TEXT ·fp751X2AddLazy(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R15, R16)
- LDP 16(R0), (R5, R6)
- LDP 16(R1), (R17, R19)
- ADDS R15, R3
- ADCS R16, R4
- STP (R3, R4), 0(R2)
- ADCS R17, R5
- ADCS R19, R6
- STP (R5, R6), 16(R2)
- LDP 32(R0), (R7, R8)
- LDP 32(R1), (R15, R16)
- LDP 48(R0), (R9, R10)
- LDP 48(R1), (R17, R19)
- ADCS R15, R7
- ADCS R16, R8
- STP (R7, R8), 32(R2)
- ADCS R17, R9
- ADCS R19, R10
- STP (R9, R10), 48(R2)
- LDP 64(R0), (R11, R12)
- LDP 64(R1), (R15, R16)
- LDP 80(R0), (R13, R14)
- LDP 80(R1), (R17, R19)
- ADCS R15, R11
- ADCS R16, R12
- STP (R11, R12), 64(R2)
- ADCS R17, R13
- ADCS R19, R14
- STP (R13, R14), 80(R2)
- LDP 96(R0), (R3, R4)
- LDP 96(R1), (R15, R16)
- LDP 112(R0), (R5, R6)
- LDP 112(R1), (R17, R19)
- ADCS R15, R3
- ADCS R16, R4
- STP (R3, R4), 96(R2)
- ADCS R17, R5
- ADCS R19, R6
- STP (R5, R6), 112(R2)
- LDP 128(R0), (R7, R8)
- LDP 128(R1), (R15, R16)
- LDP 144(R0), (R9, R10)
- LDP 144(R1), (R17, R19)
- ADCS R15, R7
- ADCS R16, R8
- STP (R7, R8), 128(R2)
- ADCS R17, R9
- ADCS R19, R10
- STP (R9, R10), 144(R2)
- LDP 160(R0), (R11, R12)
- LDP 160(R1), (R15, R16)
- LDP 176(R0), (R13, R14)
- LDP 176(R1), (R17, R19)
- ADCS R15, R11
- ADCS R16, R12
- STP (R11, R12), 160(R2)
- ADCS R17, R13
- ADC R19, R14
- STP (R13, R14), 176(R2)
- RET
- TEXT ·fp751X2SubLazy(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- LDP 0(R0), (R3, R4)
- LDP 0(R1), (R15, R16)
- LDP 16(R0), (R5, R6)
- LDP 16(R1), (R17, R19)
- SUBS R15, R3
- SBCS R16, R4
- STP (R3, R4), 0(R2)
- SBCS R17, R5
- SBCS R19, R6
- STP (R5, R6), 16(R2)
- LDP 32(R0), (R7, R8)
- LDP 32(R1), (R15, R16)
- LDP 48(R0), (R9, R10)
- LDP 48(R1), (R17, R19)
- SBCS R15, R7
- SBCS R16, R8
- STP (R7, R8), 32(R2)
- SBCS R17, R9
- SBCS R19, R10
- STP (R9, R10), 48(R2)
- LDP 64(R0), (R11, R12)
- LDP 64(R1), (R15, R16)
- LDP 80(R0), (R13, R14)
- LDP 80(R1), (R17, R19)
- SBCS R15, R11
- SBCS R16, R12
- STP (R11, R12), 64(R2)
- SBCS R17, R13
- SBCS R19, R14
- STP (R13, R14), 80(R2)
- LDP 96(R0), (R3, R4)
- LDP 96(R1), (R15, R16)
- LDP 112(R0), (R5, R6)
- LDP 112(R1), (R17, R19)
- SBCS R15, R3
- SBCS R16, R4
- SBCS R17, R5
- SBCS R19, R6
- LDP 128(R0), (R7, R8)
- LDP 128(R1), (R15, R16)
- LDP 144(R0), (R9, R10)
- LDP 144(R1), (R17, R19)
- SBCS R15, R7
- SBCS R16, R8
- SBCS R17, R9
- SBCS R19, R10
- LDP 160(R0), (R11, R12)
- LDP 160(R1), (R15, R16)
- LDP 176(R0), (R13, R14)
- LDP 176(R1), (R17, R19)
- SBCS R15, R11
- SBCS R16, R12
- SBCS R17, R13
- SBCS R19, R14
- SBC ZR, ZR, R15
- // If x - y < 0, R15 is all ones (a borrow mask) and p751 should be added to the upper half
- MOVD ·p751+0(SB), R20
- AND R15, R20
- LDP ·p751+40(SB), (R16, R17)
- ADDS R20, R3
- ADCS R20, R4
- STP (R3, R4), 96(R2)
- ADCS R20, R5
- ADCS R20, R6
- STP (R5, R6), 112(R2)
- ADCS R20, R7
- LDP ·p751+56(SB), (R19, R20)
- AND R15, R16
- AND R15, R17
- ADCS R16, R8
- STP (R7, R8), 128(R2)
- ADCS R17, R9
- LDP ·p751+72(SB), (R16, R17)
- AND R15, R19
- AND R15, R20
- ADCS R19, R10
- STP (R9, R10), 144(R2)
- ADCS R20, R11
- MOVD ·p751+88(SB), R19
- AND R15, R16
- AND R15, R17
- ADCS R16, R12
- STP (R11, R12), 160(R2)
- ADCS R17, R13
- AND R15, R19
- ADC R19, R14
- STP (R13, R14), 176(R2)
- RET
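The double-width subtraction borrows across all 24 limbs but only adds p751 (not 2·p751) back into the upper 12 limbs, i.e. it adds p751·2^768 when the subtraction borrows. A Go sketch of that behaviour (illustrative names and layout):

```go
package fpsketch

import "math/bits"

// x2SubLazy computes z = x - y for 24-limb (double-width) values and, when
// the subtraction borrows, adds p751 into the upper 12 limbs only, which is
// equivalent to adding p751 * 2^768. p751 is the prime as 12 limbs.
func x2SubLazy(z, x, y *[24]uint64, p751 *[12]uint64) {
	var borrow, carry uint64
	for i := 0; i < 24; i++ {
		z[i], borrow = bits.Sub64(x[i], y[i], borrow)
	}
	mask := uint64(0) - borrow // all-ones iff x < y
	for i := 0; i < 12; i++ {
		z[12+i], carry = bits.Add64(z[12+i], p751[i]&mask, carry)
	}
}
```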
- // Expects that X0*Y0 is already in Z0(low),Z3(high) and X0*Y1 in Z1(low),Z2(high)
- // Z0 is not actually touched
- // Result of (X0-X2) * (Y0-Y2) will be in Z0-Z5
- // Inputs remain intact
- #define mul192x192comba(X0, X1, X2, Y0, Y1, Y2, Z0, Z1, Z2, Z3, Z4, Z5, T0, T1, T2, T3) \
- MUL X1, Y0, T2 \
- UMULH X1, Y0, T3 \
- \
- ADDS Z3, Z1 \
- ADCS ZR, Z2 \
- ADC ZR, ZR, Z3 \
- \
- MUL X0, Y2, T0 \
- UMULH X0, Y2, T1 \
- \
- ADDS T2, Z1 \
- ADCS T3, Z2 \
- ADC ZR, Z3 \
- \
- MUL X1, Y1, T2 \
- UMULH X1, Y1, T3 \
- \
- ADDS T0, Z2 \
- ADCS T1, Z3 \
- ADC ZR, ZR, Z4 \
- \
- MUL X2, Y0, T0 \
- UMULH X2, Y0, T1 \
- \
- ADDS T2, Z2 \
- ADCS T3, Z3 \
- ADC ZR, Z4 \
- \
- MUL X1, Y2, T2 \
- UMULH X1, Y2, T3 \
- \
- ADDS T0, Z2 \
- ADCS T1, Z3 \
- ADC ZR, Z4 \
- \
- MUL X2, Y1, T0 \
- UMULH X2, Y1, T1 \
- \
- ADDS T2, Z3 \
- ADCS T3, Z4 \
- ADC ZR, ZR, Z5 \
- \
- MUL X2, Y2, T2 \
- UMULH X2, Y2, T3 \
- \
- ADDS T0, Z3 \
- ADCS T1, Z4 \
- ADC ZR, Z5 \
- \
- ADDS T2, Z4 \
- ADC T3, Z5
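The macro is a 3×3-limb product-scanning (Comba) multiplication; it additionally expects the caller to have precomputed X0·Y0 and X0·Y1, as its header comment says. A self-contained Go sketch of the same column-by-column accumulation, recomputing all partial products for clarity (names are illustrative):

```go
package fpsketch

import "math/bits"

// mul192 multiplies two 3-limb (192-bit) values into a 6-limb product in
// product-scanning (Comba) order: each output limb is finished before the
// next, using a three-word column accumulator.
func mul192(x, y *[3]uint64) (z [6]uint64) {
	var c0, c1, c2 uint64 // column accumulator
	for k := 0; k < 6; k++ {
		for i := 0; i < 3; i++ {
			j := k - i
			if j < 0 || j > 2 {
				continue
			}
			hi, lo := bits.Mul64(x[i], y[j])
			var c uint64
			c0, c = bits.Add64(c0, lo, 0)
			c1, c = bits.Add64(c1, hi, c)
			c2 += c
		}
		z[k] = c0
		c0, c1, c2 = c1, c2, 0 // shift the accumulator for the next column
	}
	return
}
```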
- // Expects that X points to (X4-X6), Y to (Y4-Y6)
- // Result of (X0-X5) * (Y0-Y5) will be in (0(Z), 8(Z), 16(Z), T0-T8)
- // Inputs get overwritten
- #define mul384x384karatsuba(X, Y, Z, X0, X1, X2, X3, X4, X5, Y0, Y1, Y2, Y3, Y4, Y5, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)\
- ADDS X0, X3 \ // xH + xL, destroys xH
- ADCS X1, X4 \
- ADCS X2, X5 \
- ADC ZR, ZR, T10 \
- \
- ADDS Y0, Y3 \ // yH + yL, destroys yH
- ADCS Y1, Y4 \
- ADCS Y2, Y5 \
- ADC ZR, ZR, T6 \
- \
- SUB T10, ZR, T7 \
- SUB T6, ZR, T8 \
- AND T6, T10 \ // combined carry
- \
- AND T7, Y3, T0 \ // masked(yH + yL)
- AND T7, Y4, T1 \
- AND T7, Y5, T2 \
- \
- AND T8, X3, T3 \ // masked(xH + xL)
- AND T8, X4, T4 \
- AND T8, X5, T5 \
- \
- ADDS T3, T0 \
- ADCS T4, T1 \
- STP (T0, T1), 0+Z \
- \
- MUL X3, Y3, T0 \
- MUL X3, Y4, T1 \
- \
- ADCS T5, T2 \
- MOVD T2, 16+Z \
- \
- UMULH X3, Y4, T2 \
- UMULH X3, Y3, T3 \
- \
- ADC ZR, T10 \
- \ // (xH + xL) * (yH + yL)
- mul192x192comba(X3, X4, X5, Y3, Y4, Y5, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9)\
- \
- MUL X0, Y0, X3 \
- LDP 0+Z, (T6, T7) \
- MOVD 16+Z, T8 \
- \
- UMULH X0, Y0, Y3 \
- ADDS T6, T3 \
- ADCS T7, T4 \
- MUL X0, Y1, X4 \
- ADCS T8, T5 \
- ADC ZR, T10 \
- UMULH X0, Y1, X5 \
- \ // xL * yL
- mul192x192comba(X0, X1, X2, Y0, Y1, Y2, X3, X4, X5, Y3, Y4, Y5, T6, T7, T8, T9)\
- \
- STP (X3, X4), 0+Z \
- MOVD X5, 16+Z \
- \
- SUBS X3, T0 \ // (xH + xL) * (yH + yL) - xL * yL
- SBCS X4, T1 \
- LDP 0+X, (X3, X4) \
- SBCS X5, T2 \
- MOVD 16+X, X5 \
- SBCS Y3, T3 \
- SBCS Y4, T4 \
- SBCS Y5, T5 \
- SBC ZR, T10 \
- \
- ADDS Y3, T0 \ // ((xH + xL) * (yH + yL) - xL * yL) * 2^192 + xL * yL
- ADCS Y4, T1 \
- LDP 0+Y, (Y3, Y4) \
- MUL X3, Y3, X0 \
- ADCS Y5, T2 \
- UMULH X3, Y3, Y0 \
- MOVD 16+Y, Y5 \
- MUL X3, Y4, X1 \
- ADCS ZR, T3 \
- UMULH X3, Y4, X2 \
- ADCS ZR, T4 \
- ADCS ZR, T5 \
- ADC ZR, T10 \
- \ // xH * yH, overwrite xLow, yLow
- mul192x192comba(X3, X4, X5, Y3, Y4, Y5, X0, X1, X2, Y0, Y1, Y2, T6, T7, T8, T9)\
- \
- SUBS X0, T0 \ // ((xH + xL) * (yH + yL) - xL * yL - xH * yH)
- SBCS X1, T1 \
- SBCS X2, T2 \
- SBCS Y0, T3 \
- SBCS Y1, T4 \
- SBCS Y2, T5 \
- SBC ZR, T10 \
- \
- ADDS X0, T3 \
- ADCS X1, T4 \
- ADCS X2, T5 \
- ADCS T10, Y0, T6 \
- ADCS ZR, Y1, T7 \
- ADC ZR, Y2, T8
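The macro carries out one Karatsuba level on 384-bit operands split into 192-bit halves. The identity it builds on can be checked independently with math/big; the demo below is illustrative only and is not the package's code:

```go
package main

import (
	"fmt"
	"math/big"
)

// Checks the Karatsuba identity used for a 384x384 -> 768-bit product:
// with x = xL + xH*2^192 and y = yL + yH*2^192,
//   x*y = xL*yL + ((xL+xH)*(yL+yH) - xL*yL - xH*yH)*2^192 + xH*yH*2^384.
func main() {
	const k = 192
	x := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 384), big.NewInt(12345))
	y := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 384), big.NewInt(67891))

	mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), k), big.NewInt(1))
	xL, xH := new(big.Int).And(x, mask), new(big.Int).Rsh(x, k)
	yL, yH := new(big.Int).And(y, mask), new(big.Int).Rsh(y, k)

	ll := new(big.Int).Mul(xL, yL)
	hh := new(big.Int).Mul(xH, yH)
	mid := new(big.Int).Mul(new(big.Int).Add(xL, xH), new(big.Int).Add(yL, yH))
	mid.Sub(mid, ll).Sub(mid, hh)

	res := new(big.Int).Lsh(hh, 2*k)
	res.Add(res, new(big.Int).Lsh(mid, k)).Add(res, ll)

	fmt.Println(res.Cmp(new(big.Int).Mul(x, y)) == 0) // true
}
```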
- TEXT ·fp751Mul(SB), NOSPLIT, $0-24
- MOVD z+0(FP), R2
- MOVD x+8(FP), R0
- MOVD y+16(FP), R1
- // Load xL in R3-R8, xH in R9-R14
- // (xH + xL) in R3-R8, destroys xH
- LDP 0(R0), (R3, R4)
- LDP 48(R0), (R9, R10)
- ADDS R9, R3
- ADCS R10, R4
- LDP 16(R0), (R5, R6)
- LDP 64(R0), (R11, R12)
- ADCS R11, R5
- ADCS R12, R6
- LDP 32(R0), (R7, R8)
- LDP 80(R0), (R13, R14)
- ADCS R13, R7
- ADCS R14, R8
- ADC ZR, ZR, R22
- // Load yL in R9-R14, yH in R15-R21
- // (yH + yL) in R9-R14, destroys yH
- LDP 0(R1), (R9, R10)
- LDP 48(R1), (R15, R16)
- ADDS R15, R9
- ADCS R16, R10
- LDP 16(R1), (R11, R12)
- LDP 64(R1), (R17, R19)
- ADCS R17, R11
- ADCS R19, R12
- LDP 32(R1), (R13, R14)
- LDP 80(R1), (R20, R21)
- ADCS R20, R13
- ADCS R21, R14
- ADC ZR, ZR, R23
- // Compute masks and combined carry
- SUB R22, ZR, R24
- SUB R23, ZR, R25
- AND R23, R22
- // Store xH, yH in z so mul384x384karatsuba can retrieve them from memory
- // It doesn't have enough registers
- // Meanwhile, compute masked(xH + xL) in R15-R21
- STP (R6, R7), 0(R2)
- AND R25, R3, R15
- AND R25, R4, R16
- STP (R8, R12), 16(R2)
- AND R25, R5, R17
- AND R25, R6, R19
- STP (R13, R14), 32(R2)
- AND R25, R7, R20
- AND R25, R8, R21
- // Masked(xH + xL) + masked(yH + yL) in R15-R21
- // Store intermediate values in z
- AND R24, R9, R25
- AND R24, R10, R26
- ADDS R25, R15
- ADCS R26, R16
- STP (R15, R16), 96(R2)
- AND R24, R11, R25
- AND R24, R12, R26
- ADCS R25, R17
- ADCS R26, R19
- STP (R17, R19), 112(R2)
- AND R24, R13, R25
- AND R24, R14, R26
- ADCS R25, R20
- ADCS R26, R21
- STP (R20, R21), 128(R2)
- // Store carry in R29 so it can remain there
- ADC ZR, R22, R29
- // (xH + xL) * (yH + yL)
- mul384x384karatsuba(0(R2), 24(R2), 48(R2), R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R19, R20, R21, R22, R23, R24, R25, R26)
- // Load masked(xH + xL) + masked(yH + yL) and add that to its top half
- // Store the result back in z
- STP (R15, R16), 72(R2)
- LDP 96(R2), (R3, R4)
- ADDS R3, R19
- STP (R17, R19), 88(R2)
- ADCS R4, R20
- LDP 112(R2), (R5, R6)
- ADCS R5, R21
- STP (R20, R21), 104(R2)
- ADCS R6, R22
- LDP 128(R2), (R7, R8)
- ADCS R7, R23
- STP (R22, R23), 120(R2)
- ADCS R8, R24
- MOVD R24, 136(R2)
- ADC ZR, R29
- // Load xL, yL
- LDP 0(R0), (R3, R4)
- LDP 16(R0), (R5, R6)
- LDP 32(R0), (R7, R8)
- LDP 0(R1), (R9, R10)
- LDP 16(R1), (R11, R12)
- LDP 32(R1), (R13, R14)
- // xL * yL
- mul384x384karatsuba(24(R0), 24(R1), 0(R2), R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R19, R20, R21, R22, R23, R24, R25, R26)
- // (xH + xL) * (yH + yL) - xL * yL in R3-R14
- LDP 0(R2), (R12, R13)
- LDP 48(R2), (R3, R4)
- SUBS R12, R3
- LDP 64(R2), (R5, R6)
- MOVD 16(R2), R14
- SBCS R13, R4
- SBCS R14, R5
- LDP 80(R2), (R7, R8)
- SBCS R15, R6
- SBCS R16, R7
- LDP 96(R2), (R9, R10)
- SBCS R17, R8
- SBCS R19, R9
- LDP 112(R2), (R11, R12)
- SBCS R20, R10
- SBCS R21, R11
- LDP 128(R2), (R13, R14)
- SBCS R22, R12
- SBCS R23, R13
- SBCS R24, R14
- SBC ZR, R29
- STP (R15, R16), 24(R2)
- MOVD R17, 40(R2)
- // ((xH + xL) * (yH + yL) - xL * yL) * 2^384 + xL * yL and store back in z
- ADDS R19, R3
- ADCS R20, R4
- STP (R3, R4), 48(R2)
- ADCS R21, R5
- ADCS R22, R6
- STP (R5, R6), 64(R2)
- ADCS R23, R7
- ADCS R24, R8
- STP (R7, R8), 80(R2)
- ADCS ZR, R9
- ADCS ZR, R10
- STP (R9, R10), 96(R2)
- ADCS ZR, R11
- ADCS ZR, R12
- STP (R11, R12), 112(R2)
- ADCS ZR, R13
- ADCS ZR, R14
- STP (R13, R14), 128(R2)
- ADC ZR, R29
- // Load xH, yH
- LDP 48(R0), (R3, R4)
- LDP 64(R0), (R5, R6)
- LDP 80(R0), (R7, R8)
- LDP 48(R1), (R9, R10)
- LDP 64(R1), (R11, R12)
- LDP 80(R1), (R13, R14)
- // xH * yH
- mul384x384karatsuba(72(R0), 72(R1), 144(R2), R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R19, R20, R21, R22, R23, R24, R25, R26)
- LDP 144(R2), (R12, R13)
- MOVD 160(R2), R14
- // (xH + xL) * (yH + yL) - xL * yL - xH * yH in R3-R14
- // Store lower half in z, that's done
- LDP 48(R2), (R3, R4)
- SUBS R12, R3
- LDP 64(R2), (R5, R6)
- SBCS R13, R4
- SBCS R14, R5
- LDP 80(R2), (R7, R8)
- SBCS R15, R6
- SBCS R16, R7
- LDP 96(R2), (R9, R10)
- SBCS R17, R8
- SBCS R19, R9
- LDP 112(R2), (R11, R12)
- SBCS R20, R10
- SBCS R21, R11
- LDP 128(R2), (R13, R14)
- SBCS R22, R12
- SBCS R23, R13
- STP (R3, R4), 48(R2)
- SBCS R24, R14
- STP (R5, R6), 64(R2)
- SBC ZR, R29
- STP (R7, R8), 80(R2)
- // (xH * yH) * 2^768 + ((xH + xL) * (yH + yL) - xL * yL - xH * yH) * 2^384 + xL * yL
- // Store remaining limbs in z
- LDP 144(R2), (R3, R4)
- MOVD 160(R2), R5
- ADDS R3, R9
- ADCS R4, R10
- STP (R9, R10), 96(R2)
- ADCS R5, R11
- ADCS R15, R12
- STP (R11, R12), 112(R2)
- ADCS R16, R13
- ADCS R17, R14
- STP (R13, R14), 128(R2)
- ADCS R29, R19
- ADCS ZR, R20
- STP (R19, R20), 144(R2)
- ADCS ZR, R21
- ADCS ZR, R22
- STP (R21, R22), 160(R2)
- ADCS ZR, R23
- ADC ZR, R24
- STP (R23, R24), 176(R2)
- RET
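Taken together, fp751Mul produces the full 1502-bit product of two 12-limb operands via two Karatsuba levels (768→384 here, 384→192 inside the macro). Its input/output contract is just the plain product; a schoolbook Go reference that pins that contract down (illustrative names, assumed layout):

```go
package fpsketch

import "math/bits"

// mul751 computes the full product z = x * y of two 12-limb operands into
// 24 limbs, the same result fp751Mul produces via nested Karatsuba.
// Plain schoolbook multiplication, shown only as a reference.
func mul751(z *[24]uint64, x, y *[12]uint64) {
	var t [24]uint64
	for i := 0; i < 12; i++ {
		var carry uint64
		for j := 0; j < 12; j++ {
			hi, lo := bits.Mul64(x[i], y[j])
			lo, c := bits.Add64(lo, carry, 0) // fold in the running carry
			hi += c
			t[i+j], c = bits.Add64(t[i+j], lo, 0) // accumulate into the result limb
			carry = hi + c                        // stays below 2^64 for 64x64-bit products
		}
		t[i+12] = carry
	}
	*z = t
}
```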
- TEXT ·fp751MontgomeryReduce(SB), NOSPLIT, $0-16
- MOVD z+0(FP), R0
- MOVD x+8(FP), R1
- // Load p751+1 into R14-R17, R29, R19-R20, interleaving the loads with the arithmetic below
- LDP ·p751p1+40(SB), (R14, R15)
- // z0-z11 will be R2-R13
- // Load x0-x4 into z0-z4, and x5 as well, interleaving the loads with the arithmetic below
- LDP 0(R1), (R2, R3)
- // x5 iteration
- MUL R2, R14, R22
- LDP 32(R1), (R6, R21)
- UMULH R2, R14, R23
- ADDS R21, R22, R7 // Set z5
- ADC ZR, R23, R25
- // x6 iteration
- MUL R2, R15, R22
- MOVD 48(R1), R21
- UMULH R2, R15, R23
- ADDS R22, R25
- ADC R23, ZR, R26
- MUL R3, R14, R22
- LDP ·p751p1+56(SB), (R16, R17)
- UMULH R3, R14, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- ADDS R21, R25, R8 // Set z6
- ADCS ZR, R26
- ADC ZR, R24
- // x7 iteration
- MUL R2, R16, R22
- MOVD 56(R1), R21
- UMULH R2, R16, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, ZR, R25
- MUL R3, R15, R22
- LDP 16(R1), (R4, R5)
- UMULH R3, R15, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R4, R14, R22
- LDP ·p751p1+72(SB), (R29, R19)
- UMULH R4, R14, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- ADDS R21, R26, R9 // Set z7
- ADCS ZR, R24
- ADC ZR, R25
- // x8 iteration
- MUL R2, R17, R22
- MOVD 64(R1), R21
- UMULH R2, R17, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, ZR, R26
- MUL R3, R16, R22
- MOVD ·p751p1+88(SB), R20
- UMULH R3, R16, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R4, R15, R22
- UMULH R4, R15, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R5, R14, R22
- UMULH R5, R14, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- ADDS R24, R21, R10 // Set z8
- ADCS ZR, R25
- ADC ZR, R26
- // x9 iteration
- MUL R2, R29, R22
- MOVD 72(R1), R21
- UMULH R2, R29, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- MUL R3, R17, R22
- UMULH R3, R17, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R4, R16, R22
- UMULH R4, R16, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R5, R15, R22
- UMULH R5, R15, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R6, R14, R22
- UMULH R6, R14, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- ADDS R21, R25, R11 // Set z9
- ADCS ZR, R26
- ADC ZR, R24
- // x10 iteration
- MUL R2, R19, R22
- MOVD 80(R1), R21
- UMULH R2, R19, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, ZR, R25
- MUL R3, R29, R22
- UMULH R3, R29, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R4, R17, R22
- UMULH R4, R17, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R5, R16, R22
- UMULH R5, R16, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R6, R15, R22
- UMULH R6, R15, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R7, R14, R22
- UMULH R7, R14, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- ADDS R21, R26, R12 // Set z10
- ADCS ZR, R24
- ADC ZR, R25
- // x11 iteration
- MUL R2, R20, R22
- MOVD 88(R1), R21
- UMULH R2, R20, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, ZR, R26
- MUL R3, R19, R22
- UMULH R3, R19, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R4, R29, R22
- UMULH R4, R29, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R5, R17, R22
- UMULH R5, R17, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R6, R16, R22
- UMULH R6, R16, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R7, R15, R22
- UMULH R7, R15, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R8, R14, R22
- UMULH R8, R14, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- ADDS R21, R24, R13 // Set z11
- ADCS ZR, R25
- ADC ZR, R26
- // x12 iteration
- MUL R3, R20, R22
- MOVD 96(R1), R21
- UMULH R3, R20, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- MUL R4, R19, R22
- UMULH R4, R19, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R5, R29, R22
- UMULH R5, R29, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R6, R17, R22
- UMULH R6, R17, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R7, R16, R22
- UMULH R7, R16, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R8, R15, R22
- UMULH R8, R15, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R9, R14, R22
- UMULH R9, R14, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- ADDS R21, R25, R2 // Set z0
- ADCS ZR, R26
- ADC ZR, R24
- // x13 iteration
- MUL R4, R20, R22
- MOVD 104(R1), R21
- UMULH R4, R20, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, ZR, R25
- MUL R5, R19, R22
- UMULH R5, R19, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R6, R29, R22
- UMULH R6, R29, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R7, R17, R22
- UMULH R7, R17, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R8, R16, R22
- UMULH R8, R16, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R9, R15, R22
- UMULH R9, R15, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R10, R14, R22
- UMULH R10, R14, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- ADDS R21, R26, R3 // Set z1
- STP (R2, R3), 0(R0)
- ADCS ZR, R24
- ADC ZR, R25
- // x14 iteration
- MUL R5, R20, R22
- MOVD 112(R1), R21
- UMULH R5, R20, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, ZR, R26
- MUL R6, R19, R22
- UMULH R6, R19, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R7, R29, R22
- UMULH R7, R29, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R8, R17, R22
- UMULH R8, R17, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R9, R16, R22
- UMULH R9, R16, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R10, R15, R22
- UMULH R10, R15, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R11, R14, R22
- UMULH R11, R14, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- ADDS R21, R24, R4 // Set z2
- ADCS ZR, R25
- ADC ZR, R26
- // x15 iteration
- MUL R6, R20, R22
- MOVD 120(R1), R21
- UMULH R6, R20, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- MUL R7, R19, R22
- UMULH R7, R19, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R8, R29, R22
- UMULH R8, R29, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R9, R17, R22
- UMULH R9, R17, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R10, R16, R22
- UMULH R10, R16, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R11, R15, R22
- UMULH R11, R15, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R12, R14, R22
- UMULH R12, R14, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- ADDS R21, R25, R5 // Set z3
- STP (R4, R5), 16(R0)
- ADCS ZR, R26
- ADC ZR, R24
- // x16 iteration
- MUL R7, R20, R22
- MOVD 128(R1), R21
- UMULH R7, R20, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, ZR, R25
- MUL R8, R19, R22
- UMULH R8, R19, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R9, R29, R22
- UMULH R9, R29, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R10, R17, R22
- UMULH R10, R17, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R11, R16, R22
- UMULH R11, R16, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R12, R15, R22
- UMULH R12, R15, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R13, R14, R22
- UMULH R13, R14, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- ADDS R21, R26, R6 // Set z4
- ADCS ZR, R24
- ADC ZR, R25
- // x17 iteration
- MUL R8, R20, R22
- MOVD 136(R1), R21
- UMULH R8, R20, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, ZR, R26
- MUL R9, R19, R22
- UMULH R9, R19, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R10, R29, R22
- UMULH R10, R29, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R11, R17, R22
- UMULH R11, R17, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R12, R16, R22
- UMULH R12, R16, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R13, R15, R22
- UMULH R13, R15, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- ADDS R21, R24, R7 // Set z5
- STP (R6, R7), 32(R0)
- ADCS ZR, R25
- ADC ZR, R26
- // x18 iteration
- MUL R9, R20, R22
- MOVD 144(R1), R21
- UMULH R9, R20, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- MUL R10, R19, R22
- UMULH R10, R19, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R11, R29, R22
- UMULH R11, R29, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R12, R17, R22
- UMULH R12, R17, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- MUL R13, R16, R22
- UMULH R13, R16, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- ADDS R21, R25, R8 // Set z6
- ADCS ZR, R26
- ADC ZR, R24
- // x19 iteration
- MUL R10, R20, R22
- MOVD 152(R1), R21
- UMULH R10, R20, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, ZR, R25
- MUL R11, R19, R22
- UMULH R11, R19, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R12, R29, R22
- UMULH R12, R29, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- MUL R13, R17, R22
- UMULH R13, R17, R23
- ADDS R22, R26
- ADCS R23, R24
- ADC ZR, R25
- ADDS R21, R26, R9 // Set z7
- STP (R8, R9), 48(R0)
- ADCS ZR, R24
- ADC ZR, R25
- // x20 iteration
- MUL R11, R20, R22
- MOVD 160(R1), R21
- UMULH R11, R20, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, ZR, R26
- MUL R12, R19, R22
- UMULH R12, R19, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- MUL R13, R29, R22
- UMULH R13, R29, R23
- ADDS R22, R24
- ADCS R23, R25
- ADC ZR, R26
- ADDS R21, R24, R10 // Set z8
- ADCS ZR, R25
- ADC ZR, R26
- // x21 iteration
- MUL R12, R20, R22
- MOVD 168(R1), R21
- UMULH R12, R20, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, ZR, R24
- MUL R13, R19, R22
- UMULH R13, R19, R23
- ADDS R22, R25
- ADCS R23, R26
- ADC ZR, R24
- ADDS R21, R25, R11 // Set z9
- STP (R10, R11), 64(R0)
- ADCS ZR, R26
- ADC ZR, R24
- // x22 iteration
- MUL R13, R20, R22
- MOVD 176(R1), R21
- UMULH R13, R20, R23
- ADDS R22, R26
- ADC R23, R24
- ADDS R21, R26, R12 // Set z10
- MOVD 184(R1), R21
- ADC R21, R24, R13 // Set z11
- STP (R12, R13), 80(R0)
- RET
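The reduction maps a 24-limb input x below p751·2^768 (e.g. a product of reduced operands) to a 12-limb value congruent to x·2^-768 mod p751 and bounded by 2·p751; fp751StrongReduce below handles the final range. The textbook Montgomery formulation in math/big below is only a reference for the expected result; the assembly exploits that p751 + 1 = 2^372·3^239 has five all-zero low limbs (hence the loads from ·p751p1+40) and that −p751⁻¹ ≡ 1 mod 2^64, so no per-limb multiplication by a Montgomery constant is needed.

```go
package main

import (
	"fmt"
	"math/big"
)

// Reference: z = x * R^-1 mod p751 with R = 2^768, via
//   m = (x mod R) * (-p^-1 mod R) mod R,  z = (x + m*p) / R.
func main() {
	// p751 = 2^372 * 3^239 - 1
	p := new(big.Int).Lsh(new(big.Int).Exp(big.NewInt(3), big.NewInt(239), nil), 372)
	p.Sub(p, big.NewInt(1))
	R := new(big.Int).Lsh(big.NewInt(1), 768)

	// A representative input: the product of two reduced elements (< p^2 < p*R).
	x := new(big.Int).Mul(new(big.Int).Sub(p, big.NewInt(5)), new(big.Int).Sub(p, big.NewInt(7)))

	negPInv := new(big.Int).Sub(R, new(big.Int).ModInverse(p, R))
	m := new(big.Int).Mod(new(big.Int).Mul(new(big.Int).Mod(x, R), negPInv), R)
	z := new(big.Int).Add(x, new(big.Int).Mul(m, p))
	z.Rsh(z, 768) // z = (x + m*p) / R, in [0, 2*p751); no final subtraction, as in the assembly

	want := new(big.Int).Mod(new(big.Int).Mul(x, new(big.Int).ModInverse(R, p)), p)
	fmt.Println(new(big.Int).Mod(z, p).Cmp(want) == 0) // true
}
```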
- TEXT ·fp751StrongReduce(SB), NOSPLIT, $0-8
- MOVD x+0(FP), R0
- // Keep x in R1-R12 and p751 in R13-R21; subtract p751 from x into R1-R12
- MOVD ·p751+0(SB), R13
- LDP 0(R0), (R1, R2)
- LDP 16(R0), (R3, R4)
- SUBS R13, R1
- SBCS R13, R2
- LDP 32(R0), (R5, R6)
- LDP ·p751+40(SB), (R14, R15)
- SBCS R13, R3
- SBCS R13, R4
- LDP 48(R0), (R7, R8)
- LDP ·p751+56(SB), (R16, R17)
- SBCS R13, R5
- SBCS R14, R6
- LDP 64(R0), (R9, R10)
- LDP ·p751+72(SB), (R19, R20)
- SBCS R15, R7
- SBCS R16, R8
- LDP 80(R0), (R11, R12)
- MOVD ·p751+88(SB), R21
- SBCS R17, R9
- SBCS R19, R10
- SBCS R20, R11
- SBCS R21, R12
- SBC ZR, ZR, R22
- // Mask with the borrow and add p751
- AND R22, R13
- AND R22, R14
- AND R22, R15
- AND R22, R16
- AND R22, R17
- AND R22, R19
- AND R22, R20
- AND R22, R21
- ADDS R13, R1
- ADCS R13, R2
- STP (R1, R2), 0(R0)
- ADCS R13, R3
- ADCS R13, R4
- STP (R3, R4), 16(R0)
- ADCS R13, R5
- ADCS R14, R6
- STP (R5, R6), 32(R0)
- ADCS R15, R7
- ADCS R16, R8
- STP (R7, R8), 48(R0)
- ADCS R17, R9
- ADCS R19, R10
- STP (R9, R10), 64(R0)
- ADCS R20, R11
- ADC R21, R12
- STP (R11, R12), 80(R0)
- RET
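This final conditional subtraction maps a value in [0, 2·p751) to the canonical range [0, p751). A minimal Go sketch of the masked add-back pattern, assuming the same 12-limb layout (illustrative names):

```go
package fpsketch

import "math/bits"

// strongReduce brings a value already known to be < 2*p751 into [0, p751):
// subtract p751 and, using the final borrow as a mask, add p751 back if the
// subtraction went negative. p751 is the prime as 12 little-endian limbs.
func strongReduce(x, p751 *[12]uint64) {
	var t [12]uint64
	var borrow uint64
	for i := 0; i < 12; i++ {
		t[i], borrow = bits.Sub64(x[i], p751[i], borrow)
	}
	mask := uint64(0) - borrow // all-ones iff x < p751
	var carry uint64
	for i := 0; i < 12; i++ {
		x[i], carry = bits.Add64(t[i], p751[i]&mask, carry)
	}
}
```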
|