You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
	
	
		
			794 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Perl
		
	
		
		
			
		
	
	
			794 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			Perl
		
	
| 
											10 years ago
										 | #!/usr/bin/env perl | ||
|  | 
 | ||
|  | # ==================================================================== | ||
|  | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | ||
|  | # project. The module is, however, dual licensed under OpenSSL and | ||
|  | # CRYPTOGAMS licenses depending on where you obtain it. For further | ||
|  | # details see http://www.openssl.org/~appro/cryptogams/. | ||
|  | # ==================================================================== | ||
|  | 
 | ||
|  | # SHA256/512 block procedure for PA-RISC. | ||
|  | 
 | ||
|  | # June 2009. | ||
|  | # | ||
|  | # SHA256 performance is >75% better than gcc 3.2 generated code on | ||
|  | # PA-7100LC. Compared to code generated by vendor compiler this | ||
|  | # implementation is almost 70% faster in 64-bit build, but delivers | ||
|  | # virtually same performance in 32-bit build on PA-8600. | ||
|  | # | ||
|  | # SHA512 performance is >2.9x better than gcc 3.2 generated code on | ||
|  | # PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the | ||
|  | # code is executed on PA-RISC 2.0 processor and switches to 64-bit | ||
|  | # code path delivering adequate peformance even in "blended" 32-bit | ||
|  | # build. Though 64-bit code is not any faster than code generated by | ||
|  | # vendor compiler on PA-8600... | ||
|  | # | ||
|  | # Special thanks to polarhome.com for providing HP-UX account. | ||
|  | 
 | ||
|  | $flavour = shift; | ||
|  | $output = shift; | ||
|  | open STDOUT,">$output"; | ||
|  | 
 | ||
|  | if ($flavour =~ /64/) { | ||
|  | 	$LEVEL		="2.0W"; | ||
|  | 	$SIZE_T		=8; | ||
|  | 	$FRAME_MARKER	=80; | ||
|  | 	$SAVED_RP	=16; | ||
|  | 	$PUSH		="std"; | ||
|  | 	$PUSHMA		="std,ma"; | ||
|  | 	$POP		="ldd"; | ||
|  | 	$POPMB		="ldd,mb"; | ||
|  | } else { | ||
|  | 	$LEVEL		="1.0"; | ||
|  | 	$SIZE_T		=4; | ||
|  | 	$FRAME_MARKER	=48; | ||
|  | 	$SAVED_RP	=20; | ||
|  | 	$PUSH		="stw"; | ||
|  | 	$PUSHMA		="stwm"; | ||
|  | 	$POP		="ldw"; | ||
|  | 	$POPMB		="ldwm"; | ||
|  | } | ||
|  | 
 | ||
|  | if ($output =~ /512/) { | ||
|  | 	$func="sha512_block_data_order"; | ||
|  | 	$SZ=8; | ||
|  | 	@Sigma0=(28,34,39); | ||
|  | 	@Sigma1=(14,18,41); | ||
|  | 	@sigma0=(1,  8, 7); | ||
|  | 	@sigma1=(19,61, 6); | ||
|  | 	$rounds=80; | ||
|  | 	$LAST10BITS=0x017; | ||
|  | 	$LD="ldd"; | ||
|  | 	$LDM="ldd,ma"; | ||
|  | 	$ST="std"; | ||
|  | } else { | ||
|  | 	$func="sha256_block_data_order"; | ||
|  | 	$SZ=4; | ||
|  | 	@Sigma0=( 2,13,22); | ||
|  | 	@Sigma1=( 6,11,25); | ||
|  | 	@sigma0=( 7,18, 3); | ||
|  | 	@sigma1=(17,19,10); | ||
|  | 	$rounds=64; | ||
|  | 	$LAST10BITS=0x0f2; | ||
|  | 	$LD="ldw"; | ||
|  | 	$LDM="ldwm"; | ||
|  | 	$ST="stw"; | ||
|  | } | ||
|  | 
 | ||
|  | $FRAME=16*$SIZE_T+$FRAME_MARKER;# 16 saved regs + frame marker | ||
|  | 				#                 [+ argument transfer] | ||
|  | $XOFF=16*$SZ+32;		# local variables | ||
|  | $FRAME+=$XOFF; | ||
|  | $XOFF+=$FRAME_MARKER;		# distance between %sp and local variables | ||
|  | 
 | ||
|  | $ctx="%r26";	# zapped by $a0 | ||
|  | $inp="%r25";	# zapped by $a1 | ||
|  | $num="%r24";	# zapped by $t0 | ||
|  | 
 | ||
|  | $a0 ="%r26"; | ||
|  | $a1 ="%r25"; | ||
|  | $t0 ="%r24"; | ||
|  | $t1 ="%r29"; | ||
|  | $Tbl="%r31"; | ||
|  | 
 | ||
|  | @V=($A,$B,$C,$D,$E,$F,$G,$H)=("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28"); | ||
|  | 
 | ||
|  | @X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", | ||
|  |     "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$inp); | ||
|  | 
 | ||
|  | sub ROUND_00_15 { | ||
|  | my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; | ||
|  | $code.=<<___; | ||
|  | 	_ror	$e,$Sigma1[0],$a0 | ||
|  | 	and	$f,$e,$t0 | ||
|  | 	_ror	$e,$Sigma1[1],$a1 | ||
|  | 	addl	$t1,$h,$h | ||
|  | 	andcm	$g,$e,$t1 | ||
|  | 	xor	$a1,$a0,$a0 | ||
|  | 	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1 | ||
|  | 	or	$t0,$t1,$t1		; Ch(e,f,g) | ||
|  | 	addl	@X[$i%16],$h,$h | ||
|  | 	xor	$a0,$a1,$a1		; Sigma1(e) | ||
|  | 	addl	$t1,$h,$h | ||
|  | 	_ror	$a,$Sigma0[0],$a0 | ||
|  | 	addl	$a1,$h,$h | ||
|  | 
 | ||
|  | 	_ror	$a,$Sigma0[1],$a1 | ||
|  | 	and	$a,$b,$t0 | ||
|  | 	and	$a,$c,$t1 | ||
|  | 	xor	$a1,$a0,$a0 | ||
|  | 	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1 | ||
|  | 	xor	$t1,$t0,$t0 | ||
|  | 	and	$b,$c,$t1 | ||
|  | 	xor	$a0,$a1,$a1		; Sigma0(a) | ||
|  | 	addl	$h,$d,$d | ||
|  | 	xor	$t1,$t0,$t0		; Maj(a,b,c) | ||
|  | 	`"$LDM	$SZ($Tbl),$t1" if ($i<15)` | ||
|  | 	addl	$a1,$h,$h | ||
|  | 	addl	$t0,$h,$h | ||
|  | 
 | ||
|  | ___ | ||
|  | } | ||
|  | 
 | ||
|  | sub ROUND_16_xx { | ||
|  | my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; | ||
|  | $i-=16; | ||
|  | $code.=<<___; | ||
|  | 	_ror	@X[($i+1)%16],$sigma0[0],$a0 | ||
|  | 	_ror	@X[($i+1)%16],$sigma0[1],$a1 | ||
|  | 	addl	@X[($i+9)%16],@X[$i],@X[$i] | ||
|  | 	_ror	@X[($i+14)%16],$sigma1[0],$t0 | ||
|  | 	_ror	@X[($i+14)%16],$sigma1[1],$t1 | ||
|  | 	xor	$a1,$a0,$a0 | ||
|  | 	_shr	@X[($i+1)%16],$sigma0[2],$a1 | ||
|  | 	xor	$t1,$t0,$t0 | ||
|  | 	_shr	@X[($i+14)%16],$sigma1[2],$t1 | ||
|  | 	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f]) | ||
|  | 	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f]) | ||
|  | 	$LDM	$SZ($Tbl),$t1 | ||
|  | 	addl	$a0,@X[$i],@X[$i] | ||
|  | 	addl	$t0,@X[$i],@X[$i] | ||
|  | ___ | ||
|  | $code.=<<___ if ($i==15); | ||
|  | 	extru	$t1,31,10,$a1 | ||
|  | 	comiclr,<> $LAST10BITS,$a1,%r0 | ||
|  | 	ldo	1($Tbl),$Tbl		; signal end of $Tbl | ||
|  | ___ | ||
|  | &ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); | ||
|  | } | ||
|  | 
 | ||
|  | $code=<<___; | ||
|  | 	.LEVEL	$LEVEL | ||
|  | 	.SPACE	\$TEXT\$ | ||
|  | 	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY | ||
|  | 
 | ||
|  | 	.ALIGN	64 | ||
|  | L\$table | ||
|  | ___ | ||
|  | $code.=<<___ if ($SZ==8); | ||
|  | 	.WORD	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd | ||
|  | 	.WORD	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc | ||
|  | 	.WORD	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 | ||
|  | 	.WORD	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 | ||
|  | 	.WORD	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe | ||
|  | 	.WORD	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 | ||
|  | 	.WORD	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 | ||
|  | 	.WORD	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 | ||
|  | 	.WORD	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 | ||
|  | 	.WORD	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 | ||
|  | 	.WORD	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 | ||
|  | 	.WORD	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 | ||
|  | 	.WORD	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 | ||
|  | 	.WORD	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 | ||
|  | 	.WORD	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 | ||
|  | 	.WORD	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 | ||
|  | 	.WORD	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 | ||
|  | 	.WORD	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df | ||
|  | 	.WORD	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 | ||
|  | 	.WORD	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b | ||
|  | 	.WORD	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 | ||
|  | 	.WORD	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 | ||
|  | 	.WORD	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 | ||
|  | 	.WORD	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 | ||
|  | 	.WORD	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 | ||
|  | 	.WORD	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 | ||
|  | 	.WORD	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb | ||
|  | 	.WORD	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 | ||
|  | 	.WORD	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 | ||
|  | 	.WORD	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec | ||
|  | 	.WORD	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 | ||
|  | 	.WORD	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b | ||
|  | 	.WORD	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 | ||
|  | 	.WORD	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 | ||
|  | 	.WORD	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 | ||
|  | 	.WORD	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b | ||
|  | 	.WORD	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 | ||
|  | 	.WORD	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c | ||
|  | 	.WORD	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a | ||
|  | 	.WORD	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 | ||
|  | ___ | ||
|  | $code.=<<___ if ($SZ==4); | ||
|  | 	.WORD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | ||
|  | 	.WORD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | ||
|  | 	.WORD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | ||
|  | 	.WORD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | ||
|  | 	.WORD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | ||
|  | 	.WORD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | ||
|  | 	.WORD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | ||
|  | 	.WORD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | ||
|  | 	.WORD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | ||
|  | 	.WORD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | ||
|  | 	.WORD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | ||
|  | 	.WORD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | ||
|  | 	.WORD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | ||
|  | 	.WORD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | ||
|  | 	.WORD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | ||
|  | 	.WORD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | ||
|  | ___ | ||
|  | $code.=<<___; | ||
|  | 
 | ||
|  | 	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR | ||
|  | 	.ALIGN	64 | ||
|  | $func | ||
|  | 	.PROC | ||
|  | 	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18 | ||
|  | 	.ENTRY | ||
|  | 	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue | ||
|  | 	$PUSHMA	%r3,$FRAME(%sp) | ||
|  | 	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r17,`-$FRAME+14*$SIZE_T`(%sp) | ||
|  | 	$PUSH	%r18,`-$FRAME+15*$SIZE_T`(%sp) | ||
|  | 
 | ||
|  | 	_shl	$num,`log(16*$SZ)/log(2)`,$num | ||
|  | 	addl	$inp,$num,$num		; $num to point at the end of $inp | ||
|  | 
 | ||
|  | 	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments | ||
|  | 	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp) | ||
|  | 	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp) | ||
|  | 
 | ||
|  | 	blr	%r0,$Tbl | ||
|  | 	ldi	3,$t1 | ||
|  | L\$pic | ||
|  | 	andcm	$Tbl,$t1,$Tbl		; wipe privilege level | ||
|  | 	ldo	L\$table-L\$pic($Tbl),$Tbl | ||
|  | ___ | ||
|  | $code.=<<___ if ($SZ==8 && $SIZE_T==4); | ||
|  | 	ldi	31,$t1 | ||
|  | 	mtctl	$t1,%cr11 | ||
|  | 	extrd,u,*= $t1,%sar,1,$t1	; executes on PA-RISC 1.0 | ||
|  | 	b	L\$parisc1 | ||
|  | 	nop | ||
|  | ___ | ||
|  | $code.=<<___; | ||
|  | 	$LD	`0*$SZ`($ctx),$A	; load context | ||
|  | 	$LD	`1*$SZ`($ctx),$B | ||
|  | 	$LD	`2*$SZ`($ctx),$C | ||
|  | 	$LD	`3*$SZ`($ctx),$D | ||
|  | 	$LD	`4*$SZ`($ctx),$E | ||
|  | 	$LD	`5*$SZ`($ctx),$F | ||
|  | 	$LD	`6*$SZ`($ctx),$G | ||
|  | 	$LD	`7*$SZ`($ctx),$H | ||
|  | 
 | ||
|  | 	extru	$inp,31,`log($SZ)/log(2)`,$t0 | ||
|  | 	sh3addl	$t0,%r0,$t0 | ||
|  | 	subi	`8*$SZ`,$t0,$t0 | ||
|  | 	mtctl	$t0,%cr11		; load %sar with align factor | ||
|  | 
 | ||
|  | L\$oop | ||
|  | 	ldi	`$SZ-1`,$t0 | ||
|  | 	$LDM	$SZ($Tbl),$t1 | ||
|  | 	andcm	$inp,$t0,$t0		; align $inp | ||
|  | ___ | ||
|  | 	for ($i=0;$i<15;$i++) {		# load input block | ||
|  | 	$code.="\t$LD	`$SZ*$i`($t0),@X[$i]\n";		} | ||
|  | $code.=<<___; | ||
|  | 	cmpb,*=	$inp,$t0,L\$aligned | ||
|  | 	$LD	`$SZ*15`($t0),@X[15] | ||
|  | 	$LD	`$SZ*16`($t0),@X[16] | ||
|  | ___ | ||
|  | 	for ($i=0;$i<16;$i++) {		# align data | ||
|  | 	$code.="\t_align	@X[$i],@X[$i+1],@X[$i]\n";	} | ||
|  | $code.=<<___; | ||
|  | L\$aligned | ||
|  | 	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD | ||
|  | ___ | ||
|  | 
 | ||
|  | for($i=0;$i<16;$i++)	{ &ROUND_00_15($i,@V); unshift(@V,pop(@V)); } | ||
|  | $code.=<<___; | ||
|  | L\$rounds | ||
|  | 	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD | ||
|  | ___ | ||
|  | for(;$i<32;$i++)	{ &ROUND_16_xx($i,@V); unshift(@V,pop(@V)); } | ||
|  | $code.=<<___; | ||
|  | 	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled? | ||
|  | 	nop | ||
|  | 
 | ||
|  | 	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments | ||
|  | 	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | ||
|  | 	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | ||
|  | 	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl | ||
|  | 
 | ||
|  | 	$LD	`0*$SZ`($ctx),@X[0]	; load context | ||
|  | 	$LD	`1*$SZ`($ctx),@X[1] | ||
|  | 	$LD	`2*$SZ`($ctx),@X[2] | ||
|  | 	$LD	`3*$SZ`($ctx),@X[3] | ||
|  | 	$LD	`4*$SZ`($ctx),@X[4] | ||
|  | 	$LD	`5*$SZ`($ctx),@X[5] | ||
|  | 	addl	@X[0],$A,$A | ||
|  | 	$LD	`6*$SZ`($ctx),@X[6] | ||
|  | 	addl	@X[1],$B,$B | ||
|  | 	$LD	`7*$SZ`($ctx),@X[7] | ||
|  | 	ldo	`16*$SZ`($inp),$inp	; advance $inp | ||
|  | 
 | ||
|  | 	$ST	$A,`0*$SZ`($ctx)	; save context | ||
|  | 	addl	@X[2],$C,$C | ||
|  | 	$ST	$B,`1*$SZ`($ctx) | ||
|  | 	addl	@X[3],$D,$D | ||
|  | 	$ST	$C,`2*$SZ`($ctx) | ||
|  | 	addl	@X[4],$E,$E | ||
|  | 	$ST	$D,`3*$SZ`($ctx) | ||
|  | 	addl	@X[5],$F,$F | ||
|  | 	$ST	$E,`4*$SZ`($ctx) | ||
|  | 	addl	@X[6],$G,$G | ||
|  | 	$ST	$F,`5*$SZ`($ctx) | ||
|  | 	addl	@X[7],$H,$H | ||
|  | 	$ST	$G,`6*$SZ`($ctx) | ||
|  | 	$ST	$H,`7*$SZ`($ctx) | ||
|  | 
 | ||
|  | 	cmpb,*<>,n $inp,$num,L\$oop | ||
|  | 	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp | ||
|  | ___ | ||
|  | if ($SZ==8 && $SIZE_T==4)	# SHA512 for 32-bit PA-RISC 1.0 | ||
|  | {{ | ||
|  | $code.=<<___; | ||
|  | 	b	L\$done | ||
|  | 	nop | ||
|  | 
 | ||
|  | 	.ALIGN	64 | ||
|  | L\$parisc1 | ||
|  | ___ | ||
|  | 
 | ||
|  | @V=(  $Ahi,  $Alo,  $Bhi,  $Blo,  $Chi,  $Clo,  $Dhi,  $Dlo, | ||
|  |       $Ehi,  $Elo,  $Fhi,  $Flo,  $Ghi,  $Glo,  $Hhi,  $Hlo) =  | ||
|  |    ( "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", | ||
|  |      "%r9","%r10","%r11","%r12","%r13","%r14","%r15","%r16"); | ||
|  | $a0 ="%r17"; | ||
|  | $a1 ="%r18"; | ||
|  | $a2 ="%r19"; | ||
|  | $a3 ="%r20"; | ||
|  | $t0 ="%r21"; | ||
|  | $t1 ="%r22"; | ||
|  | $t2 ="%r28"; | ||
|  | $t3 ="%r29"; | ||
|  | $Tbl="%r31"; | ||
|  | 
 | ||
|  | @X=("%r23","%r24","%r25","%r26");	# zaps $num,$inp,$ctx | ||
|  | 
 | ||
|  | sub ROUND_00_15_pa1 { | ||
|  | my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, | ||
|  |        $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag)=@_; | ||
|  | my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; | ||
|  | 
 | ||
|  | $code.=<<___ if (!$flag); | ||
|  | 	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi | ||
|  | 	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1] | ||
|  | ___ | ||
|  | $code.=<<___; | ||
|  | 	shd	$ehi,$elo,$Sigma1[0],$t0 | ||
|  | 	 add	$Xlo,$hlo,$hlo | ||
|  | 	shd	$elo,$ehi,$Sigma1[0],$t1 | ||
|  | 	 addc	$Xhi,$hhi,$hhi		; h += X[i] | ||
|  | 	shd	$ehi,$elo,$Sigma1[1],$t2 | ||
|  | 	 ldwm	8($Tbl),$Xhi | ||
|  | 	shd	$elo,$ehi,$Sigma1[1],$t3 | ||
|  | 	 ldw	-4($Tbl),$Xlo		; load K[i] | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	xor	$t3,$t1,$t1 | ||
|  | 	 and	$flo,$elo,$a0 | ||
|  | 	 and	$fhi,$ehi,$a1 | ||
|  | 	shd	$ehi,$elo,$Sigma1[2],$t2 | ||
|  | 	 andcm	$glo,$elo,$a2 | ||
|  | 	shd	$elo,$ehi,$Sigma1[2],$t3 | ||
|  | 	 andcm	$ghi,$ehi,$a3 | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	xor	$t3,$t1,$t1		; Sigma1(e) | ||
|  | 	add	$Xlo,$hlo,$hlo | ||
|  | 	 xor	$a2,$a0,$a0 | ||
|  | 	addc	$Xhi,$hhi,$hhi		; h += K[i] | ||
|  | 	 xor	$a3,$a1,$a1		; Ch(e,f,g) | ||
|  | 
 | ||
|  | 	 add	$t0,$hlo,$hlo | ||
|  | 	shd	$ahi,$alo,$Sigma0[0],$t0 | ||
|  | 	 addc	$t1,$hhi,$hhi		; h += Sigma1(e) | ||
|  | 	shd	$alo,$ahi,$Sigma0[0],$t1	 | ||
|  | 	 add	$a0,$hlo,$hlo | ||
|  | 	shd	$ahi,$alo,$Sigma0[1],$t2 | ||
|  | 	 addc	$a1,$hhi,$hhi		; h += Ch(e,f,g) | ||
|  | 	shd	$alo,$ahi,$Sigma0[1],$t3 | ||
|  | 
 | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	xor	$t3,$t1,$t1 | ||
|  | 	shd	$ahi,$alo,$Sigma0[2],$t2 | ||
|  | 	and	$alo,$blo,$a0 | ||
|  | 	shd	$alo,$ahi,$Sigma0[2],$t3 | ||
|  | 	and	$ahi,$bhi,$a1 | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	xor	$t3,$t1,$t1		; Sigma0(a) | ||
|  | 
 | ||
|  | 	and	$alo,$clo,$a2 | ||
|  | 	and	$ahi,$chi,$a3 | ||
|  | 	xor	$a2,$a0,$a0 | ||
|  | 	 add	$hlo,$dlo,$dlo | ||
|  | 	xor	$a3,$a1,$a1 | ||
|  | 	 addc	$hhi,$dhi,$dhi		; d += h | ||
|  | 	and	$blo,$clo,$a2 | ||
|  | 	 add	$t0,$hlo,$hlo | ||
|  | 	and	$bhi,$chi,$a3 | ||
|  | 	 addc	$t1,$hhi,$hhi		; h += Sigma0(a) | ||
|  | 	xor	$a2,$a0,$a0 | ||
|  | 	 add	$a0,$hlo,$hlo | ||
|  | 	xor	$a3,$a1,$a1		; Maj(a,b,c) | ||
|  | 	 addc	$a1,$hhi,$hhi		; h += Maj(a,b,c) | ||
|  | 
 | ||
|  | ___ | ||
|  | $code.=<<___ if ($i==15 && $flag); | ||
|  | 	extru	$Xlo,31,10,$Xlo | ||
|  | 	comiclr,= $LAST10BITS,$Xlo,%r0 | ||
|  | 	b	L\$rounds_pa1 | ||
|  | 	nop | ||
|  | ___ | ||
|  | push(@X,shift(@X)); push(@X,shift(@X)); | ||
|  | } | ||
|  | 
 | ||
|  | sub ROUND_16_xx_pa1 { | ||
|  | my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X; | ||
|  | my ($i)=shift; | ||
|  | $i-=16; | ||
|  | $code.=<<___; | ||
|  | 	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi | ||
|  | 	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1] | ||
|  | 	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1 | ||
|  | 	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9] | ||
|  | 	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3 | ||
|  | 	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14] | ||
|  | 	shd	$Xnhi,$Xnlo,$sigma0[0],$t0 | ||
|  | 	shd	$Xnlo,$Xnhi,$sigma0[0],$t1 | ||
|  | 	 add	$a0,$Xlo,$Xlo | ||
|  | 	shd	$Xnhi,$Xnlo,$sigma0[1],$t2 | ||
|  | 	 addc	$a1,$Xhi,$Xhi | ||
|  | 	shd	$Xnlo,$Xnhi,$sigma0[1],$t3 | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	shd	$Xnhi,$Xnlo,$sigma0[2],$t2 | ||
|  | 	xor	$t3,$t1,$t1 | ||
|  | 	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3 | ||
|  | 	xor	$t2,$t0,$t0 | ||
|  | 	 shd	$a3,$a2,$sigma1[0],$a0 | ||
|  | 	xor	$t3,$t1,$t1		; sigma0(X[i+1)&0x0f]) | ||
|  | 	 shd	$a2,$a3,$sigma1[0],$a1 | ||
|  | 	add	$t0,$Xlo,$Xlo | ||
|  | 	 shd	$a3,$a2,$sigma1[1],$t2 | ||
|  | 	addc	$t1,$Xhi,$Xhi | ||
|  | 	 shd	$a2,$a3,$sigma1[1],$t3 | ||
|  | 	xor	$t2,$a0,$a0 | ||
|  | 	shd	$a3,$a2,$sigma1[2],$t2 | ||
|  | 	xor	$t3,$a1,$a1 | ||
|  | 	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3 | ||
|  | 	xor	$t2,$a0,$a0 | ||
|  | 	xor	$t3,$a1,$a1		; sigma0(X[i+14)&0x0f]) | ||
|  | 	add	$a0,$Xlo,$Xlo | ||
|  | 	addc	$a1,$Xhi,$Xhi | ||
|  | 
 | ||
|  | 	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp) | ||
|  | 	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp) | ||
|  | ___ | ||
|  | &ROUND_00_15_pa1($i,@_,1); | ||
|  | } | ||
|  | $code.=<<___; | ||
|  | 	ldw	`0*4`($ctx),$Ahi		; load context | ||
|  | 	ldw	`1*4`($ctx),$Alo | ||
|  | 	ldw	`2*4`($ctx),$Bhi | ||
|  | 	ldw	`3*4`($ctx),$Blo | ||
|  | 	ldw	`4*4`($ctx),$Chi | ||
|  | 	ldw	`5*4`($ctx),$Clo | ||
|  | 	ldw	`6*4`($ctx),$Dhi | ||
|  | 	ldw	`7*4`($ctx),$Dlo | ||
|  | 	ldw	`8*4`($ctx),$Ehi | ||
|  | 	ldw	`9*4`($ctx),$Elo | ||
|  | 	ldw	`10*4`($ctx),$Fhi | ||
|  | 	ldw	`11*4`($ctx),$Flo | ||
|  | 	ldw	`12*4`($ctx),$Ghi | ||
|  | 	ldw	`13*4`($ctx),$Glo | ||
|  | 	ldw	`14*4`($ctx),$Hhi | ||
|  | 	ldw	`15*4`($ctx),$Hlo | ||
|  | 
 | ||
|  | 	extru	$inp,31,2,$t0 | ||
|  | 	sh3addl	$t0,%r0,$t0 | ||
|  | 	subi	32,$t0,$t0 | ||
|  | 	mtctl	$t0,%cr11		; load %sar with align factor | ||
|  | 
 | ||
|  | L\$oop_pa1 | ||
|  | 	extru	$inp,31,2,$a3 | ||
|  | 	comib,=	0,$a3,L\$aligned_pa1 | ||
|  | 	sub	$inp,$a3,$inp | ||
|  | 
 | ||
|  | 	ldw	`0*4`($inp),$X[0] | ||
|  | 	ldw	`1*4`($inp),$X[1] | ||
|  | 	ldw	`2*4`($inp),$t2 | ||
|  | 	ldw	`3*4`($inp),$t3 | ||
|  | 	ldw	`4*4`($inp),$a0 | ||
|  | 	ldw	`5*4`($inp),$a1 | ||
|  | 	ldw	`6*4`($inp),$a2 | ||
|  | 	ldw	`7*4`($inp),$a3 | ||
|  | 	vshd	$X[0],$X[1],$X[0] | ||
|  | 	vshd	$X[1],$t2,$X[1] | ||
|  | 	stw	$X[0],`-$XOFF+0*4`(%sp) | ||
|  | 	ldw	`8*4`($inp),$t0 | ||
|  | 	vshd	$t2,$t3,$t2 | ||
|  | 	stw	$X[1],`-$XOFF+1*4`(%sp) | ||
|  | 	ldw	`9*4`($inp),$t1 | ||
|  | 	vshd	$t3,$a0,$t3 | ||
|  | ___ | ||
|  | { | ||
|  | my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | ||
|  | for ($i=2;$i<=(128/4-8);$i++) { | ||
|  | $code.=<<___; | ||
|  | 	stw	$t[0],`-$XOFF+$i*4`(%sp) | ||
|  | 	ldw	`(8+$i)*4`($inp),$t[0] | ||
|  | 	vshd	$t[1],$t[2],$t[1] | ||
|  | ___ | ||
|  | push(@t,shift(@t)); | ||
|  | } | ||
|  | for (;$i<(128/4-1);$i++) { | ||
|  | $code.=<<___; | ||
|  | 	stw	$t[0],`-$XOFF+$i*4`(%sp) | ||
|  | 	vshd	$t[1],$t[2],$t[1] | ||
|  | ___ | ||
|  | push(@t,shift(@t)); | ||
|  | } | ||
|  | $code.=<<___; | ||
|  | 	b	L\$collected_pa1 | ||
|  | 	stw	$t[0],`-$XOFF+$i*4`(%sp) | ||
|  | 
 | ||
|  | ___ | ||
|  | } | ||
|  | $code.=<<___; | ||
|  | L\$aligned_pa1 | ||
|  | 	ldw	`0*4`($inp),$X[0] | ||
|  | 	ldw	`1*4`($inp),$X[1] | ||
|  | 	ldw	`2*4`($inp),$t2 | ||
|  | 	ldw	`3*4`($inp),$t3 | ||
|  | 	ldw	`4*4`($inp),$a0 | ||
|  | 	ldw	`5*4`($inp),$a1 | ||
|  | 	ldw	`6*4`($inp),$a2 | ||
|  | 	ldw	`7*4`($inp),$a3 | ||
|  | 	stw	$X[0],`-$XOFF+0*4`(%sp) | ||
|  | 	ldw	`8*4`($inp),$t0 | ||
|  | 	stw	$X[1],`-$XOFF+1*4`(%sp) | ||
|  | 	ldw	`9*4`($inp),$t1 | ||
|  | ___ | ||
|  | { | ||
|  | my @t=($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1); | ||
|  | for ($i=2;$i<(128/4-8);$i++) { | ||
|  | $code.=<<___; | ||
|  | 	stw	$t[0],`-$XOFF+$i*4`(%sp) | ||
|  | 	ldw	`(8+$i)*4`($inp),$t[0] | ||
|  | ___ | ||
|  | push(@t,shift(@t)); | ||
|  | } | ||
|  | for (;$i<128/4;$i++) { | ||
|  | $code.=<<___; | ||
|  | 	stw	$t[0],`-$XOFF+$i*4`(%sp) | ||
|  | ___ | ||
|  | push(@t,shift(@t)); | ||
|  | } | ||
|  | $code.="L\$collected_pa1\n"; | ||
|  | } | ||
|  | 
 | ||
|  | for($i=0;$i<16;$i++)	{ &ROUND_00_15_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } | ||
|  | $code.="L\$rounds_pa1\n"; | ||
|  | for(;$i<32;$i++)	{ &ROUND_16_xx_pa1($i,@V); unshift(@V,pop(@V)); unshift(@V,pop(@V)); } | ||
|  | 
 | ||
|  | $code.=<<___; | ||
|  | 	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments | ||
|  | 	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp | ||
|  | 	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num | ||
|  | 	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl | ||
|  | 
 | ||
|  | 	ldw	`0*4`($ctx),$t1		; update context | ||
|  | 	ldw	`1*4`($ctx),$t0 | ||
|  | 	ldw	`2*4`($ctx),$t3 | ||
|  | 	ldw	`3*4`($ctx),$t2 | ||
|  | 	ldw	`4*4`($ctx),$a1 | ||
|  | 	ldw	`5*4`($ctx),$a0 | ||
|  | 	ldw	`6*4`($ctx),$a3 | ||
|  | 	add	$t0,$Alo,$Alo | ||
|  | 	ldw	`7*4`($ctx),$a2 | ||
|  | 	addc	$t1,$Ahi,$Ahi | ||
|  | 	ldw	`8*4`($ctx),$t1 | ||
|  | 	add	$t2,$Blo,$Blo | ||
|  | 	ldw	`9*4`($ctx),$t0 | ||
|  | 	addc	$t3,$Bhi,$Bhi | ||
|  | 	ldw	`10*4`($ctx),$t3 | ||
|  | 	add	$a0,$Clo,$Clo | ||
|  | 	ldw	`11*4`($ctx),$t2 | ||
|  | 	addc	$a1,$Chi,$Chi | ||
|  | 	ldw	`12*4`($ctx),$a1 | ||
|  | 	add	$a2,$Dlo,$Dlo | ||
|  | 	ldw	`13*4`($ctx),$a0 | ||
|  | 	addc	$a3,$Dhi,$Dhi | ||
|  | 	ldw	`14*4`($ctx),$a3 | ||
|  | 	add	$t0,$Elo,$Elo | ||
|  | 	ldw	`15*4`($ctx),$a2 | ||
|  | 	addc	$t1,$Ehi,$Ehi | ||
|  | 	stw	$Ahi,`0*4`($ctx) | ||
|  | 	add	$t2,$Flo,$Flo | ||
|  | 	stw	$Alo,`1*4`($ctx) | ||
|  | 	addc	$t3,$Fhi,$Fhi | ||
|  | 	stw	$Bhi,`2*4`($ctx) | ||
|  | 	add	$a0,$Glo,$Glo | ||
|  | 	stw	$Blo,`3*4`($ctx) | ||
|  | 	addc	$a1,$Ghi,$Ghi | ||
|  | 	stw	$Chi,`4*4`($ctx) | ||
|  | 	add	$a2,$Hlo,$Hlo | ||
|  | 	stw	$Clo,`5*4`($ctx) | ||
|  | 	addc	$a3,$Hhi,$Hhi | ||
|  | 	stw	$Dhi,`6*4`($ctx) | ||
|  | 	ldo	`16*$SZ`($inp),$inp	; advance $inp | ||
|  | 	stw	$Dlo,`7*4`($ctx) | ||
|  | 	stw	$Ehi,`8*4`($ctx) | ||
|  | 	stw	$Elo,`9*4`($ctx) | ||
|  | 	stw	$Fhi,`10*4`($ctx) | ||
|  | 	stw	$Flo,`11*4`($ctx) | ||
|  | 	stw	$Ghi,`12*4`($ctx) | ||
|  | 	stw	$Glo,`13*4`($ctx) | ||
|  | 	stw	$Hhi,`14*4`($ctx) | ||
|  | 	comb,=	$inp,$num,L\$done | ||
|  | 	stw	$Hlo,`15*4`($ctx) | ||
|  | 	b	L\$oop_pa1 | ||
|  | 	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp | ||
|  | L\$done | ||
|  | ___ | ||
|  | }} | ||
|  | $code.=<<___; | ||
|  | 	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue | ||
|  | 	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4 | ||
|  | 	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5 | ||
|  | 	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6 | ||
|  | 	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7 | ||
|  | 	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8 | ||
|  | 	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9 | ||
|  | 	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10 | ||
|  | 	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11 | ||
|  | 	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12 | ||
|  | 	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13 | ||
|  | 	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14 | ||
|  | 	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15 | ||
|  | 	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16 | ||
|  | 	$POP	`-$FRAME+14*$SIZE_T`(%sp),%r17 | ||
|  | 	$POP	`-$FRAME+15*$SIZE_T`(%sp),%r18 | ||
|  | 	bv	(%r2) | ||
|  | 	.EXIT | ||
|  | 	$POPMB	-$FRAME(%sp),%r3 | ||
|  | 	.PROCEND | ||
|  | 	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" | ||
|  | ___ | ||
|  | 
 | ||
|  | # Explicitly encode PA-RISC 2.0 instructions used in this module, so | ||
|  | # that it can be compiled with .LEVEL 1.0. It should be noted that I | ||
|  | # wouldn't have to do this, if GNU assembler understood .ALLOW 2.0 | ||
|  | # directive... | ||
|  | 
 | ||
|  | my $ldd = sub { | ||
|  |   my ($mod,$args) = @_; | ||
|  |   my $orig = "ldd$mod\t$args"; | ||
|  | 
 | ||
|  |     if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 3 suffices | ||
|  |     {	my $opcode=(0x14<<26)|($2<<21)|($3<<16)|(($1&0x1FF8)<<1)|(($1>>13)&1); | ||
|  | 	$opcode|=(1<<3) if ($mod =~ /^,m/); | ||
|  | 	$opcode|=(1<<2) if ($mod =~ /^,mb/); | ||
|  | 	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; | ||
|  |     } | ||
|  |     else { "\t".$orig; } | ||
|  | }; | ||
|  | 
 | ||
|  | my $std = sub { | ||
|  |   my ($mod,$args) = @_; | ||
|  |   my $orig = "std$mod\t$args"; | ||
|  | 
 | ||
|  |     if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices | ||
|  |     {	my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1); | ||
|  | 	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; | ||
|  |     } | ||
|  |     else { "\t".$orig; } | ||
|  | }; | ||
|  | 
 | ||
|  | my $extrd = sub { | ||
|  |   my ($mod,$args) = @_; | ||
|  |   my $orig = "extrd$mod\t$args"; | ||
|  | 
 | ||
|  |     # I only have ",u" completer, it's implicitly encoded... | ||
|  |     if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15 | ||
|  |     {	my $opcode=(0x36<<26)|($1<<21)|($4<<16); | ||
|  | 	my $len=32-$3; | ||
|  | 	$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5);		# encode pos | ||
|  | 	$opcode |= (($len&0x20)<<7)|($len&0x1f);		# encode len | ||
|  | 	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; | ||
|  |     } | ||
|  |     elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)	# format 12 | ||
|  |     {	my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9); | ||
|  | 	my $len=32-$2; | ||
|  | 	$opcode |= (($len&0x20)<<3)|($len&0x1f);		# encode len | ||
|  | 	$opcode |= (1<<13) if ($mod =~ /,\**=/); | ||
|  | 	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; | ||
|  |     } | ||
|  |     else { "\t".$orig; } | ||
|  | }; | ||
|  | 
 | ||
|  | my $shrpd = sub { | ||
|  |   my ($mod,$args) = @_; | ||
|  |   my $orig = "shrpd$mod\t$args"; | ||
|  | 
 | ||
|  |     if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14 | ||
|  |     {	my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4; | ||
|  | 	my $cpos=63-$3; | ||
|  | 	$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);		# encode sa | ||
|  | 	sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig; | ||
|  |     } | ||
|  |     elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11 | ||
|  |     {	sprintf "\t.WORD\t0x%08x\t; %s", | ||
|  | 		(0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig; | ||
|  |     } | ||
|  |     else { "\t".$orig; } | ||
|  | }; | ||
|  | 
 | ||
|  | sub assemble { | ||
|  |   my ($mnemonic,$mod,$args)=@_; | ||
|  |   my $opcode = eval("\$$mnemonic"); | ||
|  | 
 | ||
|  |     ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args"; | ||
|  | } | ||
|  | 
 | ||
|  | foreach (split("\n",$code)) { | ||
|  | 	s/\`([^\`]*)\`/eval $1/ge; | ||
|  | 
 | ||
|  | 	s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/ | ||
|  | 		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32 | ||
|  | 		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or | ||
|  | 	# translate made up instructons: _ror, _shr, _align, _shl | ||
|  | 	s/_ror(\s+)(%r[0-9]+),/ | ||
|  | 		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or | ||
|  | 
 | ||
|  | 	s/_shr(\s+%r[0-9]+),([0-9]+),/ | ||
|  | 		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2) | ||
|  | 		:        sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or | ||
|  | 
 | ||
|  | 	s/_align(\s+%r[0-9]+,%r[0-9]+),/ | ||
|  | 		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e		or | ||
|  | 
 | ||
|  | 	s/_shl(\s+%r[0-9]+),([0-9]+),/ | ||
|  | 		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2) | ||
|  | 		:            sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e; | ||
|  | 
 | ||
|  | 	s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($SIZE_T==4); | ||
|  | 
 | ||
|  | 	s/cmpb,\*/comb,/ if ($SIZE_T==4); | ||
|  | 
 | ||
|  | 	s/\bbv\b/bve/    if ($SIZE_T==8); | ||
|  | 
 | ||
|  | 	print $_,"\n"; | ||
|  | } | ||
|  | 
 | ||
|  | close STDOUT; |