You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
	
	
		
			116 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			ArmAsm
		
	
		
		
			
		
	
	
			116 lines
		
	
	
		
			2.7 KiB
		
	
	
	
		
			ArmAsm
		
	
| 
											10 years ago
										 | #include "arm_arch.h" | ||
|  | 
 | ||
|  | .text | ||
|  | .arch	armv8-a+crypto | ||
// gcm_init_v8
//   In:  x1 -> 16 bytes, loaded as H
//   Out: x0 -> 16 bytes, receives the value the code calls "twisted H"
//   Touches v3 and v16-v19 only. Makes no calls and does not use the stack.
|  | .global	gcm_init_v8
 | ||
|  | .type	gcm_init_v8,%function | ||
|  | .align	4
 | ||
|  | gcm_init_v8: | ||
|  | 	ld1		{v17.2d},[x1]		//load H | ||
|  | 	movi		v16.16b,#0xe1 | ||
// v3 = H with its two 64-bit halves exchanged
|  | 	ext		v3.16b,v17.16b,v17.16b,#8 | ||
// 0xe1 in every byte, shifted left by 57, leaves 0xc2 in the top byte of each 64-bit lane
|  | 	shl	v16.2d,v16.2d,#57 | ||
// v18 = 1 in each lane (the top bit of the constant moved down to bit 0)
|  | 	ushr	v18.2d,v16.2d,#63 | ||
|  | 	ext		v16.16b,v18.16b,v16.16b,#8		//t0=0xc2....01 | ||
// replicate 32-bit element 1 of H across the vector
|  | 	dup		v17.4s,v17.s[1] | ||
// v19 = the top bit of each 64-bit lane of v3, about to be lost by the shift below
|  | 	ushr	v19.2d,v3.2d,#63 | ||
// arithmetic shift turns the sign bit of each element into an all-ones or all-zero mask
|  | 	sshr	v17.4s,v17.4s,#31		//broadcast carry bit | ||
// only lane 0 of v19 survives, since bit 0 of the upper lane of t0 is clear
|  | 	and		v19.16b,v19.16b,v16.16b | ||
|  | 	shl	v3.2d,v3.2d,#1 | ||
// move the surviving carry from lane 0 to lane 1 so the orr completes a 128-bit shift
|  | 	ext		v19.16b,v19.16b,v19.16b,#8 | ||
// keep t0 only where the mask is set
|  | 	and		v16.16b,v16.16b,v17.16b | ||
|  | 	orr		v3.16b,v3.16b,v19.16b		//H<<<=1 | ||
|  | 	eor		v3.16b,v3.16b,v16.16b		//twisted H | ||
|  | 	st1		{v3.2d},[x0] | ||
|  | 
 | ||
|  | 	ret | ||
|  | .size	gcm_init_v8,.-gcm_init_v8 | ||
|  | 
 | ||
// gcm_gmult_v8
//   In:  x0 -> Xi, 16 bytes (read here, written back by the shared tail)
//        x1 -> twisted H, 16 bytes, read only
//   This routine only prepares registers and then branches to .Lgmult_v8 inside
//   gcm_ghash_v8. The multiply, the reduction, the store to [x0] and the ret all
//   happen there.
//   Register state handed to .Lgmult_v8:
//     v3  = byte-reversed Xi with its 64-bit halves exchanged
//     v17 = byte-reversed Xi (the rev64 is skipped when __ARMEB__ is defined)
//     v19 = 0xc2 in the top byte of each 64-bit lane
//     v20 = twisted H, v21 = twisted H xor its half-exchanged copy
//     x3  = 0, x12 = 0, x2 = x0
|  | .global	gcm_gmult_v8
 | ||
|  | .type	gcm_gmult_v8,%function | ||
|  | .align	4
 | ||
|  | gcm_gmult_v8: | ||
|  | 	ld1		{v17.2d},[x0]		//load Xi | ||
|  | 	movi		v19.16b,#0xe1 | ||
|  | 	ld1		{v20.2d},[x1]		//load twisted H | ||
|  | 	shl	v19.2d,v19.2d,#57 | ||
|  | #ifndef __ARMEB__ | ||
|  | 	rev64	v17.16b,v17.16b | ||
|  | #endif | ||
|  | 	ext		v21.16b,v20.16b,v20.16b,#8 | ||
// x3 = 0 makes the subs at .Lgmult_v8 borrow, so the b.hs there falls through after one pass
|  | 	mov		x3,#0 | ||
|  | 	ext		v3.16b,v17.16b,v17.16b,#8 | ||
// x12 = 0 gives the shared look-ahead load a zero post-increment
|  | 	mov		x12,#0 | ||
|  | 	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing | ||
// the shared look-ahead load reads through x2, so point it at Xi, which is valid memory
|  | 	mov		x2,x0 | ||
|  | 	b		.Lgmult_v8 | ||
|  | .size	gcm_gmult_v8,.-gcm_gmult_v8 | ||
|  | 
 | ||
// gcm_ghash_v8
//   x0  -> Xi, 16 bytes, read on entry and overwritten on exit
//   x1  -> twisted H, 16 bytes, read only
//   x2  -> input, consumed 16 bytes per loop pass
//   x3  =  byte count, stepped down by 16 per block
//          NOTE(review): presumably a non-zero multiple of 16, confirm against callers
//   x12 =  post-increment stride for the input loads. It is 16, and is forced to 0
//          when the block being fetched is the last one, so x2 never moves past it.
//   Touches x2, x3, x12, v0-v3 and v17-v21. Makes no calls and does not use the stack.
//   gcm_gmult_v8 enters this body at .Lgmult_v8 and shares everything from there to ret.
|  | .global	gcm_ghash_v8
 | ||
|  | .type	gcm_ghash_v8,%function | ||
|  | .align	4
 | ||
|  | gcm_ghash_v8: | ||
|  | 	ld1		{v0.2d},[x0]		//load [rotated] Xi | ||
// x3 = bytes left after the first block. The flags feed the csel two lines further down.
|  | 	subs		x3,x3,#16 | ||
// v19 becomes 0xc2 in the top byte of each 64-bit lane after the shl below
|  | 	movi		v19.16b,#0xe1 | ||
|  | 	mov		x12,#16 | ||
|  | 	ld1		{v20.2d},[x1]		//load twisted H | ||
// exactly one block of input: stride 0 keeps x2 on that block
|  | 	csel	x12,xzr,x12,eq | ||
|  | 	ext		v0.16b,v0.16b,v0.16b,#8 | ||
|  | 	shl	v19.2d,v19.2d,#57 | ||
|  | 	ld1		{v17.2d},[x2],x12	//load [rotated] inp | ||
|  | 	ext		v21.16b,v20.16b,v20.16b,#8 | ||
|  | #ifndef __ARMEB__ | ||
|  | 	rev64	v0.16b,v0.16b | ||
|  | 	rev64	v17.16b,v17.16b | ||
|  | #endif | ||
|  | 	eor		v21.16b,v21.16b,v20.16b		//Karatsuba pre-processing | ||
|  | 	ext		v3.16b,v17.16b,v17.16b,#8 | ||
// step across whatever padding the .align below inserts
|  | 	b		.Loop_v8 | ||
|  | 
 | ||
|  | .align	4
 | ||
// Loop top. On entry v0 holds the running Xi, and v17 and v3 hold the current input
// block (v3 is v17 with its 64-bit halves exchanged).
|  | .Loop_v8: | ||
|  | 	ext		v18.16b,v0.16b,v0.16b,#8 | ||
|  | 	eor		v3.16b,v3.16b,v0.16b		//inp^=Xi | ||
|  | 	eor		v17.16b,v17.16b,v18.16b		//v17.16b is rotated inp^Xi | ||
|  | 
 | ||
// Shared multiply-and-reduce body. gcm_gmult_v8 arrives here with x3 = 0, x12 = 0, x2 = x0.
|  | .Lgmult_v8: | ||
|  | 	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo | ||
|  | 	eor		v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing | ||
|  | 	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi | ||
// sets the flags tested by b.hs at the bottom of the loop. Nothing in between writes them.
|  | 	subs		x3,x3,#16 | ||
|  | 	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi) | ||
// the count just reached 0, so the block fetched below is the last one: stop advancing x2
|  | 	csel	x12,xzr,x12,eq | ||
|  | 
 | ||
|  | 	ext		v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing | ||
|  | 	eor		v18.16b,v0.16b,v2.16b | ||
|  | 	eor		v1.16b,v1.16b,v17.16b | ||
// look-ahead load of the next block, issued while the current product is still being reduced.
// With x12 = 0 it re-reads the block x2 already points at.
|  | 	 ld1	{v17.2d},[x2],x12	//load [rotated] inp | ||
|  | 	eor		v1.16b,v1.16b,v18.16b | ||
|  | 	pmull	v18.1q,v0.1d,v19.1d		//1st phase | ||
|  | 
 | ||
|  | 	ins	v2.d[0],v1.d[1] | ||
|  | 	ins	v1.d[1],v0.d[0] | ||
|  | #ifndef __ARMEB__ | ||
|  | 	 rev64	v17.16b,v17.16b | ||
|  | #endif | ||
|  | 	eor		v0.16b,v1.16b,v18.16b | ||
// v3 = next block with halves exchanged, ready for the eor at .Loop_v8
|  | 	 ext		v3.16b,v17.16b,v17.16b,#8 | ||
|  | 
 | ||
|  | 	ext		v18.16b,v0.16b,v0.16b,#8		//2nd phase | ||
|  | 	pmull	v0.1q,v0.1d,v19.1d | ||
|  | 	eor		v18.16b,v18.16b,v2.16b | ||
|  | 	eor		v0.16b,v0.16b,v18.16b | ||
// carry set means the subs above did not borrow, so another block is waiting in v17 and v3
|  | 	b.hs		.Loop_v8 | ||
|  | 
 | ||
// Tail: undo the byte reversal and the half exchange applied on load, then store Xi
|  | #ifndef __ARMEB__ | ||
|  | 	rev64	v0.16b,v0.16b | ||
|  | #endif | ||
|  | 	ext		v0.16b,v0.16b,v0.16b,#8 | ||
|  | 	st1		{v0.2d},[x0]		//write out Xi | ||
|  | 
 | ||
|  | 	ret | ||
|  | .size	gcm_ghash_v8,.-gcm_ghash_v8 | ||
|  | .asciz  "GHASH for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
 | ||
|  | .align  2
 |