lua-symmetric

Symmetric crypto for Lua
Log | Files | Refs

sha512.c (17544B)


      1 /* $OpenBSD: sha512.c,v 1.12 2014/07/10 22:45:58 jsing Exp $ */
      2 /* ====================================================================
      3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
      4  * according to the OpenSSL license [found in ../../LICENSE].
      5  * ====================================================================
      6  */
      7 
      8 #include <machine/endian.h>
      9 
     10 #include <stdlib.h>
     11 #include <string.h>
     12 
     13 #include <openssl/opensslconf.h>
     14 
     15 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
     16 /*
     17  * IMPLEMENTATION NOTES.
     18  *
     19  * As you might have noticed, 32-bit hash algorithms:
     20  *
     21  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
     22  * - optimized versions implement two transform functions: one operating
     23  *   on [aligned] data in host byte order and one - on data in input
     24  *   stream byte order;
     25  * - share common byte-order neutral collector and padding function
     26  *   implementations, ../md32_common.h;
     27  *
     28  * None of the above applies to this SHA-512 implementation. Reasons
     29  * [in reverse order] are:
     30  *
     31  * - it's the only 64-bit hash algorithm at the time of this writing,
     32  *   there is no need for common collector/padding implementation [yet];
     33  * - by supporting only one transform function [which operates on
     34  *   *aligned* data in input stream byte order, big-endian in this case]
     35  *   we minimize burden of maintenance in two ways: a) collector/padding
     36  *   function is simpler; b) only one transform function to stare at;
     37  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
     38  *   apply a number of optimizations to mitigate potential performance
     39  *   penalties caused by previous design decision;
     40  *
     41  * Caveat lector.
     42  *
     43  * Implementation relies on the fact that "long long" is 64-bit on
     44  * both 32- and 64-bit platforms. If some compiler vendor comes up
     45  * with 128-bit long long, adjustment to sha.h would be required.
     46  * As this implementation relies on 64-bit integer type, it's totally
     47  * inappropriate for platforms which don't support it, most notably
     48  * 16-bit platforms.
     49  *					<appro@fy.chalmers.se>
     50  */
     51 
     52 #include <openssl/crypto.h>
     53 #include <openssl/opensslv.h>
     54 #include <openssl/sha.h>
     55 
        /*
         * When this macro is defined, SHA512_Update and SHA512_Transform pass
         * caller data straight to sha512_block_data_order; when it is not, they
         * first copy input that is not aligned to sizeof(c->u.d[0]) into the
         * context buffer.
         */
     56 #if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
     57 #define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
     58 #endif
     59 
     60 int SHA384_Init(SHA512_CTX *c)
     61 	{
     62 	c->h[0]=U64(0xcbbb9d5dc1059ed8);
     63 	c->h[1]=U64(0x629a292a367cd507);
     64 	c->h[2]=U64(0x9159015a3070dd17);
     65 	c->h[3]=U64(0x152fecd8f70e5939);
     66 	c->h[4]=U64(0x67332667ffc00b31);
     67 	c->h[5]=U64(0x8eb44a8768581511);
     68 	c->h[6]=U64(0xdb0c2e0d64f98fa7);
     69 	c->h[7]=U64(0x47b5481dbefa4fa4);
     70 
     71         c->Nl=0;        c->Nh=0;
     72         c->num=0;       c->md_len=SHA384_DIGEST_LENGTH;
     73         return 1;
     74 	}
     75 
     76 int SHA512_Init(SHA512_CTX *c)
     77 	{
     78 	c->h[0]=U64(0x6a09e667f3bcc908);
     79 	c->h[1]=U64(0xbb67ae8584caa73b);
     80 	c->h[2]=U64(0x3c6ef372fe94f82b);
     81 	c->h[3]=U64(0xa54ff53a5f1d36f1);
     82 	c->h[4]=U64(0x510e527fade682d1);
     83 	c->h[5]=U64(0x9b05688c2b3e6c1f);
     84 	c->h[6]=U64(0x1f83d9abfb41bd6b);
     85 	c->h[7]=U64(0x5be0cd19137e2179);
     86 
     87         c->Nl=0;        c->Nh=0;
     88         c->num=0;       c->md_len=SHA512_DIGEST_LENGTH;
     89         return 1;
     90 	}
     91 
        /*
         * Forward declaration of the block transform called by the functions
         * below: it processes `num` blocks read from `in` and updates ctx.
         * The declaration is static unless SHA512_ASM is defined; the C
         * definitions later in this file are compiled only when SHA512_ASM is
         * not defined (presumably an external assembly routine supplies the
         * symbol otherwise -- confirm against the build).
         */
     92 #ifndef SHA512_ASM
     93 static
     94 #endif
     95 void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
     96 
     97 int SHA512_Final (unsigned char *md, SHA512_CTX *c)
     98 	{
     99 	unsigned char *p=(unsigned char *)c->u.p;
    100 	size_t n=c->num;
    101 
    102 	p[n]=0x80;	/* There always is a room for one */
    103 	n++;
    104 	if (n > (sizeof(c->u)-16))
    105 		memset (p+n,0,sizeof(c->u)-n), n=0,
    106 		sha512_block_data_order (c,p,1);
    107 
    108 	memset (p+n,0,sizeof(c->u)-16-n);
    109 #if BYTE_ORDER == BIG_ENDIAN
    110 	c->u.d[SHA_LBLOCK-2] = c->Nh;
    111 	c->u.d[SHA_LBLOCK-1] = c->Nl;
    112 #else
    113 	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
    114 	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
    115 	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
    116 	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
    117 	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
    118 	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
    119 	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
    120 	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
    121 	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
    122 	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
    123 	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
    124 	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
    125 	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
    126 	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
    127 	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
    128 	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
    129 #endif
    130 
    131 	sha512_block_data_order (c,p,1);
    132 
    133 	if (md==0) return 0;
    134 
    135 	switch (c->md_len)
    136 		{
    137 		/* Let compiler decide if it's appropriate to unroll... */
    138 		case SHA384_DIGEST_LENGTH:
    139 			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
    140 				{
    141 				SHA_LONG64 t = c->h[n];
    142 
    143 				*(md++)	= (unsigned char)(t>>56);
    144 				*(md++)	= (unsigned char)(t>>48);
    145 				*(md++)	= (unsigned char)(t>>40);
    146 				*(md++)	= (unsigned char)(t>>32);
    147 				*(md++)	= (unsigned char)(t>>24);
    148 				*(md++)	= (unsigned char)(t>>16);
    149 				*(md++)	= (unsigned char)(t>>8);
    150 				*(md++)	= (unsigned char)(t);
    151 				}
    152 			break;
    153 		case SHA512_DIGEST_LENGTH:
    154 			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
    155 				{
    156 				SHA_LONG64 t = c->h[n];
    157 
    158 				*(md++)	= (unsigned char)(t>>56);
    159 				*(md++)	= (unsigned char)(t>>48);
    160 				*(md++)	= (unsigned char)(t>>40);
    161 				*(md++)	= (unsigned char)(t>>32);
    162 				*(md++)	= (unsigned char)(t>>24);
    163 				*(md++)	= (unsigned char)(t>>16);
    164 				*(md++)	= (unsigned char)(t>>8);
    165 				*(md++)	= (unsigned char)(t);
    166 				}
    167 			break;
    168 		/* ... as well as make sure md_len is not abused. */
    169 		default:	return 0;
    170 		}
    171 
    172 	return 1;
    173 	}
    174 
    175 int SHA384_Final (unsigned char *md,SHA512_CTX *c)
    176 {   return SHA512_Final (md,c);   }
    177 
    178 int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
    179 	{
    180 	SHA_LONG64	l;
    181 	unsigned char  *p=c->u.p;
    182 	const unsigned char *data=(const unsigned char *)_data;
    183 
    184 	if (len==0) return  1;
    185 
    186 	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
    187 	if (l < c->Nl)		c->Nh++;
    188 	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
    189 	c->Nl=l;
    190 
    191 	if (c->num != 0)
    192 		{
    193 		size_t n = sizeof(c->u) - c->num;
    194 
    195 		if (len < n)
    196 			{
    197 			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
    198 			return 1;
    199 			}
    200 		else	{
    201 			memcpy (p+c->num,data,n), c->num = 0;
    202 			len-=n, data+=n;
    203 			sha512_block_data_order (c,p,1);
    204 			}
    205 		}
    206 
    207 	if (len >= sizeof(c->u))
    208 		{
    209 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    210 		if ((size_t)data%sizeof(c->u.d[0]) != 0)
    211 			while (len >= sizeof(c->u))
    212 				memcpy (p,data,sizeof(c->u)),
    213 				sha512_block_data_order (c,p,1),
    214 				len  -= sizeof(c->u),
    215 				data += sizeof(c->u);
    216 		else
    217 #endif
    218 			sha512_block_data_order (c,data,len/sizeof(c->u)),
    219 			data += len,
    220 			len  %= sizeof(c->u),
    221 			data -= len;
    222 		}
    223 
    224 	if (len != 0)	memcpy (p,data,len), c->num = (int)len;
    225 
    226 	return 1;
    227 	}
    228 
    229 int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
    230 {   return SHA512_Update (c,data,len);   }
    231 
    232 void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
    233 	{
    234 #ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    235 	if ((size_t)data%sizeof(c->u.d[0]) != 0)
    236 		memcpy(c->u.p,data,sizeof(c->u.p)),
    237 		data = c->u.p;
    238 #endif
    239 	sha512_block_data_order (c,data,1);
    240 	}
    241 
    242 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
    243 	{
    244 	SHA512_CTX c;
    245 	static unsigned char m[SHA384_DIGEST_LENGTH];
    246 
    247 	if (md == NULL) md=m;
    248 	SHA384_Init(&c);
    249 	SHA512_Update(&c,d,n);
    250 	SHA512_Final(md,&c);
    251 	OPENSSL_cleanse(&c,sizeof(c));
    252 	return(md);
    253 	}
    254 
    255 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
    256 	{
    257 	SHA512_CTX c;
    258 	static unsigned char m[SHA512_DIGEST_LENGTH];
    259 
    260 	if (md == NULL) md=m;
    261 	SHA512_Init(&c);
    262 	SHA512_Update(&c,d,n);
    263 	SHA512_Final(md,&c);
    264 	OPENSSL_cleanse(&c,sizeof(c));
    265 	return(md);
    266 	}
    267 
    268 #ifndef SHA512_ASM
        /*
         * Per-round additive constants: 80 64-bit words, one per round.  The
         * transforms below add K512[i] (K512[i+j] in the unrolled variant) in
         * round i.  Presumably the SHA-384/512 constants of FIPS 180-4 --
         * verify against the standard before editing any entry.
         */
    269 static const SHA_LONG64 K512[80] = {
    270         U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
    271         U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
    272         U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
    273         U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
    274         U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
    275         U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
    276         U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
    277         U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
    278         U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
    279         U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
    280         U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
    281         U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
    282         U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
    283         U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
    284         U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
    285         U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
    286         U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
    287         U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
    288         U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
    289         U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
    290         U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
    291         U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
    292         U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
    293         U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
    294         U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
    295         U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
    296         U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
    297         U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
    298         U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
    299         U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
    300         U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
    301         U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
    302         U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
    303         U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
    304         U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
    305         U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
    306         U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
    307         U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
    308         U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
    309         U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
    310 
        /*
         * Optional inline-assembly versions of ROTR (rotate a 64-bit value
         * right by n) and PULL64 (load a 64-bit word with its bytes reversed).
         * They are selected only for GCC-compatible compilers with inline asm
         * enabled.  Whatever a branch leaves undefined is supplied by the
         * portable fallbacks that follow this section.
         */
    311 #if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
        /* x86-64: defines both ROTR (rorq) and PULL64 (64-bit load + bswapq). */
    312 # if defined(__x86_64) || defined(__x86_64__)
    313 #  define ROTR(a,n)	({ SHA_LONG64 ret;		\
    314 				asm ("rorq %1,%0"	\
    315 				: "=r"(ret)		\
    316 				: "J"(n),"0"(a)		\
    317 				: "cc"); ret;		})
    318 #   define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
    319 				asm ("bswapq	%0"		\
    320 				: "=r"(ret)			\
    321 				: "0"(ret)); ret;		})
        /*
         * 32-bit x86: defines PULL64 only.  The word is read as two 32-bit
         * halves, each half is byte-swapped, and the halves are recombined
         * with the first one in the upper 32 bits.
         */
    322 # elif (defined(__i386) || defined(__i386__))
        /* I386_ONLY: swap via xchgb/roll instead of the bswapl instruction. */
    323 #  if defined(I386_ONLY)
    324 #   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
    325 			 unsigned int hi=p[0],lo=p[1];		\
    326 				asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
    327 				    "roll $16,%%eax; roll $16,%%edx; "\
    328 				    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
    329 				: "=a"(lo),"=d"(hi)		\
    330 				: "0"(lo),"1"(hi) : "cc");	\
    331 				((SHA_LONG64)hi)<<32|lo;	})
    332 #  else
    333 #   define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
    334 			 unsigned int hi=p[0],lo=p[1];		\
    335 				asm ("bswapl %0; bswapl %1;"	\
    336 				: "=r"(lo),"=r"(hi)		\
    337 				: "0"(lo),"1"(hi));		\
    338 				((SHA_LONG64)hi)<<32|lo;	})
    339 #  endif
        /* 64-bit PowerPC: defines ROTR only (rotrdi). */
    340 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
    341 #  define ROTR(a,n)	({ SHA_LONG64 ret;		\
    342 				asm ("rotrdi %0,%1,%2"	\
    343 				: "=r"(ret)		\
    344 				: "r"(a),"K"(n)); ret;	})
    345 # endif
    346 #endif
    347 
    348 #ifndef PULL64
    349 #define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
    350 #define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
    351 #endif
    352 
    353 #ifndef ROTR
    354 #define ROTR(x,s)	(((x)>>s) | (x)<<(64-s))
    355 #endif
    356 
    357 #define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
    358 #define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
    359 #define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
    360 #define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
    361 
    362 #define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
    363 #define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
    364 
    365 
    366 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
    367 /*
    368  * This code should give better results on 32-bit CPU with less than
    369  * ~24 registers, both size and performance wise...
    370  */
        /*
         * Block transform, 32-bit x86 variant.
         *
         * Processes `num` blocks starting at `in` and accumulates the result
         * into ctx->h.  Each block is read as 16 64-bit words W[0..15] through
         * PULL64; W then advances by SHA_LBLOCK words.
         *
         * Only working variables a and e are kept in locals (A, E).  The other
         * six live in the array X and are addressed through the pointer F,
         * which moves down one slot per round: a value stored to F[0] / F[4]
         * in one round is read as F[1] / F[5] in the next, and so on, which
         * replaces the usual h=g, g=f, ... shuffle.  F[8] receives each round's
         * message-schedule word, so F[8+k] is the word from k rounds earlier.
         */
    371 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    372 	{
    373 	const SHA_LONG64 *W=in;
    374 	SHA_LONG64	A,E,T;
    375 	SHA_LONG64	X[9+80],*F;
    376 	int i;
    377 
    378 			while (num--) {
    379 
        	/* Start the window at the top of X; it is back at X after 80 rounds. */
    380 	F    = X+80;
    381 	A    = ctx->h[0];	F[1] = ctx->h[1];
    382 	F[2] = ctx->h[2];	F[3] = ctx->h[3];
    383 	E    = ctx->h[4];	F[5] = ctx->h[5];
    384 	F[6] = ctx->h[6];	F[7] = ctx->h[7];
    385 
        	/* Rounds 0..15: the schedule word comes straight from the input block. */
    386 	for (i=0;i<16;i++,F--)
    387 		{
    388 		T = PULL64(W[i]);
    389 		F[0] = A;
    390 		F[4] = E;
    391 		F[8] = T;
    392 		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
    393 		E    = F[3] + T;
    394 		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
    395 		}
    396 
        	/*
        	 * Rounds 16..79: the schedule word is derived from the words of
        	 * 15, 2, 16 and 7 rounds earlier (F[8+15], F[8+2], F[8+16], F[8+7]).
        	 */
    397 	for (;i<80;i++,F--)
    398 		{
    399 		T    = sigma0(F[8+16-1]);
    400 		T   += sigma1(F[8+16-14]);
    401 		T   += F[8+16] + F[8+16-9];
    402 
    403 		F[0] = A;
    404 		F[4] = E;
    405 		F[8] = T;
    406 		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
    407 		E    = F[3] + T;
    408 		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
    409 		}
    410 
        	/* Fold the final working variables back into the hash state. */
    411 	ctx->h[0] += A;		ctx->h[1] += F[1];
    412 	ctx->h[2] += F[2];	ctx->h[3] += F[3];
    413 	ctx->h[4] += E;		ctx->h[5] += F[5];
    414 	ctx->h[6] += F[6];	ctx->h[7] += F[7];
    415 
    416 			W+=SHA_LBLOCK;
    417 			}
    418 	}
    419 
    420 #elif defined(OPENSSL_SMALL_FOOTPRINT)
    421 
        /*
         * Block transform, compact variant (OPENSSL_SMALL_FOOTPRINT).
         *
         * Processes `num` blocks starting at `in` and accumulates the result
         * into ctx->h.  Each block is read as 16 64-bit words W[0..15]; W then
         * advances by SHA_LBLOCK words.  The eight working variables a..h are
         * shifted explicitly every round, and X is a 16-entry message schedule
         * indexed modulo 16 (i & 0xf) and updated in place.
         */
    422 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    423 	{
    424 	const SHA_LONG64 *W=in;
    425 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
    426 	SHA_LONG64	X[16];
    427 	int i;
    428 
    429 			while (num--) {
    430 
    431 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
    432 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
    433 
        	/* Rounds 0..15: schedule words are the input words themselves. */
    434 	for (i=0;i<16;i++)
    435 		{
        		/* Big-endian hosts read the word directly; others go through PULL64. */
    436 #if BYTE_ORDER == BIG_ENDIAN
    437 		T1 = X[i] = W[i];
    438 #else
    439 		T1 = X[i] = PULL64(W[i]);
    440 #endif
    441 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
    442 		T2 = Sigma0(a) + Maj(a,b,c);
    443 		h = g;	g = f;	f = e;	e = d + T1;
    444 		d = c;	c = b;	b = a;	a = T1 + T2;
    445 		}
    446 
        	/*
        	 * Rounds 16..79: X[i&0xf] still holds the word from 16 rounds ago;
        	 * it is advanced in place using the slots at i+1, i+14 and i+9
        	 * (all modulo 16).
        	 */
    447 	for (;i<80;i++)
    448 		{
    449 		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
    450 		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);
    451 
    452 		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
    453 		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
    454 		T2 = Sigma0(a) + Maj(a,b,c);
    455 		h = g;	g = f;	f = e;	e = d + T1;
    456 		d = c;	c = b;	b = a;	a = T1 + T2;
    457 		}
    458 
        	/* Fold the final working variables back into the hash state. */
    459 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
    460 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
    461 
    462 			W+=SHA_LBLOCK;
    463 			}
    464 	}
    465 
    466 #else
    467 
        /*
         * ROUND_00_15(i,a..h): one round, with the arguments naming the
         * variables that currently play the roles a..h.  The caller must have
         * loaded the round's schedule word into T1 beforehand.  The variable
         * passed as h is overwritten with T1 + Sigma0(a) + Maj(a,b,c) and T1 is
         * added to the one passed as d; the other six are left untouched.
         * Callers rotate the argument list by one position per round instead
         * of moving values between variables.  Relies on T1 and K512 being in
         * scope at the point of use.
         */
    468 #define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
    469 	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
    470 	h = Sigma0(a) + Maj(a,b,c);			\
    471 	d += T1;	h += T1;		} while (0)
    472 
        /*
         * ROUND_16_80(i,j,a..h,X): advances schedule slot X[j&0x0f] in place
         * from the slots at j+1, j+14 and j+9 (modulo 16), leaves the new word
         * in T1, and then performs ROUND_00_15 with round index i+j.  Relies on
         * s0, s1 and T1 being in scope at the point of use.
         */
    473 #define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
    474 	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
    475 	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
    476 	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
    477 	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
    478 
        /*
         * Block transform, default unrolled variant.
         *
         * Processes `num` blocks starting at `in` and accumulates the result
         * into ctx->h.  Each block is read as 16 64-bit words W[0..15]; W then
         * advances by SHA_LBLOCK words.  Rounds are written out with
         * ROUND_00_15 / ROUND_16_80, rotating the a..h argument list by one
         * position per round, so the variable names line up with their roles
         * again after every 8 rounds.  X is the 16-entry message schedule.
         */
    479 static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
    480 	{
    481 	const SHA_LONG64 *W=in;
    482 	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
    483 	SHA_LONG64	X[16];
    484 	int i;
    485 
    486 			while (num--) {
    487 
    488 	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
    489 	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];
    490 
        	/*
        	 * Rounds 0..15.  Big-endian hosts read the input words directly;
        	 * other hosts convert each word with PULL64.
        	 */
    491 #if BYTE_ORDER == BIG_ENDIAN
    492 	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
    493 	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
    494 	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
    495 	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
    496 	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
    497 	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
    498 	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
    499 	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
    500 	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
    501 	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
    502 	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
    503 	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
    504 	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
    505 	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
    506 	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
    507 	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
    508 #else
    509 	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
    510 	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
    511 	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
    512 	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
    513 	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
    514 	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
    515 	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
    516 	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
    517 	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
    518 	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
    519 	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
    520 	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
    521 	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
    522 	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
    523 	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
    524 	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
    525 #endif
    526 
        	/* Rounds 16..79: four passes of 16 unrolled rounds; round index is i+j. */
    527 	for (i=16;i<80;i+=16)
    528 		{
    529 		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
    530 		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
    531 		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
    532 		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
    533 		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
    534 		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
    535 		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
    536 		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
    537 		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
    538 		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
    539 		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
    540 		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
    541 		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
    542 		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
    543 		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
    544 		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
    545 		}
    546 
        	/* Fold the final working variables back into the hash state. */
    547 	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
    548 	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;
    549 
    550 			W+=SHA_LBLOCK;
    551 			}
    552 	}
    553 
    554 #endif
    555 
    556 #endif /* SHA512_ASM */
    557 
    558 #endif /* !OPENSSL_NO_SHA512 */