lua-arc4random

Cryptographically secure PRNG for Lua
Log | Files | Refs

commit 92196c0a049c2b3492d94c60b9610f24c33df1b0
Author: Michael Savage <mikejsavage@gmail.com>
Date:   Tue, 20 Jan 2015 17:44:34 +0000

Initial commit

Diffstat:
.gitignore | 4++++
Makefile | 22++++++++++++++++++++++
Makefile.mess | 42++++++++++++++++++++++++++++++++++++++++++
Makefile.obsd | 9+++++++++
README.md | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random.c | 195+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random.h | 26++++++++++++++++++++++++++
compat/arc4random/arc4random_freebsd.h | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_linux.h | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_osx.h | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_solaris.h | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_uniform.c | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_win.h | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/chacha_private.h | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_freebsd.c | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_linux.c | 548+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_osx.c | 429+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_solaris.c | 445+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_win.c | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/safebfuns.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512-elf-x86_64.S | 1802+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512-macosx-x86_64.S | 1802+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512.c | 558+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/blf.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/machine/endian.h | 40++++++++++++++++++++++++++++++++++++++++
include/stdlib.h | 30++++++++++++++++++++++++++++++
include/string.h | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/sys/types.h | 21+++++++++++++++++++++
include/unistd.h | 19+++++++++++++++++++
rockspec/arc4random-scm-1.rockspec | 33+++++++++++++++++++++++++++++++++
src/main.c | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
test.lua | 16++++++++++++++++
32 files changed, 7210 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,4 @@ +.tags + +arc4random.so +*.o diff --git a/Makefile b/Makefile @@ -0,0 +1,22 @@ +all: arc4random.so + +include Makefile.mess + +SRCS += src/main.c + +CFLAGS += -Wall -Wno-pointer-sign +CFLAGS += -O2 -fPIC -DNDEBUG + +LDFLAGS += -lm + +OBJS := $(patsubst %.c,%.o,$(SRCS)) + +debug: CFLAGS += -ggdb3 -UNDEBUG +debug: all + +arc4random.so: $(OBJS) + $(CC) -o arc4random.so $(LDFLAGS) $(OBJS) + +clean: + rm -f arc4random.so + rm -f $(OBJS) diff --git a/Makefile.mess b/Makefile.mess @@ -0,0 +1,42 @@ +# This is more or less what libressl does +# See http://openbsd.cs.toronto.edu/cgi-bin/cvsweb/src/lib/libssl/src/crypto/mem_clr.c?rev=1.4&content-type=text/x-cvsweb-markup +CFLAGS += -DOPENSSL_cleanse=explicit_bzero + +# GCC whines without this. Assume everyone has strndup anyway +CFLAGS += -DHAVE_STRNDUP + +# Let Luarocks point us to the right headers +ifdef LUA_INCDIR + CFLAGS += -I$(LUA_INCDIR) +endif + +# OS detection +uname := $(shell uname -s) + +ifneq ($(uname),Darwin) + LDFLAGS += -shared +else + LDFLAGS += -bundle -undefined dynamic_lookup +endif + +CFLAGS += -Iinclude + +SRCS += compat/arc4random/arc4random.c +SRCS += compat/arc4random/arc4random_uniform.c +SRCS += compat/safebfuns.c +SRCS += compat/sha/sha512.c + +ifeq ($(uname),Linux) + SRCS += compat/getentropy/getentropy_linux.c +endif + +ifeq ($(uname),Darwin) + SRCS += compat/getentropy/getentropy_osx.c +endif + +ifeq ($(uname),FreeBSD) + SRCS += compat/getentropy/getentropy_freebsd.c +endif + +compat/safebfuns.o: compat/safebfuns.c + $(CC) $(CFLAGS) -O0 -c -o $@ $^ diff --git a/Makefile.obsd b/Makefile.obsd @@ -0,0 +1,9 @@ +LIB = arc4random +SRCS = src/main.c + +CFLAGS += `pkg-config --cflags lua51` + +SHLIB_MAJOR = 1 +SHLIB_MINOR = 0 + +.include <bsd.lib.mk> diff --git a/README.md b/README.md @@ -0,0 +1,57 @@ +A Lua wrapper for OpenBSD's arc4random. + + +Requirements +------------ + +lua >= 5.1 + + +Copying +------- + +Many of the files in this repository have been taken from OpenBSD's +tree. You should consult individual file headers for specific licensing +information. More broadly, everything here is compatible with the [ISC +license][ISC]. + +[ISC]: http://en.wikipedia.org/wiki/ISC_license + + +Installation +------------ + + $ luarocks install arc4random + + +Usage +----- + +The library provides two methods, `random` and `buf`. `random` is +intended to be a drop in replacement for `math.random`, so it handles +three cases. + +`arc4.random()` returns a random floating point number in the range +[0,1]. It differs slightly from `math.random()`, which uses the range +[0,1). + +`arc4.random( n )` and `arc4.random( m, n )` return a random integer in +the range [1,n] and [m,n] respectively. This is identical to the +behavior of `math.random`. + +`arc4.buf( n )` returns a string of `n` random characters. It is +suitable for generating private keys and IVs. + +Some example code: + + local arc4 = require( "arc4random" ) + + print( arc4.random() ) + print( arc4.random( 3 ) ) + print( arc4.random( -5, 5 ) ) + + local str = arc4.buf( 16 ) + str = str:gsub( "(.)", function( c ) + return ( "%02x" ):format( string.byte( c ) ) + end ) + print( str ) diff --git a/compat/arc4random/arc4random.c b/compat/arc4random/arc4random.c @@ -0,0 +1,195 @@ +/* $OpenBSD: arc4random.c,v 1.49 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * ChaCha based random number generator for OpenBSD. + */ + +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> + +#define KEYSTREAM_ONLY +#include "chacha_private.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#ifdef __GNUC__ +#define inline __inline +#else /* !__GNUC__ */ +#define inline +#endif /* !__GNUC__ */ + +#define KEYSZ 32 +#define IVSZ 8 +#define BLOCKSZ 64 +#define RSBUFSZ (16*BLOCKSZ) + +/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */ +static struct _rs { + size_t rs_have; /* valid bytes at end of rs_buf */ + size_t rs_count; /* bytes till reseed */ +} *rs; + +/* Maybe be preserved in fork children, if _rs_allocate() decides. */ +static struct _rsx { + chacha_ctx rs_chacha; /* chacha context for random keystream */ + u_char rs_buf[RSBUFSZ]; /* keystream blocks */ +} *rsx; + +static inline int _rs_allocate(struct _rs **, struct _rsx **); +static inline void _rs_forkdetect(void); +#include "arc4random.h" + +static inline void _rs_rekey(u_char *dat, size_t datlen); + +static inline void +_rs_init(u_char *buf, size_t n) +{ + if (n < KEYSZ + IVSZ) + return; + + if (rs == NULL) { + if (_rs_allocate(&rs, &rsx) == -1) + abort(); + } + + chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0); + chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ); +} + +static void +_rs_stir(void) +{ + u_char rnd[KEYSZ + IVSZ]; + + if (getentropy(rnd, sizeof rnd) == -1) + _getentropy_fail(); + + if (!rs) + _rs_init(rnd, sizeof(rnd)); + else + _rs_rekey(rnd, sizeof(rnd)); + explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */ + + /* invalidate rs_buf */ + rs->rs_have = 0; + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); + + rs->rs_count = 1600000; +} + +static inline void +_rs_stir_if_needed(size_t len) +{ + _rs_forkdetect(); + if (!rs || rs->rs_count <= len) + _rs_stir(); + if (rs->rs_count <= len) + rs->rs_count = 0; + else + rs->rs_count -= len; +} + +static inline void +_rs_rekey(u_char *dat, size_t datlen) +{ +#ifndef KEYSTREAM_ONLY + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); +#endif + /* fill rs_buf with the keystream */ + chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf, + rsx->rs_buf, sizeof(rsx->rs_buf)); + /* mix in optional user provided data */ + if (dat) { + size_t i, m; + + m = min(datlen, KEYSZ + IVSZ); + for (i = 0; i < m; i++) + rsx->rs_buf[i] ^= dat[i]; + } + /* immediately reinit for backtracking resistance */ + _rs_init(rsx->rs_buf, KEYSZ + IVSZ); + memset(rsx->rs_buf, 0, KEYSZ + IVSZ); + rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ; +} + +static inline void +_rs_random_buf(void *_buf, size_t n) +{ + u_char *buf = (u_char *)_buf; + u_char *keystream; + size_t m; + + _rs_stir_if_needed(n); + while (n > 0) { + if (rs->rs_have > 0) { + m = min(n, rs->rs_have); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) + - rs->rs_have; + memcpy(buf, keystream, m); + memset(keystream, 0, m); + buf += m; + n -= m; + rs->rs_have -= m; + } + if (rs->rs_have == 0) + _rs_rekey(NULL, 0); + } +} + +static inline void +_rs_random_u32(uint32_t *val) +{ + u_char *keystream; + + _rs_stir_if_needed(sizeof(*val)); + if (rs->rs_have < sizeof(*val)) + _rs_rekey(NULL, 0); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have; + memcpy(val, keystream, sizeof(*val)); + memset(keystream, 0, sizeof(*val)); + rs->rs_have -= sizeof(*val); +} + +uint32_t +arc4random(void) +{ + uint32_t val; + + _ARC4_LOCK(); + _rs_random_u32(&val); + _ARC4_UNLOCK(); + return val; +} + +void +arc4random_buf(void *buf, size_t n) +{ + _ARC4_LOCK(); + _rs_random_buf(buf, n); + _ARC4_UNLOCK(); +} diff --git a/compat/arc4random/arc4random.h b/compat/arc4random/arc4random.h @@ -0,0 +1,26 @@ +#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H +#define LIBCRYPTOCOMPAT_ARC4RANDOM_H + +#include <sys/param.h> + +#if defined(__FreeBSD__) +#include "arc4random_freebsd.h" + +#elif defined(__linux__) +#include "arc4random_linux.h" + +#elif defined(__APPLE__) +#include "arc4random_osx.h" + +#elif defined(__sun) +#include "arc4random_solaris.h" + +#elif defined(_WIN32) +#include "arc4random_win.h" + +#else +#error "No arc4random hooks defined for this platform." + +#endif + +#endif diff --git a/compat/arc4random/arc4random_freebsd.h b/compat/arc4random/arc4random_freebsd.h @@ -0,0 +1,85 @@ +/* $OpenBSD: arc4random_freebsd.h,v 1.1 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +/* + * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if + * a program does not link to -lthr. Callbacks registered with pthread_atfork() + * appear to fail silently. So, it is not always possible to detect a PID + * wraparound. + */ +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return -1; + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return -1; + } + + _ARC4_ATFORK(_rs_forkhandler); + return 0; +} diff --git a/compat/arc4random/arc4random_linux.h b/compat/arc4random/arc4random_linux.h @@ -0,0 +1,85 @@ +/* $OpenBSD: arc4random_linux.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#ifdef __GLIBC__ +extern void *__dso_handle; +extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *); +#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle) +#else +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) +#endif + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/compat/arc4random/arc4random_osx.h b/compat/arc4random/arc4random_osx.h @@ -0,0 +1,79 @@ +/* $OpenBSD: arc4random_osx.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return -1; + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return -1; + } + + _ARC4_ATFORK(_rs_forkhandler); + return 0; +} diff --git a/compat/arc4random/arc4random_solaris.h b/compat/arc4random/arc4random_solaris.h @@ -0,0 +1,79 @@ +/* $OpenBSD: arc4random_solaris.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/compat/arc4random/arc4random_uniform.c b/compat/arc4random/arc4random_uniform.c @@ -0,0 +1,56 @@ +/* $OpenBSD: arc4random_uniform.c,v 1.1 2014/07/12 13:24:54 deraadt Exp $ */ + +/* + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <stdlib.h> + +/* + * Calculate a uniformly distributed random number less than upper_bound + * avoiding "modulo bias". + * + * Uniformity is achieved by generating new random numbers until the one + * returned is outside the range [0, 2**32 % upper_bound). This + * guarantees the selected random number will be inside + * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) + * after reduction modulo upper_bound. + */ +uint32_t +arc4random_uniform(uint32_t upper_bound) +{ + uint32_t r, min; + + if (upper_bound < 2) + return 0; + + /* 2**32 % x == (2**32 - x) % x */ + min = -upper_bound % upper_bound; + + /* + * This could theoretically loop forever but each retry has + * p > 0.5 (worst case, usually far better) of selecting a + * number inside the range we need, so it should rarely need + * to re-roll. + */ + for (;;) { + r = arc4random(); + if (r >= min) + break; + } + + return r % upper_bound; +} diff --git a/compat/arc4random/arc4random_win.h b/compat/arc4random/arc4random_win.h @@ -0,0 +1,74 @@ +/* $OpenBSD: arc4random_win.h,v 1.3 2014/07/20 16:59:31 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <windows.h> + +static volatile HANDLE arc4random_mtx = NULL; + +/* + * Initialize the mutex on the first lock attempt. On collision, each thread + * will attempt to allocate a mutex and compare-and-swap it into place as the + * global mutex. On failure to swap in the global mutex, the mutex is closed. + */ +#define _ARC4_LOCK() { \ + if (!arc4random_mtx) { \ + HANDLE p = CreateMutex(NULL, FALSE, NULL); \ + if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \ + CloseHandle(p); \ + } \ + WaitForSingleObject(arc4random_mtx, INFINITE); \ +} \ + +#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx) + +static inline void +_getentropy_fail(void) +{ + TerminateProcess(GetCurrentProcess(), 0); +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + *rsp = calloc(1, sizeof(**rsp)); + if (*rsp == NULL) + return (-1); + + *rsxp = calloc(1, sizeof(**rsxp)); + if (*rsxp == NULL) { + free(*rsp); + return (-1); + } + return (0); +} + +static inline void +_rs_forkhandler(void) +{ +} + +static inline void +_rs_forkdetect(void) +{ +} diff --git a/compat/arc4random/chacha_private.h b/compat/arc4random/chacha_private.h @@ -0,0 +1,222 @@ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. +*/ + +/* $OpenBSD$ */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x,const u8 *iv) +{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + +#ifndef KEYSTREAM_ONLY + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); +#endif + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + return; + } + bytes -= 64; + c += 64; +#ifndef KEYSTREAM_ONLY + m += 64; +#endif + } +} diff --git a/compat/getentropy/getentropy_freebsd.c b/compat/getentropy/getentropy_freebsd.c @@ -0,0 +1,64 @@ +/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * Copyright (c) 2014 Brent Cook <bcook@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/sysctl.h> + +#include <errno.h> +#include <stddef.h> + +/* + * Derived from lib/libc/gen/arc4random.c from FreeBSD. + */ +static size_t +getentropy_sysctl(u_char *buf, size_t size) +{ + int mib[2]; + size_t len, done; + + mib[0] = CTL_KERN; + mib[1] = KERN_ARND; + done = 0; + + do { + len = size; + if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) + return (done); + done += len; + buf += len; + size -= len; + } while (size > 0); + + return (done); +} + +int +getentropy(void *buf, size_t len) +{ + if (len <= 256 && + getentropy_sysctl(buf, len) == len) { + return 0; + } + + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_linux.c b/compat/getentropy/getentropy_linux.c @@ -0,0 +1,548 @@ +/* $OpenBSD: getentropy_linux.c,v 1.35 2014/08/28 01:00:57 bcook Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#define _POSIX_C_SOURCE 199309L +#define _GNU_SOURCE 1 +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#ifdef HAVE_SYS_SYSCTL_H +#include <sys/sysctl.h> +#endif +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <link.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <openssl/sha.h> + +#include <linux/types.h> +#include <linux/random.h> +#include <linux/sysctl.h> +#ifdef HAVE_GETAUXVAL +#include <sys/auxv.h> +#endif +#include <sys/vfs.h> + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +#ifdef SYS_getrandom +static int getentropy_getrandom(void *buf, size_t len); +#endif +static int getentropy_urandom(void *buf, size_t len); +#ifdef SYS__sysctl +static int getentropy_sysctl(void *buf, size_t len); +#endif +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + +#ifdef SYS_getrandom + /* + * Try descriptor-less getrandom() + */ + ret = getentropy_getrandom(buf, len); + if (ret != -1) + return (ret); + if (errno != ENOSYS) + return (-1); +#endif + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + +#ifdef SYS__sysctl + /* + * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID. + * sysctl is a failsafe API, so it guarantees a result. This + * should work inside a chroot, or when file descriptors are + * exhuasted. + * + * However this can fail if the Linux kernel removes support + * for sysctl. Starting in 2007, there have been efforts to + * deprecate the sysctl API/ABI, and push callers towards use + * of the chroot-unavailable fd-using /proc mechanism -- + * essentially the same problems as /dev/urandom. + * + * Numerous setbacks have been encountered in their deprecation + * schedule, so as of June 2014 the kernel ABI still exists on + * most Linux architectures. The sysctl() stub in libc is missing + * on some systems. There are also reports that some kernels + * spew messages to the console. + */ + ret = getentropy_sysctl(buf, len); + if (ret != -1) + return (ret); +#endif /* SYS__sysctl */ + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy. See the large + * comment block above. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Linux + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Linux should either retain their + * sysctl ABI, or consider providing a new failsafe API which + * works in a chroot or when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +#ifdef SYS_getrandom +static int +getentropy_getrandom(void *buf, size_t len) +{ + int pre_errno = errno; + int ret; + if (len > 256) + return (-1); + do { + ret = syscall(SYS_getrandom, buf, len, 0); + } while (ret == -1 && errno == EINTR); + + if (ret != len) + return (-1); + errno = pre_errno; + return (0); +} +#endif + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, cnt, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +#ifdef SYS__sysctl +static int +getentropy_sysctl(void *buf, size_t len) +{ + static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID }; + size_t i; + int save_errno = errno; + + for (i = 0; i < len; ) { + size_t chunk = min(len - i, 16); + + /* SYS__sysctl because some systems already removed sysctl() */ + struct __sysctl_args args = { + .name = mib, + .nlen = 3, + .oldval = (char *)buf + i, + .oldlenp = &chunk, + }; + if (syscall(SYS__sysctl, &args) != 0) + goto sysctlfailed; + i += chunk; + } + if (gotdata(buf, len) == 0) { + errno = save_errno; + return (0); /* satisfied */ + } +sysctlfailed: + errno = EIO; + return -1; +} +#endif /* SYS__sysctl */ + +static int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return 0; +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } +#ifdef HAVE_GETAUXVAL +#ifdef AT_RANDOM + /* Not as random as you think but we take what we are given */ + p = (char *) getauxval(AT_RANDOM); + if (p) + HR(p, 16); +#endif +#ifdef AT_SYSINFO_EHDR + p = (char *) getauxval(AT_SYSINFO_EHDR); + if (p) + HR(p, pgs); +#endif +#ifdef AT_BASE + p = (char *) getauxval(AT_BASE); + if (p) + HD(p); +#endif +#endif + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_osx.c b/compat/getentropy/getentropy_osx.c @@ -0,0 +1,429 @@ +/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/sysctl.h> +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <mach/mach_time.h> +#include <mach/mach_host.h> +#include <mach/host_info.h> +#include <sys/socketvar.h> +#include <sys/vmmeter.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_var.h> +#include <netinet/tcp_var.h> +#include <netinet/udp_var.h> +#include <CommonCrypto/CommonDigest.h> +#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c))) +#define SHA512_Init(xxx) (CC_SHA512_Init((xxx))) +#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy))) +#define SHA512_CTX CC_SHA512_CTX +#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +static int getentropy_urandom(void *buf, size_t len); +static int getentropy_fallback(void *buf, size_t len); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on OSX that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that OSX + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that OSX should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS }; +static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS }; +static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS }; +static int kmib[] = { CTL_KERN, KERN_USRSTACK }; +static int hwmib[] = { CTL_HW, HW_USERMEM }; + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + struct tcpstat tcpstat; + struct udpstat udpstat; + struct ipstat ipstat; + u_int64_t mach_time; + unsigned int idata; + void *addr; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + mach_time = mach_absolute_time(); + HD(mach_time); + + ii = sizeof(addr); + HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]), + &addr, &ii, NULL, 0) == -1, addr); + + ii = sizeof(idata); + HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]), + &idata, &ii, NULL, 0) == -1, idata); + + ii = sizeof(tcpstat); + HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]), + &tcpstat, &ii, NULL, 0) == -1, tcpstat); + + ii = sizeof(udpstat); + HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]), + &udpstat, &ii, NULL, 0) == -1, udpstat); + + ii = sizeof(ipstat); + HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]), + &ipstat, &ii, NULL, 0) == -1, ipstat); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + mach_time = mach_absolute_time(); + HD(mach_time); + cnt += (int)mach_time; + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_solaris.c b/compat/getentropy/getentropy_solaris.c @@ -0,0 +1,445 @@ +/* $OpenBSD: getentropy_solaris.c,v 1.8 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <link.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <sys/sha2.h> +#define SHA512_Init SHA512Init +#define SHA512_Update SHA512Update +#define SHA512_Final SHA512Final + +#include <sys/vfs.h> +#include <sys/statfs.h> +#include <sys/loadavg.h> + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +static int getentropy_urandom(void *buf, size_t len, const char *path, + int devfscheck); +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + + /* + * Try to get entropy with /dev/urandom + * + * Solaris provides /dev/urandom as a symbolic link to + * /devices/pseudo/random@0:urandom which is provided by + * a devfs filesystem. Best practice is to use O_NOFOLLOW, + * so we must try the unpublished name directly. + * + * This can fail if the process is inside a chroot which lacks + * the devfs mount, or if file descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, + "/devices/pseudo/random@0:urandom", 1); + if (ret != -1) + return (ret); + + /* + * Unfortunately, chroot spaces on Solaris are sometimes setup + * with direct device node of the well-known /dev/urandom name + * (perhaps to avoid dragging all of devfs into the space). + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, "/dev/urandom", 0); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom has failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on Solaris that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Solaris + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Solaris should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +static int +getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open(path, flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) || + (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return 0; +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + double loadavg[3]; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + HX((getloadavg(loadavg, 3) == -1), loadavg); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_win.c b/compat/getentropy/getentropy_win.c @@ -0,0 +1,59 @@ +/* $OpenBSD: getentropy_win.c,v 1.2 2014/07/13 13:03:09 deraadt Exp $ */ + +/* + * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014, Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <windows.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <wincrypt.h> +#include <process.h> + +int getentropy(void *buf, size_t len); + +/* + * On Windows, CryptGenRandom is supposed to be a well-seeded + * cryptographically strong random number generator. + */ +int +getentropy(void *buf, size_t len) +{ + HCRYPTPROV provider; + + if (len > 256) { + errno = EIO; + return -1; + } + + if (CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) == 0) + goto fail; + if (CryptGenRandom(provider, len, buf) == 0) { + CryptReleaseContext(provider, 0); + goto fail; + } + CryptReleaseContext(provider, 0); + return (0); + +fail: + errno = EIO; + return (-1); +} diff --git a/compat/safebfuns.c b/compat/safebfuns.c @@ -0,0 +1,52 @@ +/* Public domain */ + +#include <string.h> + +#if __clang__ + /* + * http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros + * http://lists.cs.uiuc.edu/pipermail/cfe-dev/2014-December/040627.html + */ + #if __has_attribute( noinline ) /* && __has_attribute( optnone ) */ + #define NOOPT /* __attribute__ (( optnone )) */ + #define NOINLINE __attribute__ (( noinline )) + #else + #error "require clang with noinline and optnone attributes" + #endif +#elif __GNUC__ + /* + * http://gcc.gnu.org/onlinedocs/gcc/Function-Specific-Option-Pragmas.html + * http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html + */ + #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 ) + #define NOOPT __attribute__ (( optimize( 0 ) )) + #define NOINLINE __attribute__ (( noinline )) + #else + #error "require gcc >= 4.4" + #endif +#else + #error "unrecognised compiler" + explode +#endif + +NOOPT NOINLINE void explicit_bzero( void * const buf, const size_t n ) { + size_t i; + unsigned char * p = buf; + + for( i = 0; i < n; i++ ) { + p[ i ] = 0; + } +} + +NOOPT NOINLINE int timingsafe_bcmp( const void * const b1, const void * const b2, const size_t n ) { + size_t i; + const unsigned char * const p1 = b1; + const unsigned char * const p2 = b2; + int result = 0; + + for( i = 0; i < n; i++ ) { + result |= p1[ i ] ^ p2[ i ]; + } + + return result != 0; +} diff --git a/compat/sha/sha512-elf-x86_64.S b/compat/sha/sha512-elf-x86_64.S @@ -0,0 +1,1802 @@ +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/compat/sha/sha512-macosx-x86_64.S b/compat/sha/sha512-macosx-x86_64.S @@ -0,0 +1,1802 @@ +.text + +.globl _sha512_block_data_order + +.p2align 4 +_sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +L$prologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop + +.p2align 4 +L$loop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb L$rounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 diff --git a/compat/sha/sha512.c b/compat/sha/sha512.c @@ -0,0 +1,558 @@ +/* $OpenBSD: sha512.c,v 1.12 2014/07/10 22:45:58 jsing Exp $ */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved + * according to the OpenSSL license [found in ../../LICENSE]. + * ==================================================================== + */ + +#include <machine/endian.h> + +#include <stdlib.h> +#include <string.h> + +#include <openssl/opensslconf.h> + +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512) +/* + * IMPLEMENTATION NOTES. + * + * As you might have noticed 32-bit hash algorithms: + * + * - permit SHA_LONG to be wider than 32-bit (case on CRAY); + * - optimized versions implement two transform functions: one operating + * on [aligned] data in host byte order and one - on data in input + * stream byte order; + * - share common byte-order neutral collector and padding function + * implementations, ../md32_common.h; + * + * Neither of the above applies to this SHA-512 implementations. Reasons + * [in reverse order] are: + * + * - it's the only 64-bit hash algorithm for the moment of this writing, + * there is no need for common collector/padding implementation [yet]; + * - by supporting only one transform function [which operates on + * *aligned* data in input stream byte order, big-endian in this case] + * we minimize burden of maintenance in two ways: a) collector/padding + * function is simpler; b) only one transform function to stare at; + * - SHA_LONG64 is required to be exactly 64-bit in order to be able to + * apply a number of optimizations to mitigate potential performance + * penalties caused by previous design decision; + * + * Caveat lector. + * + * Implementation relies on the fact that "long long" is 64-bit on + * both 32- and 64-bit platforms. If some compiler vendor comes up + * with 128-bit long long, adjustment to sha.h would be required. + * As this implementation relies on 64-bit integer type, it's totally + * inappropriate for platforms which don't support it, most notably + * 16-bit platforms. + * <appro@fy.chalmers.se> + */ + +#include <openssl/crypto.h> +#include <openssl/opensslv.h> +#include <openssl/sha.h> + +#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM) +#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA +#endif + +int SHA384_Init(SHA512_CTX *c) + { + c->h[0]=U64(0xcbbb9d5dc1059ed8); + c->h[1]=U64(0x629a292a367cd507); + c->h[2]=U64(0x9159015a3070dd17); + c->h[3]=U64(0x152fecd8f70e5939); + c->h[4]=U64(0x67332667ffc00b31); + c->h[5]=U64(0x8eb44a8768581511); + c->h[6]=U64(0xdb0c2e0d64f98fa7); + c->h[7]=U64(0x47b5481dbefa4fa4); + + c->Nl=0; c->Nh=0; + c->num=0; c->md_len=SHA384_DIGEST_LENGTH; + return 1; + } + +int SHA512_Init(SHA512_CTX *c) + { + c->h[0]=U64(0x6a09e667f3bcc908); + c->h[1]=U64(0xbb67ae8584caa73b); + c->h[2]=U64(0x3c6ef372fe94f82b); + c->h[3]=U64(0xa54ff53a5f1d36f1); + c->h[4]=U64(0x510e527fade682d1); + c->h[5]=U64(0x9b05688c2b3e6c1f); + c->h[6]=U64(0x1f83d9abfb41bd6b); + c->h[7]=U64(0x5be0cd19137e2179); + + c->Nl=0; c->Nh=0; + c->num=0; c->md_len=SHA512_DIGEST_LENGTH; + return 1; + } + +#ifndef SHA512_ASM +static +#endif +void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num); + +int SHA512_Final (unsigned char *md, SHA512_CTX *c) + { + unsigned char *p=(unsigned char *)c->u.p; + size_t n=c->num; + + p[n]=0x80; /* There always is a room for one */ + n++; + if (n > (sizeof(c->u)-16)) + memset (p+n,0,sizeof(c->u)-n), n=0, + sha512_block_data_order (c,p,1); + + memset (p+n,0,sizeof(c->u)-16-n); +#if BYTE_ORDER == BIG_ENDIAN + c->u.d[SHA_LBLOCK-2] = c->Nh; + c->u.d[SHA_LBLOCK-1] = c->Nl; +#else + p[sizeof(c->u)-1] = (unsigned char)(c->Nl); + p[sizeof(c->u)-2] = (unsigned char)(c->Nl>>8); + p[sizeof(c->u)-3] = (unsigned char)(c->Nl>>16); + p[sizeof(c->u)-4] = (unsigned char)(c->Nl>>24); + p[sizeof(c->u)-5] = (unsigned char)(c->Nl>>32); + p[sizeof(c->u)-6] = (unsigned char)(c->Nl>>40); + p[sizeof(c->u)-7] = (unsigned char)(c->Nl>>48); + p[sizeof(c->u)-8] = (unsigned char)(c->Nl>>56); + p[sizeof(c->u)-9] = (unsigned char)(c->Nh); + p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8); + p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16); + p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24); + p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32); + p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40); + p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48); + p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56); +#endif + + sha512_block_data_order (c,p,1); + + if (md==0) return 0; + + switch (c->md_len) + { + /* Let compiler decide if it's appropriate to unroll... */ + case SHA384_DIGEST_LENGTH: + for (n=0;n<SHA384_DIGEST_LENGTH/8;n++) + { + SHA_LONG64 t = c->h[n]; + + *(md++) = (unsigned char)(t>>56); + *(md++) = (unsigned char)(t>>48); + *(md++) = (unsigned char)(t>>40); + *(md++) = (unsigned char)(t>>32); + *(md++) = (unsigned char)(t>>24); + *(md++) = (unsigned char)(t>>16); + *(md++) = (unsigned char)(t>>8); + *(md++) = (unsigned char)(t); + } + break; + case SHA512_DIGEST_LENGTH: + for (n=0;n<SHA512_DIGEST_LENGTH/8;n++) + { + SHA_LONG64 t = c->h[n]; + + *(md++) = (unsigned char)(t>>56); + *(md++) = (unsigned char)(t>>48); + *(md++) = (unsigned char)(t>>40); + *(md++) = (unsigned char)(t>>32); + *(md++) = (unsigned char)(t>>24); + *(md++) = (unsigned char)(t>>16); + *(md++) = (unsigned char)(t>>8); + *(md++) = (unsigned char)(t); + } + break; + /* ... as well as make sure md_len is not abused. */ + default: return 0; + } + + return 1; + } + +int SHA384_Final (unsigned char *md,SHA512_CTX *c) +{ return SHA512_Final (md,c); } + +int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len) + { + SHA_LONG64 l; + unsigned char *p=c->u.p; + const unsigned char *data=(const unsigned char *)_data; + + if (len==0) return 1; + + l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff); + if (l < c->Nl) c->Nh++; + if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61); + c->Nl=l; + + if (c->num != 0) + { + size_t n = sizeof(c->u) - c->num; + + if (len < n) + { + memcpy (p+c->num,data,len), c->num += (unsigned int)len; + return 1; + } + else { + memcpy (p+c->num,data,n), c->num = 0; + len-=n, data+=n; + sha512_block_data_order (c,p,1); + } + } + + if (len >= sizeof(c->u)) + { +#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA + if ((size_t)data%sizeof(c->u.d[0]) != 0) + while (len >= sizeof(c->u)) + memcpy (p,data,sizeof(c->u)), + sha512_block_data_order (c,p,1), + len -= sizeof(c->u), + data += sizeof(c->u); + else +#endif + sha512_block_data_order (c,data,len/sizeof(c->u)), + data += len, + len %= sizeof(c->u), + data -= len; + } + + if (len != 0) memcpy (p,data,len), c->num = (int)len; + + return 1; + } + +int SHA384_Update (SHA512_CTX *c, const void *data, size_t len) +{ return SHA512_Update (c,data,len); } + +void SHA512_Transform (SHA512_CTX *c, const unsigned char *data) + { +#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA + if ((size_t)data%sizeof(c->u.d[0]) != 0) + memcpy(c->u.p,data,sizeof(c->u.p)), + data = c->u.p; +#endif + sha512_block_data_order (c,data,1); + } + +unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md) + { + SHA512_CTX c; + static unsigned char m[SHA384_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA384_Init(&c); + SHA512_Update(&c,d,n); + SHA512_Final(md,&c); + OPENSSL_cleanse(&c,sizeof(c)); + return(md); + } + +unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md) + { + SHA512_CTX c; + static unsigned char m[SHA512_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA512_Init(&c); + SHA512_Update(&c,d,n); + SHA512_Final(md,&c); + OPENSSL_cleanse(&c,sizeof(c)); + return(md); + } + +#ifndef SHA512_ASM +static const SHA_LONG64 K512[80] = { + U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd), + U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc), + U64(0x3956c25bf348b538),U64(0x59f111f1b605d019), + U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118), + U64(0xd807aa98a3030242),U64(0x12835b0145706fbe), + U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2), + U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1), + U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694), + U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3), + U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65), + U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483), + U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5), + U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210), + U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4), + U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725), + U64(0x06ca6351e003826f),U64(0x142929670a0e6e70), + U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926), + U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df), + U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8), + U64(0x81c2c92e47edaee6),U64(0x92722c851482353b), + U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001), + U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30), + U64(0xd192e819d6ef5218),U64(0xd69906245565a910), + U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8), + U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53), + U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8), + U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb), + U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3), + U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60), + U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec), + U64(0x90befffa23631e28),U64(0xa4506cebde82bde9), + U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b), + U64(0xca273eceea26619c),U64(0xd186b8c721c0c207), + U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178), + U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6), + U64(0x113f9804bef90dae),U64(0x1b710b35131c471b), + U64(0x28db77f523047d84),U64(0x32caab7b40c72493), + U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c), + U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a), + U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) }; + +#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) +# if defined(__x86_64) || defined(__x86_64__) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("rorq %1,%0" \ + : "=r"(ret) \ + : "J"(n),"0"(a) \ + : "cc"); ret; }) +# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \ + asm ("bswapq %0" \ + : "=r"(ret) \ + : "0"(ret)); ret; }) +# elif (defined(__i386) || defined(__i386__)) +# if defined(I386_ONLY) +# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\ + unsigned int hi=p[0],lo=p[1]; \ + asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\ + "roll $16,%%eax; roll $16,%%edx; "\ + "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \ + : "=a"(lo),"=d"(hi) \ + : "0"(lo),"1"(hi) : "cc"); \ + ((SHA_LONG64)hi)<<32|lo; }) +# else +# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\ + unsigned int hi=p[0],lo=p[1]; \ + asm ("bswapl %0; bswapl %1;" \ + : "=r"(lo),"=r"(hi) \ + : "0"(lo),"1"(hi)); \ + ((SHA_LONG64)hi)<<32|lo; }) +# endif +# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64) +# define ROTR(a,n) ({ SHA_LONG64 ret; \ + asm ("rotrdi %0,%1,%2" \ + : "=r"(ret) \ + : "r"(a),"K"(n)); ret; }) +# endif +#endif + +#ifndef PULL64 +#define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8)) +#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7)) +#endif + +#ifndef ROTR +#define ROTR(x,s) (((x)>>s) | (x)<<(64-s)) +#endif + +#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) +#define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) +#define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) +#define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + + +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) +/* + * This code should give better results on 32-bit CPU with less than + * ~24 registers, both size and performance wise... + */ +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 A,E,T; + SHA_LONG64 X[9+80],*F; + int i; + + while (num--) { + + F = X+80; + A = ctx->h[0]; F[1] = ctx->h[1]; + F[2] = ctx->h[2]; F[3] = ctx->h[3]; + E = ctx->h[4]; F[5] = ctx->h[5]; + F[6] = ctx->h[6]; F[7] = ctx->h[7]; + + for (i=0;i<16;i++,F--) + { + T = PULL64(W[i]); + F[0] = A; + F[4] = E; + F[8] = T; + T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i]; + E = F[3] + T; + A = T + Sigma0(A) + Maj(A,F[1],F[2]); + } + + for (;i<80;i++,F--) + { + T = sigma0(F[8+16-1]); + T += sigma1(F[8+16-14]); + T += F[8+16] + F[8+16-9]; + + F[0] = A; + F[4] = E; + F[8] = T; + T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i]; + E = F[3] + T; + A = T + Sigma0(A) + Maj(A,F[1],F[2]); + } + + ctx->h[0] += A; ctx->h[1] += F[1]; + ctx->h[2] += F[2]; ctx->h[3] += F[3]; + ctx->h[4] += E; ctx->h[5] += F[5]; + ctx->h[6] += F[6]; ctx->h[7] += F[7]; + + W+=SHA_LBLOCK; + } + } + +#elif defined(OPENSSL_SMALL_FOOTPRINT) + +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2; + SHA_LONG64 X[16]; + int i; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + for (i=0;i<16;i++) + { +#if BYTE_ORDER == BIG_ENDIAN + T1 = X[i] = W[i]; +#else + T1 = X[i] = PULL64(W[i]); +#endif + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + for (;i<80;i++) + { + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); + + T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf]; + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + W+=SHA_LBLOCK; + } + } + +#else + +#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \ + T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \ + h = Sigma0(a) + Maj(a,b,c); \ + d += T1; h += T1; } while (0) + +#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \ + s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \ + s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \ + T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \ + ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0) + +static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num) + { + const SHA_LONG64 *W=in; + SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1; + SHA_LONG64 X[16]; + int i; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + +#if BYTE_ORDER == BIG_ENDIAN + T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a); +#else + T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a); +#endif + + for (i=16;i<80;i+=16) + { + ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X); + ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X); + ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X); + ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X); + ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X); + ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X); + ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X); + ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X); + ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X); + ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X); + ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X); + ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X); + ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X); + ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X); + ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X); + ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X); + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + W+=SHA_LBLOCK; + } + } + +#endif + +#endif /* SHA512_ASM */ + +#endif /* !OPENSSL_NO_SHA512 */ diff --git a/include/blf.h b/include/blf.h @@ -0,0 +1,82 @@ +/* $OpenBSD: blf.h,v 1.7 2007/03/14 17:59:41 grunk Exp $ */ +/* + * Blowfish - a fast block cipher designed by Bruce Schneier + * + * Copyright 1997 Niels Provos <provos@physnet.uni-hamburg.de> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _BLF_H_ +#define _BLF_H_ + +/* Schneier specifies a maximum key length of 56 bytes. + * This ensures that every key bit affects every cipher + * bit. However, the subkeys can hold up to 72 bytes. + * Warning: For normal blowfish encryption only 56 bytes + * of the key affect all cipherbits. + */ + +#define BLF_N 16 /* Number of Subkeys */ +#define BLF_MAXKEYLEN ((BLF_N-2)*4) /* 448 bits */ +#define BLF_MAXUTILIZED ((BLF_N+2)*4) /* 576 bits */ + +/* Blowfish context */ +typedef struct BlowfishContext { + u_int32_t S[4][256]; /* S-Boxes */ + u_int32_t P[BLF_N + 2]; /* Subkeys */ +} blf_ctx; + +/* Raw access to customized Blowfish + * blf_key is just: + * Blowfish_initstate( state ) + * Blowfish_expand0state( state, key, keylen ) + */ + +void Blowfish_encipher(blf_ctx *, u_int32_t *, u_int32_t *); +void Blowfish_decipher(blf_ctx *, u_int32_t *, u_int32_t *); +void Blowfish_initstate(blf_ctx *); +void Blowfish_expand0state(blf_ctx *, const u_int8_t *, u_int16_t); +void Blowfish_expandstate +(blf_ctx *, const u_int8_t *, u_int16_t, const u_int8_t *, u_int16_t); + +/* Standard Blowfish */ + +void blf_key(blf_ctx *, const u_int8_t *, u_int16_t); +void blf_enc(blf_ctx *, u_int32_t *, u_int16_t); +void blf_dec(blf_ctx *, u_int32_t *, u_int16_t); + +void blf_ecb_encrypt(blf_ctx *, u_int8_t *, u_int32_t); +void blf_ecb_decrypt(blf_ctx *, u_int8_t *, u_int32_t); + +void blf_cbc_encrypt(blf_ctx *, u_int8_t *, u_int8_t *, u_int32_t); +void blf_cbc_decrypt(blf_ctx *, u_int8_t *, u_int8_t *, u_int32_t); + +/* Converts u_int8_t to u_int32_t */ +u_int32_t Blowfish_stream2word(const u_int8_t *, u_int16_t , u_int16_t *); + +#endif diff --git a/include/machine/endian.h b/include/machine/endian.h @@ -0,0 +1,40 @@ +/* + * Public domain + * machine/endian.h compatibility shim + */ + +#ifndef LIBCRYPTOCOMPAT_BYTE_ORDER_H_ +#define LIBCRYPTOCOMPAT_BYTE_ORDER_H_ + +#if defined(_WIN32) + +#define LITTLE_ENDIAN 1234 +#define BIG_ENDIAN 4321 +#define PDP_ENDIAN 3412 + +/* + * Use GCC and Visual Studio compiler defines to determine endian. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define BYTE_ORDER LITTLE_ENDIAN +#else +#define BYTE_ORDER BIG_ENDIAN +#endif + +#elif defined(__linux__) +#include <endian.h> + +#elif defined(__sun) || defined(_AIX) || defined(__hpux) +#include <sys/types.h> +#include <arpa/nameser_compat.h> + +#elif defined(__sgi) +#include <standards.h> +#include <sys/endian.h> + +#else +#include_next <machine/endian.h> + +#endif + +#endif diff --git a/include/stdlib.h b/include/stdlib.h @@ -0,0 +1,30 @@ +/* + * stdlib.h compatibility shim + * Public domain + */ + +#include_next <stdlib.h> + +#ifndef LIBCRYPTOCOMPAT_STDLIB_H +#define LIBCRYPTOCOMPAT_STDLIB_H + +#include <sys/stat.h> +#include <sys/time.h> +#include <stdint.h> + +#ifndef HAVE_ARC4RANDOM_BUF +uint32_t arc4random(void); +uint32_t arc4random_uniform(uint32_t); +void arc4random_buf(void *_buf, size_t n); +#endif + +#ifndef HAVE_REALLOCARRAY +void *reallocarray(void *, size_t, size_t); +#endif + +#ifndef HAVE_STRTONUM +long long strtonum(const char *nptr, long long minval, + long long maxval, const char **errstr); +#endif + +#endif diff --git a/include/string.h b/include/string.h @@ -0,0 +1,69 @@ +/* + * Public domain + * string.h compatibility shim + */ + +#include_next <string.h> + +#ifndef LIBCRYPTOCOMPAT_STRING_H +#define LIBCRYPTOCOMPAT_STRING_H + +#include <sys/types.h> + +#if defined(__sun) || defined(__hpux) +/* Some functions historically defined in string.h were placed in strings.h by + * SUS. Use the same hack as OS X and FreeBSD use to work around on Solaris and HPUX. + */ +#include <strings.h> +#endif + +#ifndef HAVE_STRLCPY +size_t strlcpy(char *dst, const char *src, size_t siz); +#endif + +#ifndef HAVE_STRLCAT +size_t strlcat(char *dst, const char *src, size_t siz); +#endif + +#ifndef HAVE_STRNDUP +char * strndup(const char *str, size_t maxlen); +/* the only user of strnlen is strndup, so only build it if needed */ +#ifndef HAVE_STRNLEN +size_t strnlen(const char *str, size_t maxlen); +#endif +#endif + +#ifndef HAVE_EXPLICIT_BZERO +void explicit_bzero(void *, size_t); +#endif + +#ifndef HAVE_TIMINGSAFE_BCMP +int timingsafe_bcmp(const void *b1, const void *b2, size_t n); +#endif + +#ifndef HAVE_TIMINGSAFE_MEMCMP +int timingsafe_memcmp(const void *b1, const void *b2, size_t len); +#endif + +#ifndef HAVE_MEMMEM +void * memmem(const void *big, size_t big_len, const void *little, + size_t little_len); +#endif + +#ifdef _WIN32 +#include <errno.h> + +static inline char * +posix_strerror(int errnum) +{ + if (errnum == ECONNREFUSED) { + return "Connection refused"; + } + return strerror(errnum); +} + +#define strerror(errnum) posix_strerror(errnum) + +#endif + +#endif diff --git a/include/sys/types.h b/include/sys/types.h @@ -0,0 +1,21 @@ +/* + * Public domain + * sys/types.h compatibility shim + */ + +#include_next <sys/types.h> + +#ifndef LIBCRYPTOCOMPAT_SYS_TYPES_H +#define LIBCRYPTOCOMPAT_SYS_TYPES_H + +#include <stdint.h> + +#ifdef __MINGW32__ +#include <_bsd_types.h> +#endif + +#if !defined(HAVE_ATTRIBUTE__BOUNDED__) && !defined(__bounded__) +# define __bounded__(x, y, z) +#endif + +#endif diff --git a/include/unistd.h b/include/unistd.h @@ -0,0 +1,19 @@ +/* + * Public domain + * unistd.h compatibility shim + */ + +#include_next <unistd.h> + +#ifndef LIBCRYPTOCOMPAT_UNISTD_H +#define LIBCRYPTOCOMPAT_UNISTD_H + +#ifndef HAVE_GETENTROPY +int getentropy(void *buf, size_t buflen); +#endif + +#ifndef HAVE_ISSETUGID +int issetugid(void); +#endif + +#endif diff --git a/rockspec/arc4random-scm-1.rockspec b/rockspec/arc4random-scm-1.rockspec @@ -0,0 +1,33 @@ +package = "arc4random" +version = "scm-1" + +source = { + url = "git://github.com/mikejsavage/lua-arc4random.git", +} + +description = { + summary = "A Lua wrapper for arc4random", + homepage = "http://github.com/mikejsavage/lua-arc4random", + license = "ISC", + maintainer = "Mike Savage", +} + +dependencies = { + "lua >= 5.1", +} + +build = { + type = "make", + + install_pass = false, + + build_variables = { + LUA_INCDIR = "$(LUA_INCDIR)", + }, + + install = { + lib = { + [ "arc4random" ] = "arc4random.so", + }, + }, +} diff --git a/src/main.c b/src/main.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2014, Michael Savage <mike@mikejsavage.co.uk> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +*/ + +#include <stdlib.h> +#include <sys/types.h> +#include <math.h> + +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> + +#if LUA_VERSION_NUM < 502 + #define luaL_newlib( L, l ) ( lua_newtable( L ), luaL_register( L, NULL, l ) ) +#endif + +#define LUAINT_MAX ( sizeof( lua_Integer ) == 32 ? INT32_MAX : INT64_MAX ) + +static int luaarc4_random( lua_State * const L ) { + // http://marc.info/?l=openbsd-tech&m=142175806616927&w=2 + if( lua_gettop( L ) == 0 ) { + uint16_t buf[ 3 ]; + arc4random_buf( buf, sizeof( buf ) ); + + const double d = ldexp( ( double ) buf[ 0 ], -48 ) + + ldexp( ( double ) buf[ 1 ], -32 ) + + ldexp( ( double ) buf[ 2 ], -16 ); + + lua_pushnumber( L, d ); + + return 1; + } + + lua_Integer min = 1; + lua_Integer max; + + if( lua_gettop( L ) == 1 ) { + max = lua_tointeger( L, 1 ); + } + else { + min = lua_tointeger( L, 1 ); + max = lua_tointeger( L, 2 ); + } + + if( max == LUAINT_MAX ) { + lua_pushliteral( L, "upper bound too large" ); + return lua_error( L ); + } + + lua_Integer max_plus_one = max + 1; + + if( max_plus_one <= min ) { + lua_pushliteral( L, "interval is empty" ); + return lua_error( L ); + } + + if( max_plus_one - min > UINT32_MAX ) { + lua_pushliteral( L, "interval too large" ); + return lua_error( L ); + } + + const lua_Integer r = min + arc4random_uniform( max_plus_one - min ); + lua_pushinteger( L, r ); + + return 1; +} + +static int luaarc4_buf( lua_State * const L ) { + const long bytes = luaL_checklong( L, 1 ); + char * const buf = malloc( bytes ); + + if( buf == NULL ) { + lua_pushliteral( L, "out of memory" ); + return lua_error( L ); + } + + arc4random_buf( buf, bytes ); + lua_pushlstring( L, buf, bytes ); + free( buf ); + + return 1; +} + +static const struct luaL_Reg luaarc4_lib[] = { + { "random", luaarc4_random }, + { "buf", luaarc4_buf }, + { NULL, NULL }, +}; + +LUALIB_API int luaopen_arc4random( lua_State * const L ) { + luaL_newlib( L, luaarc4_lib ); + + return 1; +} diff --git a/test.lua b/test.lua @@ -0,0 +1,16 @@ +#! /usr/bin/lua + +local arc4 = require( "arc4random" ) + +for i = 1, 100 do + print( ( "%.3f" ):format( arc4.random() ) ) +end +print( arc4.random() ) +print( arc4.random( 3 ) ) +print( arc4.random( -5, 5 ) ) + +local str = arc4.buf( 16 ) +str = str:gsub( "(.)", function( c ) + return ( "%02x" ):format( string.byte( c ) ) +end ) +print( str )