commit 92196c0a049c2b3492d94c60b9610f24c33df1b0
Author: Michael Savage <mikejsavage@gmail.com>
Date: Tue, 20 Jan 2015 17:44:34 +0000
Initial commit
Diffstat:
32 files changed, 7210 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+.tags
+
+arc4random.so
+*.o
diff --git a/Makefile b/Makefile
@@ -0,0 +1,22 @@
+all: arc4random.so
+
+include Makefile.mess
+
+SRCS += src/main.c
+
+CFLAGS += -Wall -Wno-pointer-sign
+CFLAGS += -O2 -fPIC -DNDEBUG
+
+LDFLAGS += -lm
+
+OBJS := $(patsubst %.c,%.o,$(SRCS))
+
+debug: CFLAGS += -ggdb3 -UNDEBUG
+debug: all
+
+arc4random.so: $(OBJS)
+ $(CC) -o arc4random.so $(LDFLAGS) $(OBJS)
+
+clean:
+ rm -f arc4random.so
+ rm -f $(OBJS)
diff --git a/Makefile.mess b/Makefile.mess
@@ -0,0 +1,42 @@
+# This is more or less what libressl does
+# See http://openbsd.cs.toronto.edu/cgi-bin/cvsweb/src/lib/libssl/src/crypto/mem_clr.c?rev=1.4&content-type=text/x-cvsweb-markup
+CFLAGS += -DOPENSSL_cleanse=explicit_bzero
+
+# GCC whines without this. Assume everyone has strndup anyway
+CFLAGS += -DHAVE_STRNDUP
+
+# Let Luarocks point us to the right headers
+ifdef LUA_INCDIR
+ CFLAGS += -I$(LUA_INCDIR)
+endif
+
+# OS detection
+uname := $(shell uname -s)
+
+ifneq ($(uname),Darwin)
+ LDFLAGS += -shared
+else
+ LDFLAGS += -bundle -undefined dynamic_lookup
+endif
+
+CFLAGS += -Iinclude
+
+SRCS += compat/arc4random/arc4random.c
+SRCS += compat/arc4random/arc4random_uniform.c
+SRCS += compat/safebfuns.c
+SRCS += compat/sha/sha512.c
+
+ifeq ($(uname),Linux)
+ SRCS += compat/getentropy/getentropy_linux.c
+endif
+
+ifeq ($(uname),Darwin)
+ SRCS += compat/getentropy/getentropy_osx.c
+endif
+
+ifeq ($(uname),FreeBSD)
+ SRCS += compat/getentropy/getentropy_freebsd.c
+endif
+
+compat/safebfuns.o: compat/safebfuns.c
+ $(CC) $(CFLAGS) -O0 -c -o $@ $^
diff --git a/Makefile.obsd b/Makefile.obsd
@@ -0,0 +1,9 @@
+LIB = arc4random
+SRCS = src/main.c
+
+CFLAGS += `pkg-config --cflags lua51`
+
+SHLIB_MAJOR = 1
+SHLIB_MINOR = 0
+
+.include <bsd.lib.mk>
diff --git a/README.md b/README.md
@@ -0,0 +1,57 @@
+A Lua wrapper for OpenBSD's arc4random.
+
+
+Requirements
+------------
+
+lua >= 5.1
+
+
+Copying
+-------
+
+Many of the files in this repository have been taken from OpenBSD's
+tree. You should consult individual file headers for specific licensing
+information. More broadly, everything here is compatible with the [ISC
+license][ISC].
+
+[ISC]: http://en.wikipedia.org/wiki/ISC_license
+
+
+Installation
+------------
+
+ $ luarocks install arc4random
+
+
+Usage
+-----
+
+The library provides two methods, `random` and `buf`. `random` is
+intended to be a drop in replacement for `math.random`, so it handles
+three cases.
+
+`arc4.random()` returns a random floating point number in the range
+[0,1]. It differs slightly from `math.random()`, which uses the range
+[0,1).
+
+`arc4.random( n )` and `arc4.random( m, n )` return a random integer in
+the range [1,n] and [m,n] respectively. This is identical to the
+behavior of `math.random`.
+
+`arc4.buf( n )` returns a string of `n` random characters. It is
+suitable for generating private keys and IVs.
+
+Some example code:
+
+ local arc4 = require( "arc4random" )
+
+ print( arc4.random() )
+ print( arc4.random( 3 ) )
+ print( arc4.random( -5, 5 ) )
+
+ local str = arc4.buf( 16 )
+ str = str:gsub( "(.)", function( c )
+ return ( "%02x" ):format( string.byte( c ) )
+ end )
+ print( str )
diff --git a/compat/arc4random/arc4random.c b/compat/arc4random/arc4random.c
@@ -0,0 +1,195 @@
+/* $OpenBSD: arc4random.c,v 1.49 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * ChaCha based random number generator for OpenBSD.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+
+#define KEYSTREAM_ONLY
+#include "chacha_private.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifdef __GNUC__
+#define inline __inline
+#else /* !__GNUC__ */
+#define inline
+#endif /* !__GNUC__ */
+
+#define KEYSZ 32
+#define IVSZ 8
+#define BLOCKSZ 64
+#define RSBUFSZ (16*BLOCKSZ)
+
+/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */
+static struct _rs {
+ size_t rs_have; /* valid bytes at end of rs_buf */
+ size_t rs_count; /* bytes till reseed */
+} *rs;
+
+/* Maybe be preserved in fork children, if _rs_allocate() decides. */
+static struct _rsx {
+ chacha_ctx rs_chacha; /* chacha context for random keystream */
+ u_char rs_buf[RSBUFSZ]; /* keystream blocks */
+} *rsx;
+
+static inline int _rs_allocate(struct _rs **, struct _rsx **);
+static inline void _rs_forkdetect(void);
+#include "arc4random.h"
+
+static inline void _rs_rekey(u_char *dat, size_t datlen);
+
+static inline void
+_rs_init(u_char *buf, size_t n)
+{
+ if (n < KEYSZ + IVSZ)
+ return;
+
+ if (rs == NULL) {
+ if (_rs_allocate(&rs, &rsx) == -1)
+ abort();
+ }
+
+ chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0);
+ chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ);
+}
+
+static void
+_rs_stir(void)
+{
+ u_char rnd[KEYSZ + IVSZ];
+
+ if (getentropy(rnd, sizeof rnd) == -1)
+ _getentropy_fail();
+
+ if (!rs)
+ _rs_init(rnd, sizeof(rnd));
+ else
+ _rs_rekey(rnd, sizeof(rnd));
+ explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */
+
+ /* invalidate rs_buf */
+ rs->rs_have = 0;
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+
+ rs->rs_count = 1600000;
+}
+
+static inline void
+_rs_stir_if_needed(size_t len)
+{
+ _rs_forkdetect();
+ if (!rs || rs->rs_count <= len)
+ _rs_stir();
+ if (rs->rs_count <= len)
+ rs->rs_count = 0;
+ else
+ rs->rs_count -= len;
+}
+
+static inline void
+_rs_rekey(u_char *dat, size_t datlen)
+{
+#ifndef KEYSTREAM_ONLY
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+#endif
+ /* fill rs_buf with the keystream */
+ chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf,
+ rsx->rs_buf, sizeof(rsx->rs_buf));
+ /* mix in optional user provided data */
+ if (dat) {
+ size_t i, m;
+
+ m = min(datlen, KEYSZ + IVSZ);
+ for (i = 0; i < m; i++)
+ rsx->rs_buf[i] ^= dat[i];
+ }
+ /* immediately reinit for backtracking resistance */
+ _rs_init(rsx->rs_buf, KEYSZ + IVSZ);
+ memset(rsx->rs_buf, 0, KEYSZ + IVSZ);
+ rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ;
+}
+
+static inline void
+_rs_random_buf(void *_buf, size_t n)
+{
+ u_char *buf = (u_char *)_buf;
+ u_char *keystream;
+ size_t m;
+
+ _rs_stir_if_needed(n);
+ while (n > 0) {
+ if (rs->rs_have > 0) {
+ m = min(n, rs->rs_have);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf)
+ - rs->rs_have;
+ memcpy(buf, keystream, m);
+ memset(keystream, 0, m);
+ buf += m;
+ n -= m;
+ rs->rs_have -= m;
+ }
+ if (rs->rs_have == 0)
+ _rs_rekey(NULL, 0);
+ }
+}
+
+static inline void
+_rs_random_u32(uint32_t *val)
+{
+ u_char *keystream;
+
+ _rs_stir_if_needed(sizeof(*val));
+ if (rs->rs_have < sizeof(*val))
+ _rs_rekey(NULL, 0);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have;
+ memcpy(val, keystream, sizeof(*val));
+ memset(keystream, 0, sizeof(*val));
+ rs->rs_have -= sizeof(*val);
+}
+
+uint32_t
+arc4random(void)
+{
+ uint32_t val;
+
+ _ARC4_LOCK();
+ _rs_random_u32(&val);
+ _ARC4_UNLOCK();
+ return val;
+}
+
+void
+arc4random_buf(void *buf, size_t n)
+{
+ _ARC4_LOCK();
+ _rs_random_buf(buf, n);
+ _ARC4_UNLOCK();
+}
diff --git a/compat/arc4random/arc4random.h b/compat/arc4random/arc4random.h
@@ -0,0 +1,26 @@
+#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H
+#define LIBCRYPTOCOMPAT_ARC4RANDOM_H
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD__)
+#include "arc4random_freebsd.h"
+
+#elif defined(__linux__)
+#include "arc4random_linux.h"
+
+#elif defined(__APPLE__)
+#include "arc4random_osx.h"
+
+#elif defined(__sun)
+#include "arc4random_solaris.h"
+
+#elif defined(_WIN32)
+#include "arc4random_win.h"
+
+#else
+#error "No arc4random hooks defined for this platform."
+
+#endif
+
+#endif
diff --git a/compat/arc4random/arc4random_freebsd.h b/compat/arc4random/arc4random_freebsd.h
@@ -0,0 +1,85 @@
+/* $OpenBSD: arc4random_freebsd.h,v 1.1 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+/*
+ * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if
+ * a program does not link to -lthr. Callbacks registered with pthread_atfork()
+ * appear to fail silently. So, it is not always possible to detect a PID
+ * wraparound.
+ */
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return -1;
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return -1;
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return 0;
+}
diff --git a/compat/arc4random/arc4random_linux.h b/compat/arc4random/arc4random_linux.h
@@ -0,0 +1,85 @@
+/* $OpenBSD: arc4random_linux.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#ifdef __GLIBC__
+extern void *__dso_handle;
+extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *);
+#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle)
+#else
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+#endif
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/compat/arc4random/arc4random_osx.h b/compat/arc4random/arc4random_osx.h
@@ -0,0 +1,79 @@
+/* $OpenBSD: arc4random_osx.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return -1;
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return -1;
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return 0;
+}
diff --git a/compat/arc4random/arc4random_solaris.h b/compat/arc4random/arc4random_solaris.h
@@ -0,0 +1,79 @@
+/* $OpenBSD: arc4random_solaris.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/compat/arc4random/arc4random_uniform.c b/compat/arc4random/arc4random_uniform.c
@@ -0,0 +1,56 @@
+/* $OpenBSD: arc4random_uniform.c,v 1.1 2014/07/12 13:24:54 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <stdlib.h>
+
+/*
+ * Calculate a uniformly distributed random number less than upper_bound
+ * avoiding "modulo bias".
+ *
+ * Uniformity is achieved by generating new random numbers until the one
+ * returned is outside the range [0, 2**32 % upper_bound). This
+ * guarantees the selected random number will be inside
+ * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
+ * after reduction modulo upper_bound.
+ */
+uint32_t
+arc4random_uniform(uint32_t upper_bound)
+{
+ uint32_t r, min;
+
+ if (upper_bound < 2)
+ return 0;
+
+ /* 2**32 % x == (2**32 - x) % x */
+ min = -upper_bound % upper_bound;
+
+ /*
+ * This could theoretically loop forever but each retry has
+ * p > 0.5 (worst case, usually far better) of selecting a
+ * number inside the range we need, so it should rarely need
+ * to re-roll.
+ */
+ for (;;) {
+ r = arc4random();
+ if (r >= min)
+ break;
+ }
+
+ return r % upper_bound;
+}
diff --git a/compat/arc4random/arc4random_win.h b/compat/arc4random/arc4random_win.h
@@ -0,0 +1,74 @@
+/* $OpenBSD: arc4random_win.h,v 1.3 2014/07/20 16:59:31 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <windows.h>
+
+static volatile HANDLE arc4random_mtx = NULL;
+
+/*
+ * Initialize the mutex on the first lock attempt. On collision, each thread
+ * will attempt to allocate a mutex and compare-and-swap it into place as the
+ * global mutex. On failure to swap in the global mutex, the mutex is closed.
+ */
+#define _ARC4_LOCK() { \
+ if (!arc4random_mtx) { \
+ HANDLE p = CreateMutex(NULL, FALSE, NULL); \
+ if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \
+ CloseHandle(p); \
+ } \
+ WaitForSingleObject(arc4random_mtx, INFINITE); \
+} \
+
+#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx)
+
+static inline void
+_getentropy_fail(void)
+{
+ TerminateProcess(GetCurrentProcess(), 0);
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ *rsp = calloc(1, sizeof(**rsp));
+ if (*rsp == NULL)
+ return (-1);
+
+ *rsxp = calloc(1, sizeof(**rsxp));
+ if (*rsxp == NULL) {
+ free(*rsp);
+ return (-1);
+ }
+ return (0);
+}
+
+static inline void
+_rs_forkhandler(void)
+{
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+}
diff --git a/compat/arc4random/chacha_private.h b/compat/arc4random/chacha_private.h
@@ -0,0 +1,222 @@
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD$ */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+ u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v) ); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
+{
+ const char *constants;
+
+ x->input[4] = U8TO32_LITTLE(k + 0);
+ x->input[5] = U8TO32_LITTLE(k + 4);
+ x->input[6] = U8TO32_LITTLE(k + 8);
+ x->input[7] = U8TO32_LITTLE(k + 12);
+ if (kbits == 256) { /* recommended */
+ k += 16;
+ constants = sigma;
+ } else { /* kbits == 128 */
+ constants = tau;
+ }
+ x->input[8] = U8TO32_LITTLE(k + 0);
+ x->input[9] = U8TO32_LITTLE(k + 4);
+ x->input[10] = U8TO32_LITTLE(k + 8);
+ x->input[11] = U8TO32_LITTLE(k + 12);
+ x->input[0] = U8TO32_LITTLE(constants + 0);
+ x->input[1] = U8TO32_LITTLE(constants + 4);
+ x->input[2] = U8TO32_LITTLE(constants + 8);
+ x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x,const u8 *iv)
+{
+ x->input[12] = 0;
+ x->input[13] = 0;
+ x->input[14] = U8TO32_LITTLE(iv + 0);
+ x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
+{
+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ u8 *ctarget = NULL;
+ u8 tmp[64];
+ u_int i;
+
+ if (!bytes) return;
+
+ j0 = x->input[0];
+ j1 = x->input[1];
+ j2 = x->input[2];
+ j3 = x->input[3];
+ j4 = x->input[4];
+ j5 = x->input[5];
+ j6 = x->input[6];
+ j7 = x->input[7];
+ j8 = x->input[8];
+ j9 = x->input[9];
+ j10 = x->input[10];
+ j11 = x->input[11];
+ j12 = x->input[12];
+ j13 = x->input[13];
+ j14 = x->input[14];
+ j15 = x->input[15];
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
+ }
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+ for (i = 20;i > 0;i -= 2) {
+ QUARTERROUND( x0, x4, x8,x12)
+ QUARTERROUND( x1, x5, x9,x13)
+ QUARTERROUND( x2, x6,x10,x14)
+ QUARTERROUND( x3, x7,x11,x15)
+ QUARTERROUND( x0, x5,x10,x15)
+ QUARTERROUND( x1, x6,x11,x12)
+ QUARTERROUND( x2, x7, x8,x13)
+ QUARTERROUND( x3, x4, x9,x14)
+ }
+ x0 = PLUS(x0,j0);
+ x1 = PLUS(x1,j1);
+ x2 = PLUS(x2,j2);
+ x3 = PLUS(x3,j3);
+ x4 = PLUS(x4,j4);
+ x5 = PLUS(x5,j5);
+ x6 = PLUS(x6,j6);
+ x7 = PLUS(x7,j7);
+ x8 = PLUS(x8,j8);
+ x9 = PLUS(x9,j9);
+ x10 = PLUS(x10,j10);
+ x11 = PLUS(x11,j11);
+ x12 = PLUS(x12,j12);
+ x13 = PLUS(x13,j13);
+ x14 = PLUS(x14,j14);
+ x15 = PLUS(x15,j15);
+
+#ifndef KEYSTREAM_ONLY
+ x0 = XOR(x0,U8TO32_LITTLE(m + 0));
+ x1 = XOR(x1,U8TO32_LITTLE(m + 4));
+ x2 = XOR(x2,U8TO32_LITTLE(m + 8));
+ x3 = XOR(x3,U8TO32_LITTLE(m + 12));
+ x4 = XOR(x4,U8TO32_LITTLE(m + 16));
+ x5 = XOR(x5,U8TO32_LITTLE(m + 20));
+ x6 = XOR(x6,U8TO32_LITTLE(m + 24));
+ x7 = XOR(x7,U8TO32_LITTLE(m + 28));
+ x8 = XOR(x8,U8TO32_LITTLE(m + 32));
+ x9 = XOR(x9,U8TO32_LITTLE(m + 36));
+ x10 = XOR(x10,U8TO32_LITTLE(m + 40));
+ x11 = XOR(x11,U8TO32_LITTLE(m + 44));
+ x12 = XOR(x12,U8TO32_LITTLE(m + 48));
+ x13 = XOR(x13,U8TO32_LITTLE(m + 52));
+ x14 = XOR(x14,U8TO32_LITTLE(m + 56));
+ x15 = XOR(x15,U8TO32_LITTLE(m + 60));
+#endif
+
+ j12 = PLUSONE(j12);
+ if (!j12) {
+ j13 = PLUSONE(j13);
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+
+ U32TO8_LITTLE(c + 0,x0);
+ U32TO8_LITTLE(c + 4,x1);
+ U32TO8_LITTLE(c + 8,x2);
+ U32TO8_LITTLE(c + 12,x3);
+ U32TO8_LITTLE(c + 16,x4);
+ U32TO8_LITTLE(c + 20,x5);
+ U32TO8_LITTLE(c + 24,x6);
+ U32TO8_LITTLE(c + 28,x7);
+ U32TO8_LITTLE(c + 32,x8);
+ U32TO8_LITTLE(c + 36,x9);
+ U32TO8_LITTLE(c + 40,x10);
+ U32TO8_LITTLE(c + 44,x11);
+ U32TO8_LITTLE(c + 48,x12);
+ U32TO8_LITTLE(c + 52,x13);
+ U32TO8_LITTLE(c + 56,x14);
+ U32TO8_LITTLE(c + 60,x15);
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) ctarget[i] = c[i];
+ }
+ x->input[12] = j12;
+ x->input[13] = j13;
+ return;
+ }
+ bytes -= 64;
+ c += 64;
+#ifndef KEYSTREAM_ONLY
+ m += 64;
+#endif
+ }
+}
diff --git a/compat/getentropy/getentropy_freebsd.c b/compat/getentropy/getentropy_freebsd.c
@@ -0,0 +1,64 @@
+/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2014 Brent Cook <bcook@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <errno.h>
+#include <stddef.h>
+
+/*
+ * Derived from lib/libc/gen/arc4random.c from FreeBSD.
+ */
+static size_t
+getentropy_sysctl(u_char *buf, size_t size)
+{
+ int mib[2];
+ size_t len, done;
+
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_ARND;
+ done = 0;
+
+ do {
+ len = size;
+ if (sysctl(mib, 2, buf, &len, NULL, 0) == -1)
+ return (done);
+ done += len;
+ buf += len;
+ size -= len;
+ } while (size > 0);
+
+ return (done);
+}
+
+int
+getentropy(void *buf, size_t len)
+{
+ if (len <= 256 &&
+ getentropy_sysctl(buf, len) == len) {
+ return 0;
+ }
+
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_linux.c b/compat/getentropy/getentropy_linux.c
@@ -0,0 +1,548 @@
+/* $OpenBSD: getentropy_linux.c,v 1.35 2014/08/28 01:00:57 bcook Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#define _POSIX_C_SOURCE 199309L
+#define _GNU_SOURCE 1
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>
+#endif
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <openssl/sha.h>
+
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/sysctl.h>
+#ifdef HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+#include <sys/vfs.h>
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+#ifdef SYS_getrandom
+static int getentropy_getrandom(void *buf, size_t len);
+#endif
+static int getentropy_urandom(void *buf, size_t len);
+#ifdef SYS__sysctl
+static int getentropy_sysctl(void *buf, size_t len);
+#endif
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+#ifdef SYS_getrandom
+ /*
+ * Try descriptor-less getrandom()
+ */
+ ret = getentropy_getrandom(buf, len);
+ if (ret != -1)
+ return (ret);
+ if (errno != ENOSYS)
+ return (-1);
+#endif
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+#ifdef SYS__sysctl
+ /*
+ * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID.
+ * sysctl is a failsafe API, so it guarantees a result. This
+ * should work inside a chroot, or when file descriptors are
+ * exhuasted.
+ *
+ * However this can fail if the Linux kernel removes support
+ * for sysctl. Starting in 2007, there have been efforts to
+ * deprecate the sysctl API/ABI, and push callers towards use
+ * of the chroot-unavailable fd-using /proc mechanism --
+ * essentially the same problems as /dev/urandom.
+ *
+ * Numerous setbacks have been encountered in their deprecation
+ * schedule, so as of June 2014 the kernel ABI still exists on
+ * most Linux architectures. The sysctl() stub in libc is missing
+ * on some systems. There are also reports that some kernels
+ * spew messages to the console.
+ */
+ ret = getentropy_sysctl(buf, len);
+ if (ret != -1)
+ return (ret);
+#endif /* SYS__sysctl */
+
+ /*
+ * Entropy collection via /dev/urandom and sysctl have failed.
+ *
+ * No other API exists for collecting entropy. See the large
+ * comment block above.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Linux
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Linux should either retain their
+ * sysctl ABI, or consider providing a new failsafe API which
+ * works in a chroot or when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+#ifdef SYS_getrandom
+static int
+getentropy_getrandom(void *buf, size_t len)
+{
+ int pre_errno = errno;
+ int ret;
+ if (len > 256)
+ return (-1);
+ do {
+ ret = syscall(SYS_getrandom, buf, len, 0);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret != len)
+ return (-1);
+ errno = pre_errno;
+ return (0);
+}
+#endif
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, cnt, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+#ifdef SYS__sysctl
+static int
+getentropy_sysctl(void *buf, size_t len)
+{
+ static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID };
+ size_t i;
+ int save_errno = errno;
+
+ for (i = 0; i < len; ) {
+ size_t chunk = min(len - i, 16);
+
+ /* SYS__sysctl because some systems already removed sysctl() */
+ struct __sysctl_args args = {
+ .name = mib,
+ .nlen = 3,
+ .oldval = (char *)buf + i,
+ .oldlenp = &chunk,
+ };
+ if (syscall(SYS__sysctl, &args) != 0)
+ goto sysctlfailed;
+ i += chunk;
+ }
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return (0); /* satisfied */
+ }
+sysctlfailed:
+ errno = EIO;
+ return -1;
+}
+#endif /* SYS__sysctl */
+
+static int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return 0;
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+#ifdef HAVE_GETAUXVAL
+#ifdef AT_RANDOM
+ /* Not as random as you think but we take what we are given */
+ p = (char *) getauxval(AT_RANDOM);
+ if (p)
+ HR(p, 16);
+#endif
+#ifdef AT_SYSINFO_EHDR
+ p = (char *) getauxval(AT_SYSINFO_EHDR);
+ if (p)
+ HR(p, pgs);
+#endif
+#ifdef AT_BASE
+ p = (char *) getauxval(AT_BASE);
+ if (p)
+ HD(p);
+#endif
+#endif
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_osx.c b/compat/getentropy/getentropy_osx.c
@@ -0,0 +1,429 @@
+/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <mach/mach_time.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <sys/socketvar.h>
+#include <sys/vmmeter.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp_var.h>
+#include <CommonCrypto/CommonDigest.h>
+#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c)))
+#define SHA512_Init(xxx) (CC_SHA512_Init((xxx)))
+#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy)))
+#define SHA512_CTX CC_SHA512_CTX
+#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+static int getentropy_urandom(void *buf, size_t len);
+static int getentropy_fallback(void *buf, size_t len);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom and sysctl have failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on OSX that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that OSX
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that OSX should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS };
+static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS };
+static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS };
+static int kmib[] = { CTL_KERN, KERN_USRSTACK };
+static int hwmib[] = { CTL_HW, HW_USERMEM };
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+ struct tcpstat tcpstat;
+ struct udpstat udpstat;
+ struct ipstat ipstat;
+ u_int64_t mach_time;
+ unsigned int idata;
+ void *addr;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+
+ ii = sizeof(addr);
+ HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]),
+ &addr, &ii, NULL, 0) == -1, addr);
+
+ ii = sizeof(idata);
+ HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]),
+ &idata, &ii, NULL, 0) == -1, idata);
+
+ ii = sizeof(tcpstat);
+ HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]),
+ &tcpstat, &ii, NULL, 0) == -1, tcpstat);
+
+ ii = sizeof(udpstat);
+ HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]),
+ &udpstat, &ii, NULL, 0) == -1, udpstat);
+
+ ii = sizeof(ipstat);
+ HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]),
+ &ipstat, &ii, NULL, 0) == -1, ipstat);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+ cnt += (int)mach_time;
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_solaris.c b/compat/getentropy/getentropy_solaris.c
@@ -0,0 +1,445 @@
+/* $OpenBSD: getentropy_solaris.c,v 1.8 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/sha2.h>
+#define SHA512_Init SHA512Init
+#define SHA512_Update SHA512Update
+#define SHA512_Final SHA512Final
+
+#include <sys/vfs.h>
+#include <sys/statfs.h>
+#include <sys/loadavg.h>
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+static int getentropy_urandom(void *buf, size_t len, const char *path,
+ int devfscheck);
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * Solaris provides /dev/urandom as a symbolic link to
+ * /devices/pseudo/random@0:urandom which is provided by
+ * a devfs filesystem. Best practice is to use O_NOFOLLOW,
+ * so we must try the unpublished name directly.
+ *
+ * This can fail if the process is inside a chroot which lacks
+ * the devfs mount, or if file descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len,
+ "/devices/pseudo/random@0:urandom", 1);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Unfortunately, chroot spaces on Solaris are sometimes setup
+ * with direct device node of the well-known /dev/urandom name
+ * (perhaps to avoid dragging all of devfs into the space).
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len, "/dev/urandom", 0);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on Solaris that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Solaris
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Solaris should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+static int
+getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open(path, flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) ||
+ (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return 0;
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ double loadavg[3];
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+ HX((getloadavg(loadavg, 3) == -1), loadavg);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_win.c b/compat/getentropy/getentropy_win.c
@@ -0,0 +1,59 @@
+/* $OpenBSD: getentropy_win.c,v 1.2 2014/07/13 13:03:09 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014, Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <windows.h>
+#include <errno.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <wincrypt.h>
+#include <process.h>
+
+int getentropy(void *buf, size_t len);
+
+/*
+ * On Windows, CryptGenRandom is supposed to be a well-seeded
+ * cryptographically strong random number generator.
+ */
+int
+getentropy(void *buf, size_t len)
+{
+ HCRYPTPROV provider;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ if (CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT) == 0)
+ goto fail;
+ if (CryptGenRandom(provider, len, buf) == 0) {
+ CryptReleaseContext(provider, 0);
+ goto fail;
+ }
+ CryptReleaseContext(provider, 0);
+ return (0);
+
+fail:
+ errno = EIO;
+ return (-1);
+}
diff --git a/compat/safebfuns.c b/compat/safebfuns.c
@@ -0,0 +1,52 @@
+/* Public domain */
+
+#include <string.h>
+
+#if __clang__
+ /*
+ * http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros
+ * http://lists.cs.uiuc.edu/pipermail/cfe-dev/2014-December/040627.html
+ */
+ #if __has_attribute( noinline ) /* && __has_attribute( optnone ) */
+ #define NOOPT /* __attribute__ (( optnone )) */
+ #define NOINLINE __attribute__ (( noinline ))
+ #else
+ #error "require clang with noinline and optnone attributes"
+ #endif
+#elif __GNUC__
+ /*
+ * http://gcc.gnu.org/onlinedocs/gcc/Function-Specific-Option-Pragmas.html
+ * http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+ */
+ #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
+ #define NOOPT __attribute__ (( optimize( 0 ) ))
+ #define NOINLINE __attribute__ (( noinline ))
+ #else
+ #error "require gcc >= 4.4"
+ #endif
+#else
+ #error "unrecognised compiler"
+ explode
+#endif
+
+NOOPT NOINLINE void explicit_bzero( void * const buf, const size_t n ) {
+ size_t i;
+ unsigned char * p = buf;
+
+ for( i = 0; i < n; i++ ) {
+ p[ i ] = 0;
+ }
+}
+
+NOOPT NOINLINE int timingsafe_bcmp( const void * const b1, const void * const b2, const size_t n ) {
+ size_t i;
+ const unsigned char * const p1 = b1;
+ const unsigned char * const p2 = b2;
+ int result = 0;
+
+ for( i = 0; i < n; i++ ) {
+ result |= p1[ i ] ^ p2[ i ];
+ }
+
+ return result != 0;
+}
diff --git a/compat/sha/sha512-elf-x86_64.S b/compat/sha/sha512-elf-x86_64.S
@@ -0,0 +1,1802 @@
+.text
+
+.globl sha512_block_data_order
+.type sha512_block_data_order,@function
+.align 16
+sha512_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+.Lprologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb .Lrounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha512_block_data_order,.-sha512_block_data_order
+.align 64
+.type K512,@object
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/compat/sha/sha512-macosx-x86_64.S b/compat/sha/sha512-macosx-x86_64.S
@@ -0,0 +1,1802 @@
+.text
+
+.globl _sha512_block_data_order
+
+.p2align 4
+_sha512_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+L$prologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp L$rounds_16_xx
+.p2align 4
+L$rounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb L$rounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb L$loop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue:
+ .byte 0xf3,0xc3
+
+.p2align 6
+
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe
+.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad 0x9bdc06a725c71235,0xc19bf174cf692694
+.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad 0x983e5152ee66dfab,0xa831c66d2db43210
+.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad 0x06ca6351e003826f,0x142929670a0e6e70
+.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad 0x81c2c92e47edaee6,0x92722c851482353b
+.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad 0xd192e819d6ef5218,0xd69906245565a910
+.quad 0xf40e35855771202a,0x106aa07032bbd1b8
+.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad 0x90befffa23631e28,0xa4506cebde82bde9
+.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad 0xca273eceea26619c,0xd186b8c721c0c207
+.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad 0x113f9804bef90dae,0x1b710b35131c471b
+.quad 0x28db77f523047d84,0x32caab7b40c72493
+.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
diff --git a/compat/sha/sha512.c b/compat/sha/sha512.c
@@ -0,0 +1,558 @@
+/* $OpenBSD: sha512.c,v 1.12 2014/07/10 22:45:58 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
+/*
+ * IMPLEMENTATION NOTES.
+ *
+ * As you might have noticed 32-bit hash algorithms:
+ *
+ * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
+ * - optimized versions implement two transform functions: one operating
+ * on [aligned] data in host byte order and one - on data in input
+ * stream byte order;
+ * - share common byte-order neutral collector and padding function
+ * implementations, ../md32_common.h;
+ *
+ * Neither of the above applies to this SHA-512 implementations. Reasons
+ * [in reverse order] are:
+ *
+ * - it's the only 64-bit hash algorithm for the moment of this writing,
+ * there is no need for common collector/padding implementation [yet];
+ * - by supporting only one transform function [which operates on
+ * *aligned* data in input stream byte order, big-endian in this case]
+ * we minimize burden of maintenance in two ways: a) collector/padding
+ * function is simpler; b) only one transform function to stare at;
+ * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
+ * apply a number of optimizations to mitigate potential performance
+ * penalties caused by previous design decision;
+ *
+ * Caveat lector.
+ *
+ * Implementation relies on the fact that "long long" is 64-bit on
+ * both 32- and 64-bit platforms. If some compiler vendor comes up
+ * with 128-bit long long, adjustment to sha.h would be required.
+ * As this implementation relies on 64-bit integer type, it's totally
+ * inappropriate for platforms which don't support it, most notably
+ * 16-bit platforms.
+ * <appro@fy.chalmers.se>
+ */
+
+#include <openssl/crypto.h>
+#include <openssl/opensslv.h>
+#include <openssl/sha.h>
+
+#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
+#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+#endif
+
+int SHA384_Init(SHA512_CTX *c)
+ {
+ c->h[0]=U64(0xcbbb9d5dc1059ed8);
+ c->h[1]=U64(0x629a292a367cd507);
+ c->h[2]=U64(0x9159015a3070dd17);
+ c->h[3]=U64(0x152fecd8f70e5939);
+ c->h[4]=U64(0x67332667ffc00b31);
+ c->h[5]=U64(0x8eb44a8768581511);
+ c->h[6]=U64(0xdb0c2e0d64f98fa7);
+ c->h[7]=U64(0x47b5481dbefa4fa4);
+
+ c->Nl=0; c->Nh=0;
+ c->num=0; c->md_len=SHA384_DIGEST_LENGTH;
+ return 1;
+ }
+
+int SHA512_Init(SHA512_CTX *c)
+ {
+ c->h[0]=U64(0x6a09e667f3bcc908);
+ c->h[1]=U64(0xbb67ae8584caa73b);
+ c->h[2]=U64(0x3c6ef372fe94f82b);
+ c->h[3]=U64(0xa54ff53a5f1d36f1);
+ c->h[4]=U64(0x510e527fade682d1);
+ c->h[5]=U64(0x9b05688c2b3e6c1f);
+ c->h[6]=U64(0x1f83d9abfb41bd6b);
+ c->h[7]=U64(0x5be0cd19137e2179);
+
+ c->Nl=0; c->Nh=0;
+ c->num=0; c->md_len=SHA512_DIGEST_LENGTH;
+ return 1;
+ }
+
+#ifndef SHA512_ASM
+static
+#endif
+void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);
+
+int SHA512_Final (unsigned char *md, SHA512_CTX *c)
+ {
+ unsigned char *p=(unsigned char *)c->u.p;
+ size_t n=c->num;
+
+ p[n]=0x80; /* There always is a room for one */
+ n++;
+ if (n > (sizeof(c->u)-16))
+ memset (p+n,0,sizeof(c->u)-n), n=0,
+ sha512_block_data_order (c,p,1);
+
+ memset (p+n,0,sizeof(c->u)-16-n);
+#if BYTE_ORDER == BIG_ENDIAN
+ c->u.d[SHA_LBLOCK-2] = c->Nh;
+ c->u.d[SHA_LBLOCK-1] = c->Nl;
+#else
+ p[sizeof(c->u)-1] = (unsigned char)(c->Nl);
+ p[sizeof(c->u)-2] = (unsigned char)(c->Nl>>8);
+ p[sizeof(c->u)-3] = (unsigned char)(c->Nl>>16);
+ p[sizeof(c->u)-4] = (unsigned char)(c->Nl>>24);
+ p[sizeof(c->u)-5] = (unsigned char)(c->Nl>>32);
+ p[sizeof(c->u)-6] = (unsigned char)(c->Nl>>40);
+ p[sizeof(c->u)-7] = (unsigned char)(c->Nl>>48);
+ p[sizeof(c->u)-8] = (unsigned char)(c->Nl>>56);
+ p[sizeof(c->u)-9] = (unsigned char)(c->Nh);
+ p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
+ p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
+ p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
+ p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
+ p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
+ p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
+ p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
+#endif
+
+ sha512_block_data_order (c,p,1);
+
+ if (md==0) return 0;
+
+ switch (c->md_len)
+ {
+ /* Let compiler decide if it's appropriate to unroll... */
+ case SHA384_DIGEST_LENGTH:
+ for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
+ {
+ SHA_LONG64 t = c->h[n];
+
+ *(md++) = (unsigned char)(t>>56);
+ *(md++) = (unsigned char)(t>>48);
+ *(md++) = (unsigned char)(t>>40);
+ *(md++) = (unsigned char)(t>>32);
+ *(md++) = (unsigned char)(t>>24);
+ *(md++) = (unsigned char)(t>>16);
+ *(md++) = (unsigned char)(t>>8);
+ *(md++) = (unsigned char)(t);
+ }
+ break;
+ case SHA512_DIGEST_LENGTH:
+ for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
+ {
+ SHA_LONG64 t = c->h[n];
+
+ *(md++) = (unsigned char)(t>>56);
+ *(md++) = (unsigned char)(t>>48);
+ *(md++) = (unsigned char)(t>>40);
+ *(md++) = (unsigned char)(t>>32);
+ *(md++) = (unsigned char)(t>>24);
+ *(md++) = (unsigned char)(t>>16);
+ *(md++) = (unsigned char)(t>>8);
+ *(md++) = (unsigned char)(t);
+ }
+ break;
+ /* ... as well as make sure md_len is not abused. */
+ default: return 0;
+ }
+
+ return 1;
+ }
+
+int SHA384_Final (unsigned char *md,SHA512_CTX *c)
+{ return SHA512_Final (md,c); }
+
+int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
+ {
+ SHA_LONG64 l;
+ unsigned char *p=c->u.p;
+ const unsigned char *data=(const unsigned char *)_data;
+
+ if (len==0) return 1;
+
+ l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
+ if (l < c->Nl) c->Nh++;
+ if (sizeof(len)>=8) c->Nh+=(((SHA_LONG64)len)>>61);
+ c->Nl=l;
+
+ if (c->num != 0)
+ {
+ size_t n = sizeof(c->u) - c->num;
+
+ if (len < n)
+ {
+ memcpy (p+c->num,data,len), c->num += (unsigned int)len;
+ return 1;
+ }
+ else {
+ memcpy (p+c->num,data,n), c->num = 0;
+ len-=n, data+=n;
+ sha512_block_data_order (c,p,1);
+ }
+ }
+
+ if (len >= sizeof(c->u))
+ {
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+ if ((size_t)data%sizeof(c->u.d[0]) != 0)
+ while (len >= sizeof(c->u))
+ memcpy (p,data,sizeof(c->u)),
+ sha512_block_data_order (c,p,1),
+ len -= sizeof(c->u),
+ data += sizeof(c->u);
+ else
+#endif
+ sha512_block_data_order (c,data,len/sizeof(c->u)),
+ data += len,
+ len %= sizeof(c->u),
+ data -= len;
+ }
+
+ if (len != 0) memcpy (p,data,len), c->num = (int)len;
+
+ return 1;
+ }
+
+int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
+{ return SHA512_Update (c,data,len); }
+
+void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
+ {
+#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
+ if ((size_t)data%sizeof(c->u.d[0]) != 0)
+ memcpy(c->u.p,data,sizeof(c->u.p)),
+ data = c->u.p;
+#endif
+ sha512_block_data_order (c,data,1);
+ }
+
+unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA512_CTX c;
+ static unsigned char m[SHA384_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA384_Init(&c);
+ SHA512_Update(&c,d,n);
+ SHA512_Final(md,&c);
+ OPENSSL_cleanse(&c,sizeof(c));
+ return(md);
+ }
+
+unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA512_CTX c;
+ static unsigned char m[SHA512_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA512_Init(&c);
+ SHA512_Update(&c,d,n);
+ SHA512_Final(md,&c);
+ OPENSSL_cleanse(&c,sizeof(c));
+ return(md);
+ }
+
+#ifndef SHA512_ASM
+static const SHA_LONG64 K512[80] = {
+ U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
+ U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
+ U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
+ U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
+ U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
+ U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
+ U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
+ U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
+ U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
+ U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
+ U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
+ U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
+ U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
+ U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
+ U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
+ U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
+ U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
+ U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
+ U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
+ U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
+ U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
+ U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
+ U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
+ U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
+ U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
+ U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
+ U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
+ U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
+ U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
+ U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
+ U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
+ U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
+ U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
+ U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
+ U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
+ U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
+ U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
+ U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
+ U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
+ U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };
+
+#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
+# if defined(__x86_64) || defined(__x86_64__)
+# define ROTR(a,n) ({ SHA_LONG64 ret; \
+ asm ("rorq %1,%0" \
+ : "=r"(ret) \
+ : "J"(n),"0"(a) \
+ : "cc"); ret; })
+# define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
+ asm ("bswapq %0" \
+ : "=r"(ret) \
+ : "0"(ret)); ret; })
+# elif (defined(__i386) || defined(__i386__))
+# if defined(I386_ONLY)
+# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
+ unsigned int hi=p[0],lo=p[1]; \
+ asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
+ "roll $16,%%eax; roll $16,%%edx; "\
+ "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
+ : "=a"(lo),"=d"(hi) \
+ : "0"(lo),"1"(hi) : "cc"); \
+ ((SHA_LONG64)hi)<<32|lo; })
+# else
+# define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
+ unsigned int hi=p[0],lo=p[1]; \
+ asm ("bswapl %0; bswapl %1;" \
+ : "=r"(lo),"=r"(hi) \
+ : "0"(lo),"1"(hi)); \
+ ((SHA_LONG64)hi)<<32|lo; })
+# endif
+# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
+# define ROTR(a,n) ({ SHA_LONG64 ret; \
+ asm ("rotrdi %0,%1,%2" \
+ : "=r"(ret) \
+ : "r"(a),"K"(n)); ret; })
+# endif
+#endif
+
+#ifndef PULL64
+#define B(x,j) (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
+#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
+#endif
+
+#ifndef ROTR
+#define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
+#endif
+
+#define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+#define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+#define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
+#define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+/*
+ * This code should give better results on 32-bit CPU with less than
+ * ~24 registers, both size and performance wise...
+ */
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 A,E,T;
+ SHA_LONG64 X[9+80],*F;
+ int i;
+
+ while (num--) {
+
+ F = X+80;
+ A = ctx->h[0]; F[1] = ctx->h[1];
+ F[2] = ctx->h[2]; F[3] = ctx->h[3];
+ E = ctx->h[4]; F[5] = ctx->h[5];
+ F[6] = ctx->h[6]; F[7] = ctx->h[7];
+
+ for (i=0;i<16;i++,F--)
+ {
+ T = PULL64(W[i]);
+ F[0] = A;
+ F[4] = E;
+ F[8] = T;
+ T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+ E = F[3] + T;
+ A = T + Sigma0(A) + Maj(A,F[1],F[2]);
+ }
+
+ for (;i<80;i++,F--)
+ {
+ T = sigma0(F[8+16-1]);
+ T += sigma1(F[8+16-14]);
+ T += F[8+16] + F[8+16-9];
+
+ F[0] = A;
+ F[4] = E;
+ F[8] = T;
+ T += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
+ E = F[3] + T;
+ A = T + Sigma0(A) + Maj(A,F[1],F[2]);
+ }
+
+ ctx->h[0] += A; ctx->h[1] += F[1];
+ ctx->h[2] += F[2]; ctx->h[3] += F[3];
+ ctx->h[4] += E; ctx->h[5] += F[5];
+ ctx->h[6] += F[6]; ctx->h[7] += F[7];
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#elif defined(OPENSSL_SMALL_FOOTPRINT)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+ SHA_LONG64 X[16];
+ int i;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ for (i=0;i<16;i++)
+ {
+#if BYTE_ORDER == BIG_ENDIAN
+ T1 = X[i] = W[i];
+#else
+ T1 = X[i] = PULL64(W[i]);
+#endif
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ for (;i<80;i++)
+ {
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
+
+ T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#else
+
+#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
+ h = Sigma0(a) + Maj(a,b,c); \
+ d += T1; h += T1; } while (0)
+
+#define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
+ s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
+ s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
+ T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
+ ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
+
+static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
+ {
+ const SHA_LONG64 *W=in;
+ SHA_LONG64 a,b,c,d,e,f,g,h,s0,s1,T1;
+ SHA_LONG64 X[16];
+ int i;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+#if BYTE_ORDER == BIG_ENDIAN
+ T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#else
+ T1 = X[0] = PULL64(W[0]); ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = PULL64(W[1]); ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = PULL64(W[2]); ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = PULL64(W[3]); ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = PULL64(W[4]); ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = PULL64(W[5]); ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = PULL64(W[6]); ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = PULL64(W[7]); ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = PULL64(W[8]); ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = PULL64(W[9]); ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = PULL64(W[10]); ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = PULL64(W[11]); ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = PULL64(W[12]); ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = PULL64(W[13]); ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = PULL64(W[14]); ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = PULL64(W[15]); ROUND_00_15(15,b,c,d,e,f,g,h,a);
+#endif
+
+ for (i=16;i<80;i+=16)
+ {
+ ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
+ ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
+ ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
+ ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
+ ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
+ ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
+ ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
+ ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
+ ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
+ ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
+ ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
+ ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
+ ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
+ ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
+ ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
+ ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ W+=SHA_LBLOCK;
+ }
+ }
+
+#endif
+
+#endif /* SHA512_ASM */
+
+#endif /* !OPENSSL_NO_SHA512 */
diff --git a/include/blf.h b/include/blf.h
@@ -0,0 +1,82 @@
+/* $OpenBSD: blf.h,v 1.7 2007/03/14 17:59:41 grunk Exp $ */
+/*
+ * Blowfish - a fast block cipher designed by Bruce Schneier
+ *
+ * Copyright 1997 Niels Provos <provos@physnet.uni-hamburg.de>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _BLF_H_
+#define _BLF_H_
+
+/* Schneier specifies a maximum key length of 56 bytes.
+ * This ensures that every key bit affects every cipher
+ * bit. However, the subkeys can hold up to 72 bytes.
+ * Warning: For normal blowfish encryption only 56 bytes
+ * of the key affect all cipherbits.
+ */
+
+#define BLF_N 16 /* Number of Subkeys */
+#define BLF_MAXKEYLEN ((BLF_N-2)*4) /* 448 bits */
+#define BLF_MAXUTILIZED ((BLF_N+2)*4) /* 576 bits */
+
+/* Blowfish context */
+typedef struct BlowfishContext {
+ u_int32_t S[4][256]; /* S-Boxes */
+ u_int32_t P[BLF_N + 2]; /* Subkeys */
+} blf_ctx;
+
+/* Raw access to customized Blowfish
+ * blf_key is just:
+ * Blowfish_initstate( state )
+ * Blowfish_expand0state( state, key, keylen )
+ */
+
+void Blowfish_encipher(blf_ctx *, u_int32_t *, u_int32_t *);
+void Blowfish_decipher(blf_ctx *, u_int32_t *, u_int32_t *);
+void Blowfish_initstate(blf_ctx *);
+void Blowfish_expand0state(blf_ctx *, const u_int8_t *, u_int16_t);
+void Blowfish_expandstate
+(blf_ctx *, const u_int8_t *, u_int16_t, const u_int8_t *, u_int16_t);
+
+/* Standard Blowfish */
+
+void blf_key(blf_ctx *, const u_int8_t *, u_int16_t);
+void blf_enc(blf_ctx *, u_int32_t *, u_int16_t);
+void blf_dec(blf_ctx *, u_int32_t *, u_int16_t);
+
+void blf_ecb_encrypt(blf_ctx *, u_int8_t *, u_int32_t);
+void blf_ecb_decrypt(blf_ctx *, u_int8_t *, u_int32_t);
+
+void blf_cbc_encrypt(blf_ctx *, u_int8_t *, u_int8_t *, u_int32_t);
+void blf_cbc_decrypt(blf_ctx *, u_int8_t *, u_int8_t *, u_int32_t);
+
+/* Converts u_int8_t to u_int32_t */
+u_int32_t Blowfish_stream2word(const u_int8_t *, u_int16_t , u_int16_t *);
+
+#endif
diff --git a/include/machine/endian.h b/include/machine/endian.h
@@ -0,0 +1,40 @@
+/*
+ * Public domain
+ * machine/endian.h compatibility shim
+ */
+
+#ifndef LIBCRYPTOCOMPAT_BYTE_ORDER_H_
+#define LIBCRYPTOCOMPAT_BYTE_ORDER_H_
+
+#if defined(_WIN32)
+
+#define LITTLE_ENDIAN 1234
+#define BIG_ENDIAN 4321
+#define PDP_ENDIAN 3412
+
+/*
+ * Use GCC and Visual Studio compiler defines to determine endian.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BYTE_ORDER LITTLE_ENDIAN
+#else
+#define BYTE_ORDER BIG_ENDIAN
+#endif
+
+#elif defined(__linux__)
+#include <endian.h>
+
+#elif defined(__sun) || defined(_AIX) || defined(__hpux)
+#include <sys/types.h>
+#include <arpa/nameser_compat.h>
+
+#elif defined(__sgi)
+#include <standards.h>
+#include <sys/endian.h>
+
+#else
+#include_next <machine/endian.h>
+
+#endif
+
+#endif
diff --git a/include/stdlib.h b/include/stdlib.h
@@ -0,0 +1,30 @@
+/*
+ * stdlib.h compatibility shim
+ * Public domain
+ */
+
+#include_next <stdlib.h>
+
+#ifndef LIBCRYPTOCOMPAT_STDLIB_H
+#define LIBCRYPTOCOMPAT_STDLIB_H
+
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdint.h>
+
+#ifndef HAVE_ARC4RANDOM_BUF
+uint32_t arc4random(void);
+uint32_t arc4random_uniform(uint32_t);
+void arc4random_buf(void *_buf, size_t n);
+#endif
+
+#ifndef HAVE_REALLOCARRAY
+void *reallocarray(void *, size_t, size_t);
+#endif
+
+#ifndef HAVE_STRTONUM
+long long strtonum(const char *nptr, long long minval,
+ long long maxval, const char **errstr);
+#endif
+
+#endif
diff --git a/include/string.h b/include/string.h
@@ -0,0 +1,69 @@
+/*
+ * Public domain
+ * string.h compatibility shim
+ */
+
+#include_next <string.h>
+
+#ifndef LIBCRYPTOCOMPAT_STRING_H
+#define LIBCRYPTOCOMPAT_STRING_H
+
+#include <sys/types.h>
+
+#if defined(__sun) || defined(__hpux)
+/* Some functions historically defined in string.h were placed in strings.h by
+ * SUS. Use the same hack as OS X and FreeBSD use to work around on Solaris and HPUX.
+ */
+#include <strings.h>
+#endif
+
+#ifndef HAVE_STRLCPY
+size_t strlcpy(char *dst, const char *src, size_t siz);
+#endif
+
+#ifndef HAVE_STRLCAT
+size_t strlcat(char *dst, const char *src, size_t siz);
+#endif
+
+#ifndef HAVE_STRNDUP
+char * strndup(const char *str, size_t maxlen);
+/* the only user of strnlen is strndup, so only build it if needed */
+#ifndef HAVE_STRNLEN
+size_t strnlen(const char *str, size_t maxlen);
+#endif
+#endif
+
+#ifndef HAVE_EXPLICIT_BZERO
+void explicit_bzero(void *, size_t);
+#endif
+
+#ifndef HAVE_TIMINGSAFE_BCMP
+int timingsafe_bcmp(const void *b1, const void *b2, size_t n);
+#endif
+
+#ifndef HAVE_TIMINGSAFE_MEMCMP
+int timingsafe_memcmp(const void *b1, const void *b2, size_t len);
+#endif
+
+#ifndef HAVE_MEMMEM
+void * memmem(const void *big, size_t big_len, const void *little,
+ size_t little_len);
+#endif
+
+#ifdef _WIN32
+#include <errno.h>
+
+static inline char *
+posix_strerror(int errnum)
+{
+ if (errnum == ECONNREFUSED) {
+ return "Connection refused";
+ }
+ return strerror(errnum);
+}
+
+#define strerror(errnum) posix_strerror(errnum)
+
+#endif
+
+#endif
diff --git a/include/sys/types.h b/include/sys/types.h
@@ -0,0 +1,21 @@
+/*
+ * Public domain
+ * sys/types.h compatibility shim
+ */
+
+#include_next <sys/types.h>
+
+#ifndef LIBCRYPTOCOMPAT_SYS_TYPES_H
+#define LIBCRYPTOCOMPAT_SYS_TYPES_H
+
+#include <stdint.h>
+
+#ifdef __MINGW32__
+#include <_bsd_types.h>
+#endif
+
+#if !defined(HAVE_ATTRIBUTE__BOUNDED__) && !defined(__bounded__)
+# define __bounded__(x, y, z)
+#endif
+
+#endif
diff --git a/include/unistd.h b/include/unistd.h
@@ -0,0 +1,19 @@
+/*
+ * Public domain
+ * unistd.h compatibility shim
+ */
+
+#include_next <unistd.h>
+
+#ifndef LIBCRYPTOCOMPAT_UNISTD_H
+#define LIBCRYPTOCOMPAT_UNISTD_H
+
+#ifndef HAVE_GETENTROPY
+int getentropy(void *buf, size_t buflen);
+#endif
+
+#ifndef HAVE_ISSETUGID
+int issetugid(void);
+#endif
+
+#endif
diff --git a/rockspec/arc4random-scm-1.rockspec b/rockspec/arc4random-scm-1.rockspec
@@ -0,0 +1,33 @@
+package = "arc4random"
+version = "scm-1"
+
+source = {
+ url = "git://github.com/mikejsavage/lua-arc4random.git",
+}
+
+description = {
+ summary = "A Lua wrapper for arc4random",
+ homepage = "http://github.com/mikejsavage/lua-arc4random",
+ license = "ISC",
+ maintainer = "Mike Savage",
+}
+
+dependencies = {
+ "lua >= 5.1",
+}
+
+build = {
+ type = "make",
+
+ install_pass = false,
+
+ build_variables = {
+ LUA_INCDIR = "$(LUA_INCDIR)",
+ },
+
+ install = {
+ lib = {
+ [ "arc4random" ] = "arc4random.so",
+ },
+ },
+}
diff --git a/src/main.c b/src/main.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2014, Michael Savage <mike@mikejsavage.co.uk>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <math.h>
+
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
+#if LUA_VERSION_NUM < 502
+ #define luaL_newlib( L, l ) ( lua_newtable( L ), luaL_register( L, NULL, l ) )
+#endif
+
+#define LUAINT_MAX ( sizeof( lua_Integer ) == 32 ? INT32_MAX : INT64_MAX )
+
+static int luaarc4_random( lua_State * const L ) {
+ // http://marc.info/?l=openbsd-tech&m=142175806616927&w=2
+ if( lua_gettop( L ) == 0 ) {
+ uint16_t buf[ 3 ];
+ arc4random_buf( buf, sizeof( buf ) );
+
+ const double d = ldexp( ( double ) buf[ 0 ], -48 )
+ + ldexp( ( double ) buf[ 1 ], -32 )
+ + ldexp( ( double ) buf[ 2 ], -16 );
+
+ lua_pushnumber( L, d );
+
+ return 1;
+ }
+
+ lua_Integer min = 1;
+ lua_Integer max;
+
+ if( lua_gettop( L ) == 1 ) {
+ max = lua_tointeger( L, 1 );
+ }
+ else {
+ min = lua_tointeger( L, 1 );
+ max = lua_tointeger( L, 2 );
+ }
+
+ if( max == LUAINT_MAX ) {
+ lua_pushliteral( L, "upper bound too large" );
+ return lua_error( L );
+ }
+
+ lua_Integer max_plus_one = max + 1;
+
+ if( max_plus_one <= min ) {
+ lua_pushliteral( L, "interval is empty" );
+ return lua_error( L );
+ }
+
+ if( max_plus_one - min > UINT32_MAX ) {
+ lua_pushliteral( L, "interval too large" );
+ return lua_error( L );
+ }
+
+ const lua_Integer r = min + arc4random_uniform( max_plus_one - min );
+ lua_pushinteger( L, r );
+
+ return 1;
+}
+
+static int luaarc4_buf( lua_State * const L ) {
+ const long bytes = luaL_checklong( L, 1 );
+ char * const buf = malloc( bytes );
+
+ if( buf == NULL ) {
+ lua_pushliteral( L, "out of memory" );
+ return lua_error( L );
+ }
+
+ arc4random_buf( buf, bytes );
+ lua_pushlstring( L, buf, bytes );
+ free( buf );
+
+ return 1;
+}
+
+static const struct luaL_Reg luaarc4_lib[] = {
+ { "random", luaarc4_random },
+ { "buf", luaarc4_buf },
+ { NULL, NULL },
+};
+
+LUALIB_API int luaopen_arc4random( lua_State * const L ) {
+ luaL_newlib( L, luaarc4_lib );
+
+ return 1;
+}
diff --git a/test.lua b/test.lua
@@ -0,0 +1,16 @@
+#! /usr/bin/lua
+
+local arc4 = require( "arc4random" )
+
+for i = 1, 100 do
+ print( ( "%.3f" ):format( arc4.random() ) )
+end
+print( arc4.random() )
+print( arc4.random( 3 ) )
+print( arc4.random( -5, 5 ) )
+
+local str = arc4.buf( 16 )
+str = str:gsub( "(.)", function( c )
+ return ( "%02x" ):format( string.byte( c ) )
+end )
+print( str )