commit e01448b5ddae2a8c67706f2ae789e8fa522fdd12
parent 98b03e3aae6db9b613545e1e02a0349d8f3dcfa9
Author: Michael Savage <mikejsavage@gmail.com>
Date: Thu, 11 Dec 2014 16:28:07 +0000
Rewrite!
Important changes:
- Support `$2b$` digests.
- Simplified interface to just digest/verify. Generating salts and
setting random devices is not something anyone should have to worry
about.
- Use OpenBSD's bcrypt. tedu's work on the new crypt API makes it much
nicer to work with, and leaves less room for me to get things wrong.
- Pull in arc4random from libressl. OpenBSD's bcrypt requires this
anyway, but it also fixes things like #3 for free and again, leaves
less room for me to make mistakes.
- Support more build targets. I have tested on Linux and OpenBSD (5.6+),
and it should work on OSX and FreeBSD.
Diffstat:
60 files changed, 17928 insertions(+), 2440 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,4 @@
.tags
bcrypt.so
-crypt_test
-
*.o
diff --git a/Makefile b/Makefile
@@ -1,39 +1,20 @@
-SOURCES = $(wildcard src/**/*.c src/*.c)
-OBJECTS = $(patsubst %.c,%.o,$(SOURCES))
+include Makefile.mess
-TARGET = bcrypt.so
+SRCS += src/main.c
-CFLAGS = -O2 -fPIC -std=c99 -D_GNU_SOURCE -Wall -Wextra -Wno-nonnull -Wwrite-strings -Wformat=2 -DNDEBUG -Ilib/bcrypt
+CFLAGS += -Wall -Wno-pointer-sign
+CFLAGS += -O2 -fPIC -DNDEBUG
-BCRYPT_OBJECTS = lib/bcrypt/crypt_blowfish.o lib/bcrypt/x86.o lib/bcrypt/crypt_gensalt.o lib/bcrypt/wrapper.o
+OBJS := $(patsubst %.c,%.o,$(SRCS))
-ifdef LUA_INCDIR
- CFLAGS += -I$(LUA_INCDIR)
-endif
+all: bcrypt.so
-ifeq ($(shell uname -s),Darwin)
- CFLAGS += -bundle -undefined dynamic_lookup
-else
- CFLAGS += -shared
-endif
-
-.PHONY: debug test clean
-
-all: $(TARGET)
-
-debug: CFLAGS += -ggdb -UNDEBUG
+debug: CFLAGS += -ggdb3 -UNDEBUG
debug: all
-lib/bcrypt/%.o:
- make -C lib/bcrypt
-
-$(TARGET): $(OBJECTS) $(BCRYPT_OBJECTS)
- $(CC) -o $(TARGET) $^ $(CFLAGS)
-
-test:
- make -C lib/bcrypt check
- @lua tests.lua
+bcrypt.so: $(OBJS)
+ $(CC) -o bcrypt.so $(LDFLAGS) $(OBJS)
clean:
- make -C lib/bcrypt clean
- rm -f $(OBJECTS) $(TARGET)
+ rm -f bcrypt.so
+ rm -f $(OBJS)
diff --git a/Makefile.mess b/Makefile.mess
@@ -0,0 +1,44 @@
+# This is more or less what libressl does
+# See http://openbsd.cs.toronto.edu/cgi-bin/cvsweb/src/lib/libssl/src/crypto/mem_clr.c?rev=1.4&content-type=text/x-cvsweb-markup
+CFLAGS += -DOPENSSL_cleanse=explicit_bzero
+
+# Make pwd.h define bcrypt_newnhash/bcrypt_checkpass
+CFLAGS += -D__BSD_VISIBLE
+
+# GCC whines without this. Assume everyone has strndup anyway
+CFLAGS += -DHAVE_STRNDUP
+
+# Let Luarocks point us to the right headers
+ifdef LUA_INCDIR
+ CFLAGS += -I$(LUA_INCDIR)
+endif
+
+# OS detection
+uname := $(shell uname -s)
+
+ifneq ($(uname),Darwin)
+ LDFLAGS += -shared
+else
+ LDFLAGS += -bundle -undefined dynamic_lookup
+endif
+
+CFLAGS += -Iinclude
+
+SRCS += compat/safebfuns.c
+SRCS += compat/bcrypt/bcrypt.c
+SRCS += compat/bcrypt/blowfish.c
+SRCS += compat/arc4random/arc4random.c
+SRCS += compat/strlcpy.c
+SRCS += compat/sha/sha512.c
+
+ifeq ($(uname),Linux)
+ SRCS += compat/getentropy/getentropy_linux.c
+endif
+
+ifeq ($(uname),Darwin)
+ SRCS += compat/getentropy/getentropy_osx.c
+endif
+
+ifeq ($(uname),FreeBSD)
+ SRCS += compat/getentropy/getentropy_freebsd.c
+endif
diff --git a/Makefile.obsd b/Makefile.obsd
@@ -0,0 +1,10 @@
+LIB = bcrypt
+SRCS = src/main.c
+
+LDADD = -lcrypto
+CFLAGS += `pkg-config --cflags lua51`
+
+SHLIB_MAJOR = 2
+SHLIB_MINOR = 0
+
+.include <bsd.lib.mk>
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-A Lua wrapper for [crypt_blowfish](http://www.openwall.com/crypt/).
+A Lua wrapper for OpenBSD's bcrypt.
Requirements
@@ -17,68 +17,81 @@ Usage
-----
local bcrypt = require( "bcrypt" )
- local rounds = 5 -- tune this as appropriate
- local digest = bcrypt.digest( "password", rounds )
- assert( bcrypt.verify( "password", digest ) )
-
-You can also explicitly pass a salt to `bcrypt.digest`:
-
- local bcrypt = require( "bcrypt" )
- local rounds = 5 -- tune this as appropriate
-
- local salt = bcrypt.salt( rounds )
- local digest = bcrypt.digest( "password", salt )
+ -- Bigger numbers here will make your digest exponentially harder to compute
+ local log_rounds = 9
+ local digest = bcrypt.digest( "password", log_rounds )
assert( bcrypt.verify( "password", digest ) )
-If you want to use a different random device, you can use
-`bcrypt.random`:
- local bcrypt = require( "bcrypt" )
- bcrypt.random( "/dev/random" )
- local digest = bcrypt.digest( "password", rounds )
-
- -- bcrypt.random( "/dev/null" ) - fails
+Security concerns
+-----------------
+Lua will keep plaintext passwords around in memory as part of its string
+interning mechanism. As far as I'm aware, there's nothing I can do about
+this.
-Chroot
+
+Tuning
------
-If you want to use `lua-bcrypt` from inside a chroot, `/dev/urandom`
-must still exist when `require( "bcrypt" )` is called, even if you are
-planning to use a different random device. The reasoning behind this is
-that chroots are a less common use case of this library, and we should
-fail as early as possible on errors in the common use case.
+If you would like to automatically tune the number of rounds to your
+hardware, you can include a function like:
-Therefore, if you wish to use chroot, you should run:
+ function bcrypt.tune( t )
+ local SAMPLES = 10
+ local rounds = 5
+
+ while true do
+ local total = 0
+
+ for i = 1, SAMPLES do
+ local start = os.clock()
+ bcrypt.digest( "asdf", rounds )
+ local delta = os.clock() - start
+
+ total = total + delta
+ end
+
+ if ( total / SAMPLES ) * 1000 >= t then
+ return rounds - 1
+ end
+
+ rounds = rounds + 1
+ end
+ end
- mknod -m 644 /path/to/chroot/dev/urandom c 1 9
+This function returns the largest load factor such that `bcrypt.digest(
+str, work )` takes less than `t` milliseconds (assuming your CPU isn't
+dodgy).
+Note that this will take at least `2 * SAMPLES * t` ms to evaluate.
-Security
---------
-You might have noticed that `luabcrypt_verify` doesn't use a constant
-time comparison and `luabcrypt_digest` doesn't take steps to zero out
-the entropy buffer. I don't think either of these are a problem.
+Chroot
+------
-I haven't used a constant time comparison because I'm not sure how to
-write one and then guarantee the compiler won't optimise it out. I don't
-think this is a big deal because strong hash functions, such as bcrypt,
-have good preimage resistance. That said, the code is currently relying
-on this holding forever, and if you aren't happy with that you shouldn't
-use it.
+[lua-setuid]: https://github.com/mikejsavage/lua-setuid
+[test-chroot]: https://github.com/mikejsavage/lua-bcrypt/blob/master/test-chroot.lua
-I don't zero out the entropy buffer after generating a salt for the same
-reason. There are two ways in which this can be obtained:
+Some operating systems do not provide a method for reliably getting
+random data from inside a chroot. One workaround for this is to chroot
+after initialising lua-bcrypt, for example by using
+[lua-setuid][lua-setuid].
-* another program calls `malloc` and gets handed memory that was
- previously used as an entropy buffer
-* the entropy buffer is read directly from memory
+ local setuid = require( "setuid" )
+ local bcrypt = require( "bcrypt" )
+
+ assert( setuid.chroot( "." ) )
+ assert( not io.open( "/etc/passwd", "r" ) )
+
+ print( bcrypt.digest( "adsf", 5 ) )
+
+There are also operating system specific workarounds. On
+non-bleeding-edge (earlier than 3.17) Linux kernels, you can run:
+
+ mkdir /path/to/chroot/dev
+ mknod -m 644 /path/to/chroot/dev/urandom c 1 9
-I don't think it matters because you shouldn't be able to predict the
-output of a CSPRNG even given previous outputs. If you are worried about
-the second case, note that Lua interns short strings which will include
-passwords handled by your application so an attacker can read those
-directly instead.
+I have included a test script in [`test-chroot.lua`][test-chroot].
diff --git a/compat/arc4random/arc4random.c b/compat/arc4random/arc4random.c
@@ -0,0 +1,195 @@
+/* $OpenBSD: arc4random.c,v 1.49 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * ChaCha based random number generator for OpenBSD.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+
+#define KEYSTREAM_ONLY
+#include "chacha_private.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifdef __GNUC__
+#define inline __inline
+#else /* !__GNUC__ */
+#define inline
+#endif /* !__GNUC__ */
+
+#define KEYSZ 32
+#define IVSZ 8
+#define BLOCKSZ 64
+#define RSBUFSZ (16*BLOCKSZ)
+
+/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */
+static struct _rs {
+ size_t rs_have; /* valid bytes at end of rs_buf */
+ size_t rs_count; /* bytes till reseed */
+} *rs;
+
+/* Maybe be preserved in fork children, if _rs_allocate() decides. */
+static struct _rsx {
+ chacha_ctx rs_chacha; /* chacha context for random keystream */
+ u_char rs_buf[RSBUFSZ]; /* keystream blocks */
+} *rsx;
+
+static inline int _rs_allocate(struct _rs **, struct _rsx **);
+static inline void _rs_forkdetect(void);
+#include "arc4random.h"
+
+static inline void _rs_rekey(u_char *dat, size_t datlen);
+
+static inline void
+_rs_init(u_char *buf, size_t n)
+{
+ if (n < KEYSZ + IVSZ)
+ return;
+
+ if (rs == NULL) {
+ if (_rs_allocate(&rs, &rsx) == -1)
+ abort();
+ }
+
+ chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0);
+ chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ);
+}
+
+static void
+_rs_stir(void)
+{
+ u_char rnd[KEYSZ + IVSZ];
+
+ if (getentropy(rnd, sizeof rnd) == -1)
+ _getentropy_fail();
+
+ if (!rs)
+ _rs_init(rnd, sizeof(rnd));
+ else
+ _rs_rekey(rnd, sizeof(rnd));
+ explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */
+
+ /* invalidate rs_buf */
+ rs->rs_have = 0;
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+
+ rs->rs_count = 1600000;
+}
+
+static inline void
+_rs_stir_if_needed(size_t len)
+{
+ _rs_forkdetect();
+ if (!rs || rs->rs_count <= len)
+ _rs_stir();
+ if (rs->rs_count <= len)
+ rs->rs_count = 0;
+ else
+ rs->rs_count -= len;
+}
+
+static inline void
+_rs_rekey(u_char *dat, size_t datlen)
+{
+#ifndef KEYSTREAM_ONLY
+ memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf));
+#endif
+ /* fill rs_buf with the keystream */
+ chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf,
+ rsx->rs_buf, sizeof(rsx->rs_buf));
+ /* mix in optional user provided data */
+ if (dat) {
+ size_t i, m;
+
+ m = min(datlen, KEYSZ + IVSZ);
+ for (i = 0; i < m; i++)
+ rsx->rs_buf[i] ^= dat[i];
+ }
+ /* immediately reinit for backtracking resistance */
+ _rs_init(rsx->rs_buf, KEYSZ + IVSZ);
+ memset(rsx->rs_buf, 0, KEYSZ + IVSZ);
+ rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ;
+}
+
+static inline void
+_rs_random_buf(void *_buf, size_t n)
+{
+ u_char *buf = (u_char *)_buf;
+ u_char *keystream;
+ size_t m;
+
+ _rs_stir_if_needed(n);
+ while (n > 0) {
+ if (rs->rs_have > 0) {
+ m = min(n, rs->rs_have);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf)
+ - rs->rs_have;
+ memcpy(buf, keystream, m);
+ memset(keystream, 0, m);
+ buf += m;
+ n -= m;
+ rs->rs_have -= m;
+ }
+ if (rs->rs_have == 0)
+ _rs_rekey(NULL, 0);
+ }
+}
+
+static inline void
+_rs_random_u32(uint32_t *val)
+{
+ u_char *keystream;
+
+ _rs_stir_if_needed(sizeof(*val));
+ if (rs->rs_have < sizeof(*val))
+ _rs_rekey(NULL, 0);
+ keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have;
+ memcpy(val, keystream, sizeof(*val));
+ memset(keystream, 0, sizeof(*val));
+ rs->rs_have -= sizeof(*val);
+}
+
+uint32_t
+arc4random(void)
+{
+ uint32_t val;
+
+ _ARC4_LOCK();
+ _rs_random_u32(&val);
+ _ARC4_UNLOCK();
+ return val;
+}
+
+void
+arc4random_buf(void *buf, size_t n)
+{
+ _ARC4_LOCK();
+ _rs_random_buf(buf, n);
+ _ARC4_UNLOCK();
+}
diff --git a/compat/arc4random/arc4random.h b/compat/arc4random/arc4random.h
@@ -0,0 +1,26 @@
+#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H
+#define LIBCRYPTOCOMPAT_ARC4RANDOM_H
+
+#include <sys/param.h>
+
+#if defined(__FreeBSD__)
+#include "arc4random_freebsd.h"
+
+#elif defined(__linux__)
+#include "arc4random_linux.h"
+
+#elif defined(__APPLE__)
+#include "arc4random_osx.h"
+
+#elif defined(__sun)
+#include "arc4random_solaris.h"
+
+#elif defined(_WIN32)
+#include "arc4random_win.h"
+
+#else
+#error "No arc4random hooks defined for this platform."
+
+#endif
+
+#endif
diff --git a/compat/arc4random/arc4random_freebsd.h b/compat/arc4random/arc4random_freebsd.h
@@ -0,0 +1,85 @@
+/* $OpenBSD: arc4random_freebsd.h,v 1.1 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+/*
+ * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if
+ * a program does not link to -lthr. Callbacks registered with pthread_atfork()
+ * appear to fail silently. So, it is not always possible to detect a PID
+ * wraparound.
+ */
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return -1;
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return -1;
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return 0;
+}
diff --git a/compat/arc4random/arc4random_linux.h b/compat/arc4random/arc4random_linux.h
@@ -0,0 +1,85 @@
+/* $OpenBSD: arc4random_linux.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#ifdef __GLIBC__
+extern void *__dso_handle;
+extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *);
+#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle)
+#else
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+#endif
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/compat/arc4random/arc4random_osx.h b/compat/arc4random/arc4random_osx.h
@@ -0,0 +1,79 @@
+/* $OpenBSD: arc4random_osx.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return -1;
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return -1;
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return 0;
+}
diff --git a/compat/arc4random/arc4random_solaris.h b/compat/arc4random/arc4random_solaris.h
@@ -0,0 +1,79 @@
+/* $OpenBSD: arc4random_solaris.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <sys/mman.h>
+
+#include <pthread.h>
+#include <signal.h>
+
+static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER;
+#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx)
+#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx)
+
+#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f))
+
+static inline void
+_getentropy_fail(void)
+{
+ raise(SIGKILL);
+}
+
+static volatile sig_atomic_t _rs_forked;
+
+static inline void
+_rs_forkhandler(void)
+{
+ _rs_forked = 1;
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+ static pid_t _rs_pid = 0;
+ pid_t pid = getpid();
+
+ if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) {
+ _rs_pid = pid;
+ _rs_forked = 0;
+ if (rs)
+ memset(rs, 0, sizeof(*rs));
+ }
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+ return (-1);
+
+ if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+ munmap(*rsp, sizeof(**rsp));
+ return (-1);
+ }
+
+ _ARC4_ATFORK(_rs_forkhandler);
+ return (0);
+}
diff --git a/compat/arc4random/arc4random_win.h b/compat/arc4random/arc4random_win.h
@@ -0,0 +1,74 @@
+/* $OpenBSD: arc4random_win.h,v 1.3 2014/07/20 16:59:31 bcook Exp $ */
+
+/*
+ * Copyright (c) 1996, David Mazieres <dm@uun.org>
+ * Copyright (c) 2008, Damien Miller <djm@openbsd.org>
+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Stub functions for portability.
+ */
+
+#include <windows.h>
+
+static volatile HANDLE arc4random_mtx = NULL;
+
+/*
+ * Initialize the mutex on the first lock attempt. On collision, each thread
+ * will attempt to allocate a mutex and compare-and-swap it into place as the
+ * global mutex. On failure to swap in the global mutex, the mutex is closed.
+ */
+#define _ARC4_LOCK() { \
+ if (!arc4random_mtx) { \
+ HANDLE p = CreateMutex(NULL, FALSE, NULL); \
+ if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \
+ CloseHandle(p); \
+ } \
+ WaitForSingleObject(arc4random_mtx, INFINITE); \
+} \
+
+#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx)
+
+static inline void
+_getentropy_fail(void)
+{
+ TerminateProcess(GetCurrentProcess(), 0);
+}
+
+static inline int
+_rs_allocate(struct _rs **rsp, struct _rsx **rsxp)
+{
+ *rsp = calloc(1, sizeof(**rsp));
+ if (*rsp == NULL)
+ return (-1);
+
+ *rsxp = calloc(1, sizeof(**rsxp));
+ if (*rsxp == NULL) {
+ free(*rsp);
+ return (-1);
+ }
+ return (0);
+}
+
+static inline void
+_rs_forkhandler(void)
+{
+}
+
+static inline void
+_rs_forkdetect(void)
+{
+}
diff --git a/compat/arc4random/chacha_private.h b/compat/arc4random/chacha_private.h
@@ -0,0 +1,222 @@
+/*
+chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD$ */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+ u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v) ); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
+{
+ const char *constants;
+
+ x->input[4] = U8TO32_LITTLE(k + 0);
+ x->input[5] = U8TO32_LITTLE(k + 4);
+ x->input[6] = U8TO32_LITTLE(k + 8);
+ x->input[7] = U8TO32_LITTLE(k + 12);
+ if (kbits == 256) { /* recommended */
+ k += 16;
+ constants = sigma;
+ } else { /* kbits == 128 */
+ constants = tau;
+ }
+ x->input[8] = U8TO32_LITTLE(k + 0);
+ x->input[9] = U8TO32_LITTLE(k + 4);
+ x->input[10] = U8TO32_LITTLE(k + 8);
+ x->input[11] = U8TO32_LITTLE(k + 12);
+ x->input[0] = U8TO32_LITTLE(constants + 0);
+ x->input[1] = U8TO32_LITTLE(constants + 4);
+ x->input[2] = U8TO32_LITTLE(constants + 8);
+ x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x,const u8 *iv)
+{
+ x->input[12] = 0;
+ x->input[13] = 0;
+ x->input[14] = U8TO32_LITTLE(iv + 0);
+ x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
+{
+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ u8 *ctarget = NULL;
+ u8 tmp[64];
+ u_int i;
+
+ if (!bytes) return;
+
+ j0 = x->input[0];
+ j1 = x->input[1];
+ j2 = x->input[2];
+ j3 = x->input[3];
+ j4 = x->input[4];
+ j5 = x->input[5];
+ j6 = x->input[6];
+ j7 = x->input[7];
+ j8 = x->input[8];
+ j9 = x->input[9];
+ j10 = x->input[10];
+ j11 = x->input[11];
+ j12 = x->input[12];
+ j13 = x->input[13];
+ j14 = x->input[14];
+ j15 = x->input[15];
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
+ }
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+ for (i = 20;i > 0;i -= 2) {
+ QUARTERROUND( x0, x4, x8,x12)
+ QUARTERROUND( x1, x5, x9,x13)
+ QUARTERROUND( x2, x6,x10,x14)
+ QUARTERROUND( x3, x7,x11,x15)
+ QUARTERROUND( x0, x5,x10,x15)
+ QUARTERROUND( x1, x6,x11,x12)
+ QUARTERROUND( x2, x7, x8,x13)
+ QUARTERROUND( x3, x4, x9,x14)
+ }
+ x0 = PLUS(x0,j0);
+ x1 = PLUS(x1,j1);
+ x2 = PLUS(x2,j2);
+ x3 = PLUS(x3,j3);
+ x4 = PLUS(x4,j4);
+ x5 = PLUS(x5,j5);
+ x6 = PLUS(x6,j6);
+ x7 = PLUS(x7,j7);
+ x8 = PLUS(x8,j8);
+ x9 = PLUS(x9,j9);
+ x10 = PLUS(x10,j10);
+ x11 = PLUS(x11,j11);
+ x12 = PLUS(x12,j12);
+ x13 = PLUS(x13,j13);
+ x14 = PLUS(x14,j14);
+ x15 = PLUS(x15,j15);
+
+#ifndef KEYSTREAM_ONLY
+ x0 = XOR(x0,U8TO32_LITTLE(m + 0));
+ x1 = XOR(x1,U8TO32_LITTLE(m + 4));
+ x2 = XOR(x2,U8TO32_LITTLE(m + 8));
+ x3 = XOR(x3,U8TO32_LITTLE(m + 12));
+ x4 = XOR(x4,U8TO32_LITTLE(m + 16));
+ x5 = XOR(x5,U8TO32_LITTLE(m + 20));
+ x6 = XOR(x6,U8TO32_LITTLE(m + 24));
+ x7 = XOR(x7,U8TO32_LITTLE(m + 28));
+ x8 = XOR(x8,U8TO32_LITTLE(m + 32));
+ x9 = XOR(x9,U8TO32_LITTLE(m + 36));
+ x10 = XOR(x10,U8TO32_LITTLE(m + 40));
+ x11 = XOR(x11,U8TO32_LITTLE(m + 44));
+ x12 = XOR(x12,U8TO32_LITTLE(m + 48));
+ x13 = XOR(x13,U8TO32_LITTLE(m + 52));
+ x14 = XOR(x14,U8TO32_LITTLE(m + 56));
+ x15 = XOR(x15,U8TO32_LITTLE(m + 60));
+#endif
+
+ j12 = PLUSONE(j12);
+ if (!j12) {
+ j13 = PLUSONE(j13);
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+
+ U32TO8_LITTLE(c + 0,x0);
+ U32TO8_LITTLE(c + 4,x1);
+ U32TO8_LITTLE(c + 8,x2);
+ U32TO8_LITTLE(c + 12,x3);
+ U32TO8_LITTLE(c + 16,x4);
+ U32TO8_LITTLE(c + 20,x5);
+ U32TO8_LITTLE(c + 24,x6);
+ U32TO8_LITTLE(c + 28,x7);
+ U32TO8_LITTLE(c + 32,x8);
+ U32TO8_LITTLE(c + 36,x9);
+ U32TO8_LITTLE(c + 40,x10);
+ U32TO8_LITTLE(c + 44,x11);
+ U32TO8_LITTLE(c + 48,x12);
+ U32TO8_LITTLE(c + 52,x13);
+ U32TO8_LITTLE(c + 56,x14);
+ U32TO8_LITTLE(c + 60,x15);
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0;i < bytes;++i) ctarget[i] = c[i];
+ }
+ x->input[12] = j12;
+ x->input[13] = j13;
+ return;
+ }
+ bytes -= 64;
+ c += 64;
+#ifndef KEYSTREAM_ONLY
+ m += 64;
+#endif
+ }
+}
diff --git a/compat/bcrypt/bcrypt.c b/compat/bcrypt/bcrypt.c
@@ -0,0 +1,353 @@
+/* $OpenBSD: bcrypt.c,v 1.45 2014/07/20 04:22:34 guenther Exp $ */
+
+/*
+ * Copyright (c) 2014 Ted Unangst <tedu@openbsd.org>
+ * Copyright (c) 1997 Niels Provos <provos@umich.edu>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* This password hashing algorithm was designed by David Mazieres
+ * <dm@lcs.mit.edu> and works as follows:
+ *
+ * 1. state := InitState ()
+ * 2. state := ExpandKey (state, salt, password)
+ * 3. REPEAT rounds:
+ * state := ExpandKey (state, 0, password)
+ * state := ExpandKey (state, 0, salt)
+ * 4. ctext := "OrpheanBeholderScryDoubt"
+ * 5. REPEAT 64:
+ * ctext := Encrypt_ECB (state, ctext);
+ * 6. RETURN Concatenate (salt, ctext);
+ *
+ */
+
+#include <sys/types.h>
+#include <blf.h>
+#include <ctype.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* This implementation is adaptable to current computing power.
+ * You can have up to 2^31 rounds which should be enough for some
+ * time to come.
+ */
+
+#define BCRYPT_VERSION '2'
+#define BCRYPT_MAXSALT 16 /* Precomputation is just so nice */
+#define BCRYPT_BLOCKS 6 /* Ciphertext blocks */
+#define BCRYPT_MINLOGROUNDS 4 /* we have log2(rounds) in salt */
+
+#define BCRYPT_SALTSPACE (7 + (BCRYPT_MAXSALT * 4 + 2) / 3 + 1)
+#define BCRYPT_HASHSPACE 61
+
+char *bcrypt_gensalt(u_int8_t);
+
+static int encode_base64(char *, const u_int8_t *, size_t);
+static int decode_base64(u_int8_t *, size_t, const char *);
+
+/*
+ * Generates a salt for this version of crypt.
+ */
+static int
+bcrypt_initsalt(int log_rounds, uint8_t *salt, size_t saltbuflen)
+{
+ uint8_t csalt[BCRYPT_MAXSALT];
+
+ if (saltbuflen < BCRYPT_SALTSPACE)
+ return -1;
+
+ arc4random_buf(csalt, sizeof(csalt));
+
+ if (log_rounds < 4)
+ log_rounds = 4;
+ else if (log_rounds > 31)
+ log_rounds = 31;
+
+ snprintf(salt, saltbuflen, "$2b$%2.2u$", log_rounds);
+ encode_base64(salt + 7, csalt, sizeof(csalt));
+
+ return 0;
+}
+
+/*
+ * the core bcrypt function
+ */
+static int
+bcrypt_hashpass(const char *key, const char *salt, char *encrypted,
+ size_t encryptedlen)
+{
+ blf_ctx state;
+ u_int32_t rounds, i, k;
+ u_int16_t j;
+ size_t key_len;
+ u_int8_t salt_len, logr, minor;
+ u_int8_t ciphertext[4 * BCRYPT_BLOCKS] = "OrpheanBeholderScryDoubt";
+ u_int8_t csalt[BCRYPT_MAXSALT];
+ u_int32_t cdata[BCRYPT_BLOCKS];
+
+ if (encryptedlen < BCRYPT_HASHSPACE)
+ return -1;
+
+ /* Check and discard "$" identifier */
+ if (salt[0] != '$')
+ return -1;
+ salt += 1;
+
+ if (salt[0] != BCRYPT_VERSION)
+ return -1;
+
+ /* Check for minor versions */
+ switch ((minor = salt[1])) {
+ case 'a':
+ key_len = (u_int8_t)(strlen(key) + 1);
+ break;
+ case 'b':
+ /* strlen() returns a size_t, but the function calls
+ * below result in implicit casts to a narrower integer
+ * type, so cap key_len at the actual maximum supported
+ * length here to avoid integer wraparound */
+ key_len = strlen(key);
+ if (key_len > 72)
+ key_len = 72;
+ key_len++; /* include the NUL */
+ break;
+ default:
+ return -1;
+ }
+ if (salt[2] != '$')
+ return -1;
+ /* Discard version + "$" identifier */
+ salt += 3;
+
+ /* Check and parse num rounds */
+ if (!isdigit((unsigned char)salt[0]) ||
+ !isdigit((unsigned char)salt[1]) || salt[2] != '$')
+ return -1;
+ logr = atoi(salt);
+ if (logr < BCRYPT_MINLOGROUNDS || logr > 31)
+ return -1;
+ /* Computer power doesn't increase linearly, 2^x should be fine */
+ rounds = 1U << logr;
+
+ /* Discard num rounds + "$" identifier */
+ salt += 3;
+
+ if (strlen(salt) * 3 / 4 < BCRYPT_MAXSALT)
+ return -1;
+
+ /* We dont want the base64 salt but the raw data */
+ if (decode_base64(csalt, BCRYPT_MAXSALT, salt))
+ return -1;
+ salt_len = BCRYPT_MAXSALT;
+
+ /* Setting up S-Boxes and Subkeys */
+ Blowfish_initstate(&state);
+ Blowfish_expandstate(&state, csalt, salt_len,
+ (u_int8_t *) key, key_len);
+ for (k = 0; k < rounds; k++) {
+ Blowfish_expand0state(&state, (u_int8_t *) key, key_len);
+ Blowfish_expand0state(&state, csalt, salt_len);
+ }
+
+ /* This can be precomputed later */
+ j = 0;
+ for (i = 0; i < BCRYPT_BLOCKS; i++)
+ cdata[i] = Blowfish_stream2word(ciphertext, 4 * BCRYPT_BLOCKS, &j);
+
+ /* Now do the encryption */
+ for (k = 0; k < 64; k++)
+ blf_enc(&state, cdata, BCRYPT_BLOCKS / 2);
+
+ for (i = 0; i < BCRYPT_BLOCKS; i++) {
+ ciphertext[4 * i + 3] = cdata[i] & 0xff;
+ cdata[i] = cdata[i] >> 8;
+ ciphertext[4 * i + 2] = cdata[i] & 0xff;
+ cdata[i] = cdata[i] >> 8;
+ ciphertext[4 * i + 1] = cdata[i] & 0xff;
+ cdata[i] = cdata[i] >> 8;
+ ciphertext[4 * i + 0] = cdata[i] & 0xff;
+ }
+
+
+ snprintf(encrypted, 8, "$2%c$%2.2u$", minor, logr);
+ encode_base64(encrypted + 7, csalt, BCRYPT_MAXSALT);
+ encode_base64(encrypted + 7 + 22, ciphertext, 4 * BCRYPT_BLOCKS - 1);
+ explicit_bzero(&state, sizeof(state));
+ explicit_bzero(ciphertext, sizeof(ciphertext));
+ explicit_bzero(csalt, sizeof(csalt));
+ explicit_bzero(cdata, sizeof(cdata));
+ return 0;
+}
+
+/*
+ * user friendly functions
+ */
+int
+bcrypt_newhash(const char *pass, int log_rounds, char *hash, size_t hashlen)
+{
+ char salt[BCRYPT_SALTSPACE];
+
+ if (bcrypt_initsalt(log_rounds, salt, sizeof(salt)) != 0)
+ return -1;
+
+ if (bcrypt_hashpass(pass, salt, hash, hashlen) != 0)
+ return -1;
+
+ explicit_bzero(salt, sizeof(salt));
+ return 0;
+}
+
+int
+bcrypt_checkpass(const char *pass, const char *goodhash)
+{
+ char hash[BCRYPT_HASHSPACE];
+
+ if (bcrypt_hashpass(pass, goodhash, hash, sizeof(hash)) != 0)
+ return -1;
+ if (strlen(hash) != strlen(goodhash) ||
+ timingsafe_bcmp(hash, goodhash, strlen(goodhash)) != 0)
+ return -1;
+
+ explicit_bzero(hash, sizeof(hash));
+ return 0;
+}
+
+/*
+ * internal utilities
+ */
+static const u_int8_t Base64Code[] =
+"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+
+static const u_int8_t index_64[128] = {
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 0, 1, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63, 255, 255,
+ 255, 255, 255, 255, 255, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
+ 255, 255, 255, 255, 255, 255, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 51, 52, 53, 255, 255, 255, 255, 255
+};
+#define CHAR64(c) ( (c) > 127 ? 255 : index_64[(c)])
+
+/*
+ * read buflen (after decoding) bytes of data from b64data
+ */
+static int
+decode_base64(u_int8_t *buffer, size_t len, const char *b64data)
+{
+ u_int8_t *bp = buffer;
+ const u_int8_t *p = b64data;
+ u_int8_t c1, c2, c3, c4;
+
+ while (bp < buffer + len) {
+ c1 = CHAR64(*p);
+ /* Invalid data */
+ if (c1 == 255)
+ return -1;
+
+ c2 = CHAR64(*(p + 1));
+ if (c2 == 255)
+ return -1;
+
+ *bp++ = (c1 << 2) | ((c2 & 0x30) >> 4);
+ if (bp >= buffer + len)
+ break;
+
+ c3 = CHAR64(*(p + 2));
+ if (c3 == 255)
+ return -1;
+
+ *bp++ = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
+ if (bp >= buffer + len)
+ break;
+
+ c4 = CHAR64(*(p + 3));
+ if (c4 == 255)
+ return -1;
+ *bp++ = ((c3 & 0x03) << 6) | c4;
+
+ p += 4;
+ }
+ return 0;
+}
+
+/*
+ * Turn len bytes of data into base64 encoded data.
+ * This works without = padding.
+ */
+static int
+encode_base64(char *b64buffer, const u_int8_t *data, size_t len)
+{
+ u_int8_t *bp = b64buffer;
+ const u_int8_t *p = data;
+ u_int8_t c1, c2;
+
+ while (p < data + len) {
+ c1 = *p++;
+ *bp++ = Base64Code[(c1 >> 2)];
+ c1 = (c1 & 0x03) << 4;
+ if (p >= data + len) {
+ *bp++ = Base64Code[c1];
+ break;
+ }
+ c2 = *p++;
+ c1 |= (c2 >> 4) & 0x0f;
+ *bp++ = Base64Code[c1];
+ c1 = (c2 & 0x0f) << 2;
+ if (p >= data + len) {
+ *bp++ = Base64Code[c1];
+ break;
+ }
+ c2 = *p++;
+ c1 |= (c2 >> 6) & 0x03;
+ *bp++ = Base64Code[c1];
+ *bp++ = Base64Code[c2 & 0x3f];
+ }
+ *bp = '\0';
+ return 0;
+}
+
+/*
+ * classic interface
+ */
+char *
+bcrypt_gensalt(u_int8_t log_rounds)
+{
+ static char gsalt[BCRYPT_SALTSPACE];
+
+ bcrypt_initsalt(log_rounds, gsalt, sizeof(gsalt));
+
+ return gsalt;
+}
+
+char *
+bcrypt(const char *pass, const char *salt)
+{
+ static char gencrypted[BCRYPT_HASHSPACE];
+ static char gerror[2];
+
+ /* How do I handle errors ? Return ':' */
+ strlcpy(gerror, ":", sizeof(gerror));
+ if (bcrypt_hashpass(pass, salt, gencrypted, sizeof(gencrypted)) != 0)
+ return gerror;
+
+ return gencrypted;
+}
diff --git a/compat/bcrypt/blowfish.c b/compat/bcrypt/blowfish.c
@@ -0,0 +1,685 @@
+/* $OpenBSD: blowfish.c,v 1.17 2003/04/09 21:46:02 markus Exp $ */
+/*
+ * Blowfish block cipher for OpenBSD
+ * Copyright 1997 Niels Provos <provos@physnet.uni-hamburg.de>
+ * All rights reserved.
+ *
+ * Implementation advice by David Mazieres <dm@lcs.mit.edu>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Niels Provos.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This code is derived from section 14.3 and the given source
+ * in section V of Applied Cryptography, second edition.
+ * Blowfish is an unpatented fast block cipher designed by
+ * Bruce Schneier.
+ */
+
+#if 0
+#include <stdio.h> /* used for debugging */
+#include <string.h>
+#endif
+
+#include <sys/types.h>
+#include <blf.h>
+
+#undef inline
+#ifdef __GNUC__
+#define inline __inline
+#else /* !__GNUC__ */
+#define inline
+#endif /* !__GNUC__ */
+
+/* Function for Feistel Networks */
+
+#define F(s, x) ((((s)[ (((x)>>24)&0xFF)] \
+ + (s)[0x100 + (((x)>>16)&0xFF)]) \
+ ^ (s)[0x200 + (((x)>> 8)&0xFF)]) \
+ + (s)[0x300 + ( (x) &0xFF)])
+
+#define BLFRND(s,p,i,j,n) (i ^= F(s,j) ^ (p)[n])
+
+void
+Blowfish_encipher(blf_ctx *c, u_int32_t *xl, u_int32_t *xr)
+{
+ u_int32_t Xl;
+ u_int32_t Xr;
+ u_int32_t *s = c->S[0];
+ u_int32_t *p = c->P;
+
+ Xl = *xl;
+ Xr = *xr;
+
+ Xl ^= p[0];
+ BLFRND(s, p, Xr, Xl, 1); BLFRND(s, p, Xl, Xr, 2);
+ BLFRND(s, p, Xr, Xl, 3); BLFRND(s, p, Xl, Xr, 4);
+ BLFRND(s, p, Xr, Xl, 5); BLFRND(s, p, Xl, Xr, 6);
+ BLFRND(s, p, Xr, Xl, 7); BLFRND(s, p, Xl, Xr, 8);
+ BLFRND(s, p, Xr, Xl, 9); BLFRND(s, p, Xl, Xr, 10);
+ BLFRND(s, p, Xr, Xl, 11); BLFRND(s, p, Xl, Xr, 12);
+ BLFRND(s, p, Xr, Xl, 13); BLFRND(s, p, Xl, Xr, 14);
+ BLFRND(s, p, Xr, Xl, 15); BLFRND(s, p, Xl, Xr, 16);
+
+ *xl = Xr ^ p[17];
+ *xr = Xl;
+}
+
+void
+Blowfish_decipher(blf_ctx *c, u_int32_t *xl, u_int32_t *xr)
+{
+ u_int32_t Xl;
+ u_int32_t Xr;
+ u_int32_t *s = c->S[0];
+ u_int32_t *p = c->P;
+
+ Xl = *xl;
+ Xr = *xr;
+
+ Xl ^= p[17];
+ BLFRND(s, p, Xr, Xl, 16); BLFRND(s, p, Xl, Xr, 15);
+ BLFRND(s, p, Xr, Xl, 14); BLFRND(s, p, Xl, Xr, 13);
+ BLFRND(s, p, Xr, Xl, 12); BLFRND(s, p, Xl, Xr, 11);
+ BLFRND(s, p, Xr, Xl, 10); BLFRND(s, p, Xl, Xr, 9);
+ BLFRND(s, p, Xr, Xl, 8); BLFRND(s, p, Xl, Xr, 7);
+ BLFRND(s, p, Xr, Xl, 6); BLFRND(s, p, Xl, Xr, 5);
+ BLFRND(s, p, Xr, Xl, 4); BLFRND(s, p, Xl, Xr, 3);
+ BLFRND(s, p, Xr, Xl, 2); BLFRND(s, p, Xl, Xr, 1);
+
+ *xl = Xr ^ p[0];
+ *xr = Xl;
+}
+
+void
+Blowfish_initstate(blf_ctx *c)
+{
+ /* P-box and S-box tables initialized with digits of Pi */
+
+ static const blf_ctx initstate =
+ { {
+ {
+ 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7,
+ 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99,
+ 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
+ 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e,
+ 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee,
+ 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
+ 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef,
+ 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e,
+ 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
+ 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440,
+ 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce,
+ 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
+ 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e,
+ 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677,
+ 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
+ 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032,
+ 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88,
+ 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
+ 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e,
+ 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0,
+ 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
+ 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98,
+ 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88,
+ 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
+ 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6,
+ 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d,
+ 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
+ 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7,
+ 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba,
+ 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
+ 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f,
+ 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09,
+ 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
+ 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb,
+ 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279,
+ 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
+ 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab,
+ 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82,
+ 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
+ 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573,
+ 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0,
+ 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
+ 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790,
+ 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8,
+ 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
+ 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0,
+ 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7,
+ 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
+ 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad,
+ 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1,
+ 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
+ 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9,
+ 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477,
+ 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
+ 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49,
+ 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af,
+ 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
+ 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5,
+ 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41,
+ 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
+ 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400,
+ 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915,
+ 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
+ 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a},
+ {
+ 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623,
+ 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266,
+ 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
+ 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e,
+ 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6,
+ 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
+ 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e,
+ 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1,
+ 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
+ 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8,
+ 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff,
+ 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
+ 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701,
+ 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7,
+ 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
+ 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331,
+ 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf,
+ 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
+ 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e,
+ 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87,
+ 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
+ 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2,
+ 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16,
+ 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
+ 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b,
+ 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509,
+ 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
+ 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3,
+ 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f,
+ 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
+ 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4,
+ 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960,
+ 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
+ 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28,
+ 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802,
+ 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
+ 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510,
+ 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf,
+ 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
+ 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e,
+ 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50,
+ 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
+ 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8,
+ 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281,
+ 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
+ 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696,
+ 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128,
+ 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
+ 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0,
+ 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0,
+ 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
+ 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250,
+ 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3,
+ 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
+ 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00,
+ 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061,
+ 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
+ 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e,
+ 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735,
+ 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
+ 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9,
+ 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340,
+ 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
+ 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7},
+ {
+ 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934,
+ 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068,
+ 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
+ 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840,
+ 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45,
+ 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
+ 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a,
+ 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb,
+ 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
+ 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6,
+ 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42,
+ 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
+ 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2,
+ 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb,
+ 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
+ 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b,
+ 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33,
+ 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
+ 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3,
+ 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc,
+ 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
+ 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564,
+ 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b,
+ 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
+ 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922,
+ 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728,
+ 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
+ 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e,
+ 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37,
+ 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
+ 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804,
+ 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b,
+ 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
+ 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb,
+ 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d,
+ 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
+ 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350,
+ 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9,
+ 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
+ 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe,
+ 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d,
+ 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
+ 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f,
+ 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61,
+ 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
+ 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9,
+ 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2,
+ 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
+ 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e,
+ 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633,
+ 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
+ 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169,
+ 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52,
+ 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
+ 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5,
+ 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62,
+ 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
+ 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76,
+ 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24,
+ 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
+ 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4,
+ 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c,
+ 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
+ 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0},
+ {
+ 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b,
+ 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe,
+ 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
+ 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4,
+ 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8,
+ 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
+ 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304,
+ 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22,
+ 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
+ 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6,
+ 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9,
+ 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
+ 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593,
+ 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51,
+ 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
+ 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c,
+ 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b,
+ 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
+ 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c,
+ 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd,
+ 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
+ 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319,
+ 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb,
+ 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
+ 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991,
+ 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32,
+ 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
+ 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166,
+ 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae,
+ 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
+ 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5,
+ 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47,
+ 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
+ 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d,
+ 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84,
+ 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
+ 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8,
+ 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd,
+ 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
+ 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7,
+ 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38,
+ 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
+ 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c,
+ 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525,
+ 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
+ 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442,
+ 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964,
+ 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
+ 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8,
+ 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d,
+ 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
+ 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299,
+ 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02,
+ 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
+ 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614,
+ 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a,
+ 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
+ 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b,
+ 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0,
+ 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
+ 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e,
+ 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9,
+ 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
+ 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6}
+ },
+ {
+ 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344,
+ 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
+ 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
+ 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917,
+ 0x9216d5d9, 0x8979fb1b
+ } };
+
+ *c = initstate;
+}
+
+u_int32_t
+Blowfish_stream2word(const u_int8_t *data, u_int16_t databytes,
+ u_int16_t *current)
+{
+ u_int8_t i;
+ u_int16_t j;
+ u_int32_t temp;
+
+ temp = 0x00000000;
+ j = *current;
+
+ for (i = 0; i < 4; i++, j++) {
+ if (j >= databytes)
+ j = 0;
+ temp = (temp << 8) | data[j];
+ }
+
+ *current = j;
+ return temp;
+}
+
+void
+Blowfish_expand0state(blf_ctx *c, const u_int8_t *key, u_int16_t keybytes)
+{
+ u_int16_t i;
+ u_int16_t j;
+ u_int16_t k;
+ u_int32_t temp;
+ u_int32_t datal;
+ u_int32_t datar;
+
+ j = 0;
+ for (i = 0; i < BLF_N + 2; i++) {
+ /* Extract 4 int8 to 1 int32 from keystream */
+ temp = Blowfish_stream2word(key, keybytes, &j);
+ c->P[i] = c->P[i] ^ temp;
+ }
+
+ j = 0;
+ datal = 0x00000000;
+ datar = 0x00000000;
+ for (i = 0; i < BLF_N + 2; i += 2) {
+ Blowfish_encipher(c, &datal, &datar);
+
+ c->P[i] = datal;
+ c->P[i + 1] = datar;
+ }
+
+ for (i = 0; i < 4; i++) {
+ for (k = 0; k < 256; k += 2) {
+ Blowfish_encipher(c, &datal, &datar);
+
+ c->S[i][k] = datal;
+ c->S[i][k + 1] = datar;
+ }
+ }
+}
+
+
+void
+Blowfish_expandstate(blf_ctx *c, const u_int8_t *data, u_int16_t databytes,
+ const u_int8_t *key, u_int16_t keybytes)
+{
+ u_int16_t i;
+ u_int16_t j;
+ u_int16_t k;
+ u_int32_t temp;
+ u_int32_t datal;
+ u_int32_t datar;
+
+ j = 0;
+ for (i = 0; i < BLF_N + 2; i++) {
+ /* Extract 4 int8 to 1 int32 from keystream */
+ temp = Blowfish_stream2word(key, keybytes, &j);
+ c->P[i] = c->P[i] ^ temp;
+ }
+
+ j = 0;
+ datal = 0x00000000;
+ datar = 0x00000000;
+ for (i = 0; i < BLF_N + 2; i += 2) {
+ datal ^= Blowfish_stream2word(data, databytes, &j);
+ datar ^= Blowfish_stream2word(data, databytes, &j);
+ Blowfish_encipher(c, &datal, &datar);
+
+ c->P[i] = datal;
+ c->P[i + 1] = datar;
+ }
+
+ for (i = 0; i < 4; i++) {
+ for (k = 0; k < 256; k += 2) {
+ datal ^= Blowfish_stream2word(data, databytes, &j);
+ datar ^= Blowfish_stream2word(data, databytes, &j);
+ Blowfish_encipher(c, &datal, &datar);
+
+ c->S[i][k] = datal;
+ c->S[i][k + 1] = datar;
+ }
+ }
+
+}
+
+void
+blf_key(blf_ctx *c, const u_int8_t *k, u_int16_t len)
+{
+ /* Initialize S-boxes and subkeys with Pi */
+ Blowfish_initstate(c);
+
+ /* Transform S-boxes and subkeys with key */
+ Blowfish_expand0state(c, k, len);
+}
+
+void
+blf_enc(blf_ctx *c, u_int32_t *data, u_int16_t blocks)
+{
+ u_int32_t *d;
+ u_int16_t i;
+
+ d = data;
+ for (i = 0; i < blocks; i++) {
+ Blowfish_encipher(c, d, d + 1);
+ d += 2;
+ }
+}
+
+void
+blf_dec(blf_ctx *c, u_int32_t *data, u_int16_t blocks)
+{
+ u_int32_t *d;
+ u_int16_t i;
+
+ d = data;
+ for (i = 0; i < blocks; i++) {
+ Blowfish_decipher(c, d, d + 1);
+ d += 2;
+ }
+}
+
+void
+blf_ecb_encrypt(blf_ctx *c, u_int8_t *data, u_int32_t len)
+{
+ u_int32_t l, r;
+ u_int32_t i;
+
+ for (i = 0; i < len; i += 8) {
+ l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
+ r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7];
+ Blowfish_encipher(c, &l, &r);
+ data[0] = l >> 24 & 0xff;
+ data[1] = l >> 16 & 0xff;
+ data[2] = l >> 8 & 0xff;
+ data[3] = l & 0xff;
+ data[4] = r >> 24 & 0xff;
+ data[5] = r >> 16 & 0xff;
+ data[6] = r >> 8 & 0xff;
+ data[7] = r & 0xff;
+ data += 8;
+ }
+}
+
+void
+blf_ecb_decrypt(blf_ctx *c, u_int8_t *data, u_int32_t len)
+{
+ u_int32_t l, r;
+ u_int32_t i;
+
+ for (i = 0; i < len; i += 8) {
+ l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
+ r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7];
+ Blowfish_decipher(c, &l, &r);
+ data[0] = l >> 24 & 0xff;
+ data[1] = l >> 16 & 0xff;
+ data[2] = l >> 8 & 0xff;
+ data[3] = l & 0xff;
+ data[4] = r >> 24 & 0xff;
+ data[5] = r >> 16 & 0xff;
+ data[6] = r >> 8 & 0xff;
+ data[7] = r & 0xff;
+ data += 8;
+ }
+}
+
+void
+blf_cbc_encrypt(blf_ctx *c, u_int8_t *iv, u_int8_t *data, u_int32_t len)
+{
+ u_int32_t l, r;
+ u_int32_t i, j;
+
+ for (i = 0; i < len; i += 8) {
+ for (j = 0; j < 8; j++)
+ data[j] ^= iv[j];
+ l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
+ r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7];
+ Blowfish_encipher(c, &l, &r);
+ data[0] = l >> 24 & 0xff;
+ data[1] = l >> 16 & 0xff;
+ data[2] = l >> 8 & 0xff;
+ data[3] = l & 0xff;
+ data[4] = r >> 24 & 0xff;
+ data[5] = r >> 16 & 0xff;
+ data[6] = r >> 8 & 0xff;
+ data[7] = r & 0xff;
+ iv = data;
+ data += 8;
+ }
+}
+
+void
+blf_cbc_decrypt(blf_ctx *c, u_int8_t *iva, u_int8_t *data, u_int32_t len)
+{
+ u_int32_t l, r;
+ u_int8_t *iv;
+ u_int32_t i, j;
+
+ iv = data + len - 16;
+ data = data + len - 8;
+ for (i = len - 8; i >= 8; i -= 8) {
+ l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
+ r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7];
+ Blowfish_decipher(c, &l, &r);
+ data[0] = l >> 24 & 0xff;
+ data[1] = l >> 16 & 0xff;
+ data[2] = l >> 8 & 0xff;
+ data[3] = l & 0xff;
+ data[4] = r >> 24 & 0xff;
+ data[5] = r >> 16 & 0xff;
+ data[6] = r >> 8 & 0xff;
+ data[7] = r & 0xff;
+ for (j = 0; j < 8; j++)
+ data[j] ^= iv[j];
+ iv -= 8;
+ data -= 8;
+ }
+ l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
+ r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7];
+ Blowfish_decipher(c, &l, &r);
+ data[0] = l >> 24 & 0xff;
+ data[1] = l >> 16 & 0xff;
+ data[2] = l >> 8 & 0xff;
+ data[3] = l & 0xff;
+ data[4] = r >> 24 & 0xff;
+ data[5] = r >> 16 & 0xff;
+ data[6] = r >> 8 & 0xff;
+ data[7] = r & 0xff;
+ for (j = 0; j < 8; j++)
+ data[j] ^= iva[j];
+}
+
+#if 0
+void
+report(u_int32_t data[], u_int16_t len)
+{
+ u_int16_t i;
+ for (i = 0; i < len; i += 2)
+ printf("Block %0hd: %08lx %08lx.\n",
+ i / 2, data[i], data[i + 1]);
+}
+void
+main(void)
+{
+
+ blf_ctx c;
+ char key[] = "AAAAA";
+ char key2[] = "abcdefghijklmnopqrstuvwxyz";
+
+ u_int32_t data[10];
+ u_int32_t data2[] =
+ {0x424c4f57l, 0x46495348l};
+
+ u_int16_t i;
+
+ /* First test */
+ for (i = 0; i < 10; i++)
+ data[i] = i;
+
+ blf_key(&c, (u_int8_t *) key, 5);
+ blf_enc(&c, data, 5);
+ blf_dec(&c, data, 1);
+ blf_dec(&c, data + 2, 4);
+ printf("Should read as 0 - 9.\n");
+ report(data, 10);
+
+ /* Second test */
+ blf_key(&c, (u_int8_t *) key2, strlen(key2));
+ blf_enc(&c, data2, 1);
+ printf("\nShould read as: 0x324ed0fe 0xf413a203.\n");
+ report(data2, 2);
+ blf_dec(&c, data2, 1);
+ report(data2, 2);
+}
+#endif
diff --git a/compat/getentropy/getentropy_freebsd.c b/compat/getentropy/getentropy_freebsd.c
@@ -0,0 +1,64 @@
+/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * Copyright (c) 2014 Brent Cook <bcook@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+#include <errno.h>
+#include <stddef.h>
+
+/*
+ * Derived from lib/libc/gen/arc4random.c from FreeBSD.
+ */
+static size_t
+getentropy_sysctl(u_char *buf, size_t size)
+{
+ int mib[2];
+ size_t len, done;
+
+ mib[0] = CTL_KERN;
+ mib[1] = KERN_ARND;
+ done = 0;
+
+ do {
+ len = size;
+ if (sysctl(mib, 2, buf, &len, NULL, 0) == -1)
+ return (done);
+ done += len;
+ buf += len;
+ size -= len;
+ } while (size > 0);
+
+ return (done);
+}
+
+int
+getentropy(void *buf, size_t len)
+{
+ if (len <= 256 &&
+ getentropy_sysctl(buf, len) == len) {
+ return 0;
+ }
+
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_linux.c b/compat/getentropy/getentropy_linux.c
@@ -0,0 +1,548 @@
+/* $OpenBSD: getentropy_linux.c,v 1.35 2014/08/28 01:00:57 bcook Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#define _POSIX_C_SOURCE 199309L
+#define _GNU_SOURCE 1
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#ifdef HAVE_SYS_SYSCTL_H
+#include <sys/sysctl.h>
+#endif
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <openssl/sha.h>
+
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/sysctl.h>
+#ifdef HAVE_GETAUXVAL
+#include <sys/auxv.h>
+#endif
+#include <sys/vfs.h>
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+#ifdef SYS_getrandom
+static int getentropy_getrandom(void *buf, size_t len);
+#endif
+static int getentropy_urandom(void *buf, size_t len);
+#ifdef SYS__sysctl
+static int getentropy_sysctl(void *buf, size_t len);
+#endif
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+#ifdef SYS_getrandom
+ /*
+ * Try descriptor-less getrandom()
+ */
+ ret = getentropy_getrandom(buf, len);
+ if (ret != -1)
+ return (ret);
+ if (errno != ENOSYS)
+ return (-1);
+#endif
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+#ifdef SYS__sysctl
+ /*
+ * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID.
+ * sysctl is a failsafe API, so it guarantees a result. This
+ * should work inside a chroot, or when file descriptors are
+ * exhuasted.
+ *
+ * However this can fail if the Linux kernel removes support
+ * for sysctl. Starting in 2007, there have been efforts to
+ * deprecate the sysctl API/ABI, and push callers towards use
+ * of the chroot-unavailable fd-using /proc mechanism --
+ * essentially the same problems as /dev/urandom.
+ *
+ * Numerous setbacks have been encountered in their deprecation
+ * schedule, so as of June 2014 the kernel ABI still exists on
+ * most Linux architectures. The sysctl() stub in libc is missing
+ * on some systems. There are also reports that some kernels
+ * spew messages to the console.
+ */
+ ret = getentropy_sysctl(buf, len);
+ if (ret != -1)
+ return (ret);
+#endif /* SYS__sysctl */
+
+ /*
+ * Entropy collection via /dev/urandom and sysctl have failed.
+ *
+ * No other API exists for collecting entropy. See the large
+ * comment block above.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Linux
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Linux should either retain their
+ * sysctl ABI, or consider providing a new failsafe API which
+ * works in a chroot or when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+#ifdef SYS_getrandom
+static int
+getentropy_getrandom(void *buf, size_t len)
+{
+ int pre_errno = errno;
+ int ret;
+ if (len > 256)
+ return (-1);
+ do {
+ ret = syscall(SYS_getrandom, buf, len, 0);
+ } while (ret == -1 && errno == EINTR);
+
+ if (ret != len)
+ return (-1);
+ errno = pre_errno;
+ return (0);
+}
+#endif
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, cnt, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+#ifdef SYS__sysctl
+static int
+getentropy_sysctl(void *buf, size_t len)
+{
+ static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID };
+ size_t i;
+ int save_errno = errno;
+
+ for (i = 0; i < len; ) {
+ size_t chunk = min(len - i, 16);
+
+ /* SYS__sysctl because some systems already removed sysctl() */
+ struct __sysctl_args args = {
+ .name = mib,
+ .nlen = 3,
+ .oldval = (char *)buf + i,
+ .oldlenp = &chunk,
+ };
+ if (syscall(SYS__sysctl, &args) != 0)
+ goto sysctlfailed;
+ i += chunk;
+ }
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return (0); /* satisfied */
+ }
+sysctlfailed:
+ errno = EIO;
+ return -1;
+}
+#endif /* SYS__sysctl */
+
+static int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return 0;
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+#ifdef HAVE_GETAUXVAL
+#ifdef AT_RANDOM
+ /* Not as random as you think but we take what we are given */
+ p = (char *) getauxval(AT_RANDOM);
+ if (p)
+ HR(p, 16);
+#endif
+#ifdef AT_SYSINFO_EHDR
+ p = (char *) getauxval(AT_SYSINFO_EHDR);
+ if (p)
+ HR(p, pgs);
+#endif
+#ifdef AT_BASE
+ p = (char *) getauxval(AT_BASE);
+ if (p)
+ HD(p);
+#endif
+#endif
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_osx.c b/compat/getentropy/getentropy_osx.c
@@ -0,0 +1,429 @@
+/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/sysctl.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <mach/mach_time.h>
+#include <mach/mach_host.h>
+#include <mach/host_info.h>
+#include <sys/socketvar.h>
+#include <sys/vmmeter.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/udp_var.h>
+#include <CommonCrypto/CommonDigest.h>
+#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c)))
+#define SHA512_Init(xxx) (CC_SHA512_Init((xxx)))
+#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy)))
+#define SHA512_CTX CC_SHA512_CTX
+#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+static int getentropy_urandom(void *buf, size_t len);
+static int getentropy_fallback(void *buf, size_t len);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom and sysctl have failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on OSX that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that OSX
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that OSX should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+static int
+getentropy_urandom(void *buf, size_t len)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open("/dev/urandom", flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS };
+static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS };
+static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS };
+static int kmib[] = { CTL_KERN, KERN_USRSTACK };
+static int hwmib[] = { CTL_HW, HW_USERMEM };
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+ struct tcpstat tcpstat;
+ struct udpstat udpstat;
+ struct ipstat ipstat;
+ u_int64_t mach_time;
+ unsigned int idata;
+ void *addr;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+
+ ii = sizeof(addr);
+ HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]),
+ &addr, &ii, NULL, 0) == -1, addr);
+
+ ii = sizeof(idata);
+ HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]),
+ &idata, &ii, NULL, 0) == -1, idata);
+
+ ii = sizeof(tcpstat);
+ HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]),
+ &tcpstat, &ii, NULL, 0) == -1, tcpstat);
+
+ ii = sizeof(udpstat);
+ HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]),
+ &udpstat, &ii, NULL, 0) == -1, udpstat);
+
+ ii = sizeof(ipstat);
+ HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]),
+ &ipstat, &ii, NULL, 0) == -1, ipstat);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ struct statfs stfs;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ mach_time = mach_absolute_time();
+ HD(mach_time);
+ cnt += (int)mach_time;
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+ HX(statfs(".", &stfs) == -1, stfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+ HX(statfs("/", &stfs) == -1, stfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX(fstatfs(0, &stfs) == -1,
+ stfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_solaris.c b/compat/getentropy/getentropy_solaris.c
@@ -0,0 +1,445 @@
+/* $OpenBSD: getentropy_solaris.c,v 1.8 2014/07/19 16:12:00 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014 Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <link.h>
+#include <termios.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/sha2.h>
+#define SHA512_Init SHA512Init
+#define SHA512_Update SHA512Update
+#define SHA512_Final SHA512Final
+
+#include <sys/vfs.h>
+#include <sys/statfs.h>
+#include <sys/loadavg.h>
+
+#define REPEAT 5
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#define HX(a, b) \
+ do { \
+ if ((a)) \
+ HD(errno); \
+ else \
+ HD(b); \
+ } while (0)
+
+#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l)))
+#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x)))
+#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*)))
+
+int getentropy(void *buf, size_t len);
+
+static int gotdata(char *buf, size_t len);
+static int getentropy_urandom(void *buf, size_t len, const char *path,
+ int devfscheck);
+static int getentropy_fallback(void *buf, size_t len);
+static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data);
+
+int
+getentropy(void *buf, size_t len)
+{
+ int ret = -1;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ /*
+ * Try to get entropy with /dev/urandom
+ *
+ * Solaris provides /dev/urandom as a symbolic link to
+ * /devices/pseudo/random@0:urandom which is provided by
+ * a devfs filesystem. Best practice is to use O_NOFOLLOW,
+ * so we must try the unpublished name directly.
+ *
+ * This can fail if the process is inside a chroot which lacks
+ * the devfs mount, or if file descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len,
+ "/devices/pseudo/random@0:urandom", 1);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Unfortunately, chroot spaces on Solaris are sometimes setup
+ * with direct device node of the well-known /dev/urandom name
+ * (perhaps to avoid dragging all of devfs into the space).
+ *
+ * This can fail if the process is inside a chroot or if file
+ * descriptors are exhausted.
+ */
+ ret = getentropy_urandom(buf, len, "/dev/urandom", 0);
+ if (ret != -1)
+ return (ret);
+
+ /*
+ * Entropy collection via /dev/urandom has failed.
+ *
+ * No other API exists for collecting entropy, and we have
+ * no failsafe way to get it on Solaris that is not sensitive
+ * to resource exhaustion.
+ *
+ * We have very few options:
+ * - Even syslog_r is unsafe to call at this low level, so
+ * there is no way to alert the user or program.
+ * - Cannot call abort() because some systems have unsafe
+ * corefiles.
+ * - Could raise(SIGKILL) resulting in silent program termination.
+ * - Return EIO, to hint that arc4random's stir function
+ * should raise(SIGKILL)
+ * - Do the best under the circumstances....
+ *
+ * This code path exists to bring light to the issue that Solaris
+ * does not provide a failsafe API for entropy collection.
+ *
+ * We hope this demonstrates that Solaris should consider
+ * providing a new failsafe API which works in a chroot or
+ * when file descriptors are exhausted.
+ */
+#undef FAIL_INSTEAD_OF_TRYING_FALLBACK
+#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK
+ raise(SIGKILL);
+#endif
+ ret = getentropy_fallback(buf, len);
+ if (ret != -1)
+ return (ret);
+
+ errno = EIO;
+ return (ret);
+}
+
+/*
+ * Basic sanity checking; wish we could do better.
+ */
+static int
+gotdata(char *buf, size_t len)
+{
+ char any_set = 0;
+ size_t i;
+
+ for (i = 0; i < len; ++i)
+ any_set |= buf[i];
+ if (any_set == 0)
+ return -1;
+ return 0;
+}
+
+static int
+getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck)
+{
+ struct stat st;
+ size_t i;
+ int fd, flags;
+ int save_errno = errno;
+
+start:
+
+ flags = O_RDONLY;
+#ifdef O_NOFOLLOW
+ flags |= O_NOFOLLOW;
+#endif
+#ifdef O_CLOEXEC
+ flags |= O_CLOEXEC;
+#endif
+ fd = open(path, flags, 0);
+ if (fd == -1) {
+ if (errno == EINTR)
+ goto start;
+ goto nodevrandom;
+ }
+#ifndef O_CLOEXEC
+ fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
+#endif
+
+ /* Lightly verify that the device node looks sane */
+ if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) ||
+ (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) {
+ close(fd);
+ goto nodevrandom;
+ }
+ for (i = 0; i < len; ) {
+ size_t wanted = len - i;
+ ssize_t ret = read(fd, (char *)buf + i, wanted);
+
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ continue;
+ close(fd);
+ goto nodevrandom;
+ }
+ i += ret;
+ }
+ close(fd);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+nodevrandom:
+ errno = EIO;
+ return -1;
+}
+
+static const int cl[] = {
+ CLOCK_REALTIME,
+#ifdef CLOCK_MONOTONIC
+ CLOCK_MONOTONIC,
+#endif
+#ifdef CLOCK_MONOTONIC_RAW
+ CLOCK_MONOTONIC_RAW,
+#endif
+#ifdef CLOCK_TAI
+ CLOCK_TAI,
+#endif
+#ifdef CLOCK_VIRTUAL
+ CLOCK_VIRTUAL,
+#endif
+#ifdef CLOCK_UPTIME
+ CLOCK_UPTIME,
+#endif
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+ CLOCK_PROCESS_CPUTIME_ID,
+#endif
+#ifdef CLOCK_THREAD_CPUTIME_ID
+ CLOCK_THREAD_CPUTIME_ID,
+#endif
+};
+
+static int
+getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data)
+{
+ SHA512_CTX *ctx = data;
+
+ SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr));
+ return 0;
+}
+
+static int
+getentropy_fallback(void *buf, size_t len)
+{
+ uint8_t results[SHA512_DIGEST_LENGTH];
+ int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat;
+ static int cnt;
+ struct timespec ts;
+ struct timeval tv;
+ double loadavg[3];
+ struct rusage ru;
+ sigset_t sigset;
+ struct stat st;
+ SHA512_CTX ctx;
+ static pid_t lastpid;
+ pid_t pid;
+ size_t i, ii, m;
+ char *p;
+
+ pid = getpid();
+ if (lastpid == pid) {
+ faster = 1;
+ repeat = 2;
+ } else {
+ faster = 0;
+ lastpid = pid;
+ repeat = REPEAT;
+ }
+ for (i = 0; i < len; ) {
+ int j;
+ SHA512_Init(&ctx);
+ for (j = 0; j < repeat; j++) {
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ dl_iterate_phdr(getentropy_phdr, &ctx);
+
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++)
+ HX(clock_gettime(cl[ii], &ts) == -1, ts);
+
+ HX((pid = getpid()) == -1, pid);
+ HX((pid = getsid(pid)) == -1, pid);
+ HX((pid = getppid()) == -1, pid);
+ HX((pid = getpgid(0)) == -1, pid);
+ HX((e = getpriority(0, 0)) == -1, e);
+ HX((getloadavg(loadavg, 3) == -1), loadavg);
+
+ if (!faster) {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 1;
+ (void) nanosleep(&ts, NULL);
+ }
+
+ HX(sigpending(&sigset) == -1, sigset);
+ HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1,
+ sigset);
+
+ HF(getentropy); /* an addr in this library */
+ HF(printf); /* an addr in libc */
+ p = (char *)&p;
+ HD(p); /* an addr on stack */
+ p = (char *)&errno;
+ HD(p); /* the addr of errno */
+
+ if (i == 0) {
+ struct sockaddr_storage ss;
+ struct statvfs stvfs;
+ struct termios tios;
+ socklen_t ssl;
+ off_t off;
+
+ /*
+ * Prime-sized mappings encourage fragmentation;
+ * thus exposing some address entropy.
+ */
+ struct mm {
+ size_t npg;
+ void *p;
+ } mm[] = {
+ { 17, MAP_FAILED }, { 3, MAP_FAILED },
+ { 11, MAP_FAILED }, { 2, MAP_FAILED },
+ { 5, MAP_FAILED }, { 3, MAP_FAILED },
+ { 7, MAP_FAILED }, { 1, MAP_FAILED },
+ { 57, MAP_FAILED }, { 3, MAP_FAILED },
+ { 131, MAP_FAILED }, { 1, MAP_FAILED },
+ };
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ HX(mm[m].p = mmap(NULL,
+ mm[m].npg * pgs,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1,
+ (off_t)0), mm[m].p);
+ if (mm[m].p != MAP_FAILED) {
+ size_t mo;
+
+ /* Touch some memory... */
+ p = mm[m].p;
+ mo = cnt %
+ (mm[m].npg * pgs - 1);
+ p[mo] = 1;
+ cnt += (int)((long)(mm[m].p)
+ / pgs);
+ }
+
+ /* Check cnts and times... */
+ for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]);
+ ii++) {
+ HX((e = clock_gettime(cl[ii],
+ &ts)) == -1, ts);
+ if (e != -1)
+ cnt += (int)ts.tv_nsec;
+ }
+
+ HX((e = getrusage(RUSAGE_SELF,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ }
+
+ for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) {
+ if (mm[m].p != MAP_FAILED)
+ munmap(mm[m].p, mm[m].npg * pgs);
+ mm[m].p = MAP_FAILED;
+ }
+
+ HX(stat(".", &st) == -1, st);
+ HX(statvfs(".", &stvfs) == -1, stvfs);
+
+ HX(stat("/", &st) == -1, st);
+ HX(statvfs("/", &stvfs) == -1, stvfs);
+
+ HX((e = fstat(0, &st)) == -1, st);
+ if (e == -1) {
+ if (S_ISREG(st.st_mode) ||
+ S_ISFIFO(st.st_mode) ||
+ S_ISSOCK(st.st_mode)) {
+ HX(fstatvfs(0, &stvfs) == -1,
+ stvfs);
+ HX((off = lseek(0, (off_t)0,
+ SEEK_CUR)) < 0, off);
+ }
+ if (S_ISCHR(st.st_mode)) {
+ HX(tcgetattr(0, &tios) == -1,
+ tios);
+ } else if (S_ISSOCK(st.st_mode)) {
+ memset(&ss, 0, sizeof ss);
+ ssl = sizeof(ss);
+ HX(getpeername(0,
+ (void *)&ss, &ssl) == -1,
+ ss);
+ }
+ }
+
+ HX((e = getrusage(RUSAGE_CHILDREN,
+ &ru)) == -1, ru);
+ if (e != -1) {
+ cnt += (int)ru.ru_utime.tv_sec;
+ cnt += (int)ru.ru_utime.tv_usec;
+ }
+ } else {
+ /* Subsequent hashes absorb previous result */
+ HD(results);
+ }
+
+ HX((e = gettimeofday(&tv, NULL)) == -1, tv);
+ if (e != -1) {
+ cnt += (int)tv.tv_sec;
+ cnt += (int)tv.tv_usec;
+ }
+
+ HD(cnt);
+ }
+ SHA512_Final(results, &ctx);
+ memcpy((char *)buf + i, results, min(sizeof(results), len - i));
+ i += min(sizeof(results), len - i);
+ }
+ explicit_bzero(&ctx, sizeof ctx);
+ explicit_bzero(results, sizeof results);
+ if (gotdata(buf, len) == 0) {
+ errno = save_errno;
+ return 0; /* satisfied */
+ }
+ errno = EIO;
+ return -1;
+}
diff --git a/compat/getentropy/getentropy_win.c b/compat/getentropy/getentropy_win.c
@@ -0,0 +1,59 @@
+/* $OpenBSD: getentropy_win.c,v 1.2 2014/07/13 13:03:09 deraadt Exp $ */
+
+/*
+ * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org>
+ * Copyright (c) 2014, Bob Beck <beck@obtuse.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Emulation of getentropy(2) as documented at:
+ * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2
+ */
+
+#include <windows.h>
+#include <errno.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <wincrypt.h>
+#include <process.h>
+
+int getentropy(void *buf, size_t len);
+
+/*
+ * On Windows, CryptGenRandom is supposed to be a well-seeded
+ * cryptographically strong random number generator.
+ */
+int
+getentropy(void *buf, size_t len)
+{
+ HCRYPTPROV provider;
+
+ if (len > 256) {
+ errno = EIO;
+ return -1;
+ }
+
+ if (CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT) == 0)
+ goto fail;
+ if (CryptGenRandom(provider, len, buf) == 0) {
+ CryptReleaseContext(provider, 0);
+ goto fail;
+ }
+ CryptReleaseContext(provider, 0);
+ return (0);
+
+fail:
+ errno = EIO;
+ return (-1);
+}
diff --git a/compat/safebfuns.c b/compat/safebfuns.c
@@ -0,0 +1,106 @@
+/* Public domain */
+
+#include <string.h>
+
+#if __clang__
+ /*
+ * http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros
+ * https://www.mail-archive.com/cfe-commits@cs.uiuc.edu/msg97459.html
+ */
+ #if __clang_major__ > 3 || ( __clang_major__ == 3 && __clang_minor__ >= 5 )
+ #pragma clang optimize off
+ #define NOINLINE __attribute__ (( noinline ))
+ #else
+ #error "require clang >= 3.5"
+ #endif
+#elif __GNUC__
+ /*
+ * http://gcc.gnu.org/onlinedocs/gcc/Function-Specific-Option-Pragmas.html
+ * http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
+ */
+ #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
+ #pragma GCC push_options
+ #pragma GCC optimize ( "-O0" )
+ #define NOINLINE __attribute__ (( noinline ))
+ #else
+ #error "require gcc >= 4.4"
+ #endif
+#else
+ #error "unrecognised compiler"
+#endif
+
+NOINLINE void explicit_bzero( void * const buf, const size_t n ) {
+ size_t i;
+ unsigned char * p = buf;
+
+ for( i = 0; i < n; i++ ) {
+ p[ i ] = 0;
+ }
+}
+
+NOINLINE int timingsafe_bcmp( const void * const b1, const void * const b2, const size_t n ) {
+ size_t i;
+ const unsigned char * const p1 = b1;
+ const unsigned char * const p2 = b2;
+ int result = 0;
+
+ for( i = 0; i < n; i++ ) {
+ result |= p1[ i ] ^ p2[ i ];
+ }
+
+ return result != 0;
+}
+
+/* $OpenBSD: timingsafe_memcmp.c,v 1.1 2014/06/13 02:12:17 matthew Exp $ */
+/*
+ * Copyright (c) 2014 Google Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <limits.h>
+#include <string.h>
+
+NOINLINE int timingsafe_memcmp( const void * const b1, const void * const b2, const size_t len ) {
+ const unsigned char * p1 = b1;
+ const unsigned char * p2 = b2;
+ size_t i;
+ int res = 0, done = 0;
+
+ for( i = 0; i < len; i++ ) {
+ /* lt is -1 if p1[i] < p2[i]; else 0. */
+ int lt = ( p1[ i ] - p2[ i ] ) >> CHAR_BIT;
+
+ /* gt is -1 if p1[i] > p2[i]; else 0. */
+ int gt = ( p2[ i ] - p1[ i ] ) >> CHAR_BIT;
+
+ /* cmp is 1 if p1[i] > p2[i]; -1 if p1[i] < p2[i]; else 0. */
+ int cmp = lt - gt;
+
+ /* set res = cmp if !done. */
+ res |= cmp & ~done;
+
+ /* set done if p1[i] != p2[i]. */
+ done |= lt | gt;
+ }
+
+ return res;
+}
+
+/* Public domain */
+
+#ifdef __clang__
+ #pragma clang optimize on
+#else
+ #pragma GCC pop_options
+#endif
diff --git a/compat/sha/sha1-elf-x86_64.s b/compat/sha/sha1-elf-x86_64.s
@@ -0,0 +1,2486 @@
+.text
+
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,@function
+.align 16
+sha1_block_data_order:
+ movl OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $512,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.align 16
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order,.-sha1_block_data_order
+.type sha1_block_data_order_ssse3,@function
+.align 16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.align 64
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
diff --git a/compat/sha/sha1-macosx-x86_64.s b/compat/sha/sha1-macosx-x86_64.s
@@ -0,0 +1,2486 @@
+.text
+
+
+.globl _sha1_block_data_order
+
+.p2align 4
+_sha1_block_data_order:
+ movl _OPENSSL_ia32cap_P+0(%rip),%r9d
+ movl _OPENSSL_ia32cap_P+4(%rip),%r8d
+ testl $512,%r8d
+ jz L$ialu
+ jmp _ssse3_shortcut
+
+.p2align 4
+L$ialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+L$prologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz L$loop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+L$epilogue:
+ .byte 0xf3,0xc3
+
+
+.p2align 4
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp L$oop_ssse3
+.p2align 4
+L$oop_ssse3:
+ movdqa %xmm1,%xmm4
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ roll $5,%eax
+ paddd %xmm3,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrldq $4,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ pxor %xmm0,%xmm4
+ rorl $2,%ebx
+ addl %esi,%ebp
+ pxor %xmm2,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pxor %xmm8,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ roll $5,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrld $31,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ movdqa %xmm10,%xmm9
+ rorl $7,%ebp
+ addl %esi,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm4
+ rorl $7,%edx
+ addl %edi,%ebx
+ movdqa %xmm2,%xmm5
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ roll $5,%ebx
+ paddd %xmm4,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrldq $4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ pxor %xmm1,%xmm5
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm3,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pxor %xmm9,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ roll $5,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ psrld $31,%xmm9
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ movdqa %xmm8,%xmm10
+ rorl $7,%eax
+ addl %esi,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ roll $5,%edx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ movdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ pxor %xmm10,%xmm5
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movdqa %xmm3,%xmm6
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm5,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ psrldq $4,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ pxor %xmm2,%xmm6
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm4,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ pxor %xmm10,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ roll $5,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ psrld $31,%xmm10
+ xorl %edx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ roll $5,%ebp
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ pxor %xmm8,%xmm6
+ rorl $7,%eax
+ addl %edi,%edx
+ movdqa %xmm4,%xmm7
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm6,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ psrldq $4,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ pxor %xmm3,%xmm7
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm5,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ roll $5,%ecx
+ pxor %xmm8,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ roll $5,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ psrld $31,%xmm8
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ movdqa %xmm10,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ roll $5,%eax
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ pxor %xmm9,%xmm7
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movdqa %xmm7,%xmm9
+ addl 0(%rsp),%edx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm1,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ pxor %xmm9,%xmm0
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ roll $5,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ pslld $2,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ psrld $30,%xmm9
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ roll $5,%ecx
+ por %xmm9,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ movdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ roll $5,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ movdqa 32(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %esi,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %esi,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %esi,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %esi,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ movdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ecx,%edi
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ pxor %xmm7,%xmm6
+ andl %ecx,%esi
+ rorl $7,%ebx
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ pxor %xmm9,%xmm6
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ pslld $2,%xmm6
+ andl %ebx,%edi
+ rorl $7,%eax
+ psrld $30,%xmm9
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ por %xmm9,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ movdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ rorl $7,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ rorl $7,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%edi
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ pxor %xmm0,%xmm7
+ andl %edx,%esi
+ rorl $7,%ecx
+ movdqa 48(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ pxor %xmm10,%xmm7
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ pslld $2,%xmm7
+ andl %ecx,%edi
+ rorl $7,%ebx
+ psrld $30,%xmm10
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ por %xmm10,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ movdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%edi
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ pxor %xmm1,%xmm0
+ andl %ebp,%esi
+ rorl $7,%edx
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ pxor %xmm8,%xmm0
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ pslld $2,%xmm0
+ andl %edx,%edi
+ rorl $7,%ecx
+ psrld $30,%xmm8
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ por %xmm8,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ movdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ rorl $7,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ rorl $7,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%edi
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ pxor %xmm2,%xmm1
+ andl %eax,%esi
+ rorl $7,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ pxor %xmm9,%xmm1
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ pslld $2,%xmm1
+ andl %ebp,%edi
+ rorl $7,%edx
+ psrld $30,%xmm9
+ addl %esi,%ebx
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ por %xmm9,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ movdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ rorl $7,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ rorl $7,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%edi
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ pxor %xmm3,%xmm2
+ andl %ebx,%esi
+ rorl $7,%eax
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ pxor %xmm10,%xmm2
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ pslld $2,%xmm2
+ andl %eax,%edi
+ rorl $7,%ebp
+ psrld $30,%xmm10
+ addl %esi,%ecx
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ por %xmm10,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ movdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ rorl $7,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ rorl $7,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %esi,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa 0(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ movdqa %xmm0,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ psubd %xmm9,%xmm0
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ paddd %xmm9,%xmm1
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ movdqa %xmm1,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ psubd %xmm9,%xmm1
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ paddd %xmm9,%xmm2
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ movdqa %xmm2,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ psubd %xmm9,%xmm2
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp L$oop_ssse3
+
+.p2align 4
+L$done_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ rorl $7,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ rorl $7,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ rorl $7,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ rorl $7,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+L$epilogue_ssse3:
+ .byte 0xf3,0xc3
+
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
diff --git a/compat/sha/sha1_one.c b/compat/sha/sha1_one.c
@@ -0,0 +1,81 @@
+/* $OpenBSD: sha1_one.c,v 1.10 2014/06/12 15:49:30 deraadt Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+
+#ifndef OPENSSL_NO_SHA1
+unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA_CTX c;
+ static unsigned char m[SHA_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ if (!SHA1_Init(&c))
+ return NULL;
+ SHA1_Update(&c,d,n);
+ SHA1_Final(md,&c);
+ OPENSSL_cleanse(&c,sizeof(c));
+ return(md);
+ }
+#endif
diff --git a/compat/sha/sha1dgst.c b/compat/sha/sha1dgst.c
@@ -0,0 +1,75 @@
+/* $OpenBSD: sha1dgst.c,v 1.12 2014/07/09 11:10:51 bcook Exp $ */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <openssl/opensslconf.h>
+
+#include <openssl/crypto.h>
+
+#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA)
+
+#undef SHA_0
+#define SHA_1
+
+#include <openssl/opensslv.h>
+
+/* The implementation is in ../md32_common.h */
+
+#include "sha_locl.h"
+
+#endif
+
diff --git a/compat/sha/sha256-elf-x86_64.S b/compat/sha/sha256-elf-x86_64.S
@@ -0,0 +1,1778 @@
+.text
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
+sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha256_block_data_order,.-sha256_block_data_order
+.align 64
+.type K256,@object
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/compat/sha/sha256-macosx-x86_64.S b/compat/sha/sha256-macosx-x86_64.S
@@ -0,0 +1,1778 @@
+.text
+
+.globl _sha256_block_data_order
+
+.p2align 4
+_sha256_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+L$prologue:
+
+ leaq K256(%rip),%rbp
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ xorq %rdi,%rdi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ jmp L$rounds_16_xx
+.p2align 4
+L$rounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 36(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,0(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 40(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,4(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 44(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,8(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 48(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,12(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 52(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,16(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 56(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,20(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 60(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,24(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 0(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,28(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 4(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r14d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+ movl %r12d,32(%rsp)
+
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %eax,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r8d,%r15d
+ movl %ebx,%r11d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r15d
+
+ xorl %ecx,%r11d
+ xorl %eax,%r14d
+ addl %r15d,%r12d
+ movl %ebx,%r15d
+
+ rorl $6,%r13d
+ andl %eax,%r11d
+ andl %ecx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r11d
+
+ addl %r12d,%edx
+ addl %r12d,%r11d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r11d
+
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 8(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %r14d,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%r15d
+ movl %r12d,36(%rsp)
+
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r11d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %edx,%r15d
+ movl %eax,%r10d
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r15d
+
+ xorl %ebx,%r10d
+ xorl %r11d,%r14d
+ addl %r15d,%r12d
+ movl %eax,%r15d
+
+ rorl $6,%r13d
+ andl %r11d,%r10d
+ andl %ebx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r10d
+
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r10d
+
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 12(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r14d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+ movl %r12d,40(%rsp)
+
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r10d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ecx,%r15d
+ movl %r11d,%r9d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r15d
+
+ xorl %eax,%r9d
+ xorl %r10d,%r14d
+ addl %r15d,%r12d
+ movl %r11d,%r15d
+
+ rorl $6,%r13d
+ andl %r10d,%r9d
+ andl %eax,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r9d
+
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r9d
+
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 16(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %r14d,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%r15d
+ movl %r12d,44(%rsp)
+
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %r9d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %ebx,%r15d
+ movl %r10d,%r8d
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r15d
+
+ xorl %r11d,%r8d
+ xorl %r9d,%r14d
+ addl %r15d,%r12d
+ movl %r10d,%r15d
+
+ rorl $6,%r13d
+ andl %r9d,%r8d
+ andl %r11d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%r8d
+
+ addl %r12d,%eax
+ addl %r12d,%r8d
+ leaq 1(%rdi),%rdi
+ addl %r14d,%r8d
+
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 20(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r14d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+ movl %r12d,48(%rsp)
+
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %r8d,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %eax,%r15d
+ movl %r9d,%edx
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r15d
+
+ xorl %r10d,%edx
+ xorl %r8d,%r14d
+ addl %r15d,%r12d
+ movl %r9d,%r15d
+
+ rorl $6,%r13d
+ andl %r8d,%edx
+ andl %r10d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%edx
+
+ addl %r12d,%r11d
+ addl %r12d,%edx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%edx
+
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 24(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %r14d,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%r15d
+ movl %r12d,52(%rsp)
+
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %edx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r11d,%r15d
+ movl %r8d,%ecx
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r15d
+
+ xorl %r9d,%ecx
+ xorl %edx,%r14d
+ addl %r15d,%r12d
+ movl %r8d,%r15d
+
+ rorl $6,%r13d
+ andl %edx,%ecx
+ andl %r9d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ecx
+
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ecx
+
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 28(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r14d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+ movl %r12d,56(%rsp)
+
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %ecx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r10d,%r15d
+ movl %edx,%ebx
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r15d
+
+ xorl %r8d,%ebx
+ xorl %ecx,%r14d
+ addl %r15d,%r12d
+ movl %edx,%r15d
+
+ rorl $6,%r13d
+ andl %ecx,%ebx
+ andl %r8d,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%ebx
+
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+ leaq 1(%rdi),%rdi
+ addl %r14d,%ebx
+
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%r14d
+ movl %r13d,%r12d
+ movl %r14d,%r15d
+
+ rorl $11,%r12d
+ xorl %r13d,%r12d
+ shrl $3,%r13d
+
+ rorl $7,%r12d
+ xorl %r12d,%r13d
+ movl 32(%rsp),%r12d
+
+ rorl $2,%r15d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ addl %r13d,%r12d
+ xorl %r15d,%r14d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %r14d,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%r15d
+ movl %r12d,60(%rsp)
+
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %ebx,%r14d
+
+ addl (%rbp,%rdi,4),%r12d
+ andl %r9d,%r15d
+ movl %ecx,%eax
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r15d
+
+ xorl %edx,%eax
+ xorl %ebx,%r14d
+ addl %r15d,%r12d
+ movl %ecx,%r15d
+
+ rorl $6,%r13d
+ andl %ebx,%eax
+ andl %edx,%r15d
+
+ rorl $2,%r14d
+ addl %r13d,%r12d
+ addl %r15d,%eax
+
+ addl %r12d,%r8d
+ addl %r12d,%eax
+ leaq 1(%rdi),%rdi
+ addl %r14d,%eax
+
+ cmpq $64,%rdi
+ jb L$rounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb L$loop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue:
+ .byte 0xf3,0xc3
+
+.p2align 6
+
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/compat/sha/sha256.c b/compat/sha/sha256.c
@@ -0,0 +1,284 @@
+/* $OpenBSD: sha256.c,v 1.7 2014/07/10 22:45:58 jsing Exp $ */
+/* ====================================================================
+ * Copyright (c) 2004 The OpenSSL Project. All rights reserved
+ * according to the OpenSSL license [found in ../../LICENSE].
+ * ====================================================================
+ */
+
+#include <openssl/opensslconf.h>
+
+#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256)
+
+#include <machine/endian.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <openssl/crypto.h>
+#include <openssl/sha.h>
+#include <openssl/opensslv.h>
+
+int SHA224_Init(SHA256_CTX *c)
+ {
+ memset (c,0,sizeof(*c));
+ c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL;
+ c->h[2]=0x3070dd17UL; c->h[3]=0xf70e5939UL;
+ c->h[4]=0xffc00b31UL; c->h[5]=0x68581511UL;
+ c->h[6]=0x64f98fa7UL; c->h[7]=0xbefa4fa4UL;
+ c->md_len=SHA224_DIGEST_LENGTH;
+ return 1;
+ }
+
+int SHA256_Init(SHA256_CTX *c)
+ {
+ memset (c,0,sizeof(*c));
+ c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL;
+ c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL;
+ c->h[4]=0x510e527fUL; c->h[5]=0x9b05688cUL;
+ c->h[6]=0x1f83d9abUL; c->h[7]=0x5be0cd19UL;
+ c->md_len=SHA256_DIGEST_LENGTH;
+ return 1;
+ }
+
+unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA256_CTX c;
+ static unsigned char m[SHA224_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA224_Init(&c);
+ SHA256_Update(&c,d,n);
+ SHA256_Final(md,&c);
+ OPENSSL_cleanse(&c,sizeof(c));
+ return(md);
+ }
+
+unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md)
+ {
+ SHA256_CTX c;
+ static unsigned char m[SHA256_DIGEST_LENGTH];
+
+ if (md == NULL) md=m;
+ SHA256_Init(&c);
+ SHA256_Update(&c,d,n);
+ SHA256_Final(md,&c);
+ OPENSSL_cleanse(&c,sizeof(c));
+ return(md);
+ }
+
+int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
+{ return SHA256_Update (c,data,len); }
+int SHA224_Final (unsigned char *md, SHA256_CTX *c)
+{ return SHA256_Final (md,c); }
+
+#define DATA_ORDER_IS_BIG_ENDIAN
+
+#define HASH_LONG SHA_LONG
+#define HASH_CTX SHA256_CTX
+#define HASH_CBLOCK SHA_CBLOCK
+/*
+ * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
+ * default: case below covers for it. It's not clear however if it's
+ * permitted to truncate to amount of bytes not divisible by 4. I bet not,
+ * but if it is, then default: case shall be extended. For reference.
+ * Idea behind separate cases for pre-defined lenghts is to let the
+ * compiler decide if it's appropriate to unroll small loops.
+ */
+#define HASH_MAKE_STRING(c,s) do { \
+ unsigned long ll; \
+ unsigned int nn; \
+ switch ((c)->md_len) \
+ { case SHA224_DIGEST_LENGTH: \
+ for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ case SHA256_DIGEST_LENGTH: \
+ for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ default: \
+ if ((c)->md_len > SHA256_DIGEST_LENGTH) \
+ return 0; \
+ for (nn=0;nn<(c)->md_len/4;nn++) \
+ { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \
+ break; \
+ } \
+ } while (0)
+
+#define HASH_UPDATE SHA256_Update
+#define HASH_TRANSFORM SHA256_Transform
+#define HASH_FINAL SHA256_Final
+#define HASH_BLOCK_DATA_ORDER sha256_block_data_order
+#ifndef SHA256_ASM
+static
+#endif
+void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);
+
+#include "md32_common.h"
+
+#ifndef SHA256_ASM
+static const SHA_LONG K256[64] = {
+ 0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL,
+ 0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL,
+ 0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL,
+ 0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL,
+ 0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL,
+ 0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL,
+ 0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL,
+ 0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL,
+ 0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL,
+ 0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL,
+ 0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL,
+ 0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL,
+ 0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL,
+ 0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL,
+ 0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL,
+ 0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL };
+
+/*
+ * FIPS specification refers to right rotations, while our ROTATE macro
+ * is left one. This is why you might notice that rotation coefficients
+ * differ from those observed in FIPS document by 32-N...
+ */
+#define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
+#define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
+#define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
+#define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
+
+#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
+#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+
+#ifdef OPENSSL_SMALL_FOOTPRINT
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+ {
+ unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
+ SHA_LONG X[16],l;
+ int i;
+ const unsigned char *data=in;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ for (i=0;i<16;i++)
+ {
+ HOST_c2l(data,l); T1 = X[i] = l;
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ for (;i<64;i++)
+ {
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0);
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1);
+
+ T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+ T2 = Sigma0(a) + Maj(a,b,c);
+ h = g; g = f; f = e; e = d + T1;
+ d = c; c = b; b = a; a = T1 + T2;
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ }
+}
+
+#else
+
+#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
+ T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; \
+ h = Sigma0(a) + Maj(a,b,c); \
+ d += T1; h += T1; } while (0)
+
+#define ROUND_16_63(i,a,b,c,d,e,f,g,h,X) do { \
+ s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \
+ s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \
+ T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \
+ ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0)
+
+static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num)
+ {
+ unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1;
+ SHA_LONG X[16];
+ int i;
+ const unsigned char *data=in;
+
+ while (num--) {
+
+ a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3];
+ e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7];
+
+ if (BYTE_ORDER != LITTLE_ENDIAN &&
+ sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
+ {
+ const SHA_LONG *W=(const SHA_LONG *)data;
+
+ T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+
+ data += SHA256_CBLOCK;
+ }
+ else
+ {
+ SHA_LONG l;
+
+ HOST_c2l(data,l); T1 = X[0] = l; ROUND_00_15(0,a,b,c,d,e,f,g,h);
+ HOST_c2l(data,l); T1 = X[1] = l; ROUND_00_15(1,h,a,b,c,d,e,f,g);
+ HOST_c2l(data,l); T1 = X[2] = l; ROUND_00_15(2,g,h,a,b,c,d,e,f);
+ HOST_c2l(data,l); T1 = X[3] = l; ROUND_00_15(3,f,g,h,a,b,c,d,e);
+ HOST_c2l(data,l); T1 = X[4] = l; ROUND_00_15(4,e,f,g,h,a,b,c,d);
+ HOST_c2l(data,l); T1 = X[5] = l; ROUND_00_15(5,d,e,f,g,h,a,b,c);
+ HOST_c2l(data,l); T1 = X[6] = l; ROUND_00_15(6,c,d,e,f,g,h,a,b);
+ HOST_c2l(data,l); T1 = X[7] = l; ROUND_00_15(7,b,c,d,e,f,g,h,a);
+ HOST_c2l(data,l); T1 = X[8] = l; ROUND_00_15(8,a,b,c,d,e,f,g,h);
+ HOST_c2l(data,l); T1 = X[9] = l; ROUND_00_15(9,h,a,b,c,d,e,f,g);
+ HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f);
+ HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e);
+ HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d);
+ HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c);
+ HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b);
+ HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a);
+ }
+
+ for (i=16;i<64;i+=8)
+ {
+ ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X);
+ ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X);
+ ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X);
+ ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X);
+ ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X);
+ ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X);
+ ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X);
+ ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X);
+ }
+
+ ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d;
+ ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h;
+
+ }
+ }
+
+#endif
+#endif /* SHA256_ASM */
+
+#endif /* OPENSSL_NO_SHA256 */
diff --git a/compat/sha/sha512-elf-x86_64.S b/compat/sha/sha512-elf-x86_64.S
@@ -0,0 +1,1802 @@
+.text
+
+.globl sha512_block_data_order
+.type sha512_block_data_order,@function
+.align 16
+sha512_block_data_order:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $128+32,%rsp
+ leaq (%rsi,%rdx,8),%rdx
+ andq $-64,%rsp
+ movq %rdi,128+0(%rsp)
+ movq %rsi,128+8(%rsp)
+ movq %rdx,128+16(%rsp)
+ movq %r11,128+24(%rsp)
+.Lprologue:
+
+ leaq K512(%rip),%rbp
+
+ movq 0(%rdi),%rax
+ movq 8(%rdi),%rbx
+ movq 16(%rdi),%rcx
+ movq 24(%rdi),%rdx
+ movq 32(%rdi),%r8
+ movq 40(%rdi),%r9
+ movq 48(%rdi),%r10
+ movq 56(%rdi),%r11
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xorq %rdi,%rdi
+ movq 0(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 8(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 16(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 24(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 32(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 40(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 48(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 56(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 64(%rsi),%r12
+ movq %r8,%r13
+ movq %rax,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 72(%rsi),%r12
+ movq %rdx,%r13
+ movq %r11,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 80(%rsi),%r12
+ movq %rcx,%r13
+ movq %r10,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 88(%rsi),%r12
+ movq %rbx,%r13
+ movq %r9,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 96(%rsi),%r12
+ movq %rax,%r13
+ movq %r8,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 104(%rsi),%r12
+ movq %r11,%r13
+ movq %rdx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 112(%rsi),%r12
+ movq %r10,%r13
+ movq %rcx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 120(%rsi),%r12
+ movq %r9,%r13
+ movq %rbx,%r14
+ bswapq %r12
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movq 8(%rsp),%r13
+ movq 112(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 72(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 0(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,0(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 16(%rsp),%r13
+ movq 120(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 80(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 8(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,8(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 24(%rsp),%r13
+ movq 0(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 88(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 16(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,16(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 32(%rsp),%r13
+ movq 8(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 96(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 24(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,24(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 40(%rsp),%r13
+ movq 16(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 104(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 32(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,32(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 48(%rsp),%r13
+ movq 24(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 112(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 40(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,40(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 56(%rsp),%r13
+ movq 32(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 120(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 48(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,48(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 64(%rsp),%r13
+ movq 40(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 0(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 56(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,56(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ movq 72(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 8(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 64(%rsp),%r12
+ movq %r8,%r13
+ addq %r14,%r12
+ movq %rax,%r14
+ rorq $23,%r13
+ movq %r9,%r15
+ movq %r12,64(%rsp)
+
+ rorq $5,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ rorq $4,%r13
+ addq %r11,%r12
+ xorq %rax,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r8,%r15
+ movq %rbx,%r11
+
+ rorq $6,%r14
+ xorq %r8,%r13
+ xorq %r10,%r15
+
+ xorq %rcx,%r11
+ xorq %rax,%r14
+ addq %r15,%r12
+ movq %rbx,%r15
+
+ rorq $14,%r13
+ andq %rax,%r11
+ andq %rcx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r11
+
+ addq %r12,%rdx
+ addq %r12,%r11
+ leaq 1(%rdi),%rdi
+ addq %r14,%r11
+
+ movq 80(%rsp),%r13
+ movq 56(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 16(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 72(%rsp),%r12
+ movq %rdx,%r13
+ addq %r14,%r12
+ movq %r11,%r14
+ rorq $23,%r13
+ movq %r8,%r15
+ movq %r12,72(%rsp)
+
+ rorq $5,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ rorq $4,%r13
+ addq %r10,%r12
+ xorq %r11,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rdx,%r15
+ movq %rax,%r10
+
+ rorq $6,%r14
+ xorq %rdx,%r13
+ xorq %r9,%r15
+
+ xorq %rbx,%r10
+ xorq %r11,%r14
+ addq %r15,%r12
+ movq %rax,%r15
+
+ rorq $14,%r13
+ andq %r11,%r10
+ andq %rbx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r10
+
+ addq %r12,%rcx
+ addq %r12,%r10
+ leaq 1(%rdi),%rdi
+ addq %r14,%r10
+
+ movq 88(%rsp),%r13
+ movq 64(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 24(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 80(%rsp),%r12
+ movq %rcx,%r13
+ addq %r14,%r12
+ movq %r10,%r14
+ rorq $23,%r13
+ movq %rdx,%r15
+ movq %r12,80(%rsp)
+
+ rorq $5,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ rorq $4,%r13
+ addq %r9,%r12
+ xorq %r10,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rcx,%r15
+ movq %r11,%r9
+
+ rorq $6,%r14
+ xorq %rcx,%r13
+ xorq %r8,%r15
+
+ xorq %rax,%r9
+ xorq %r10,%r14
+ addq %r15,%r12
+ movq %r11,%r15
+
+ rorq $14,%r13
+ andq %r10,%r9
+ andq %rax,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r9
+
+ addq %r12,%rbx
+ addq %r12,%r9
+ leaq 1(%rdi),%rdi
+ addq %r14,%r9
+
+ movq 96(%rsp),%r13
+ movq 72(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 32(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 88(%rsp),%r12
+ movq %rbx,%r13
+ addq %r14,%r12
+ movq %r9,%r14
+ rorq $23,%r13
+ movq %rcx,%r15
+ movq %r12,88(%rsp)
+
+ rorq $5,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ rorq $4,%r13
+ addq %r8,%r12
+ xorq %r9,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rbx,%r15
+ movq %r10,%r8
+
+ rorq $6,%r14
+ xorq %rbx,%r13
+ xorq %rdx,%r15
+
+ xorq %r11,%r8
+ xorq %r9,%r14
+ addq %r15,%r12
+ movq %r10,%r15
+
+ rorq $14,%r13
+ andq %r9,%r8
+ andq %r11,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%r8
+
+ addq %r12,%rax
+ addq %r12,%r8
+ leaq 1(%rdi),%rdi
+ addq %r14,%r8
+
+ movq 104(%rsp),%r13
+ movq 80(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 40(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 96(%rsp),%r12
+ movq %rax,%r13
+ addq %r14,%r12
+ movq %r8,%r14
+ rorq $23,%r13
+ movq %rbx,%r15
+ movq %r12,96(%rsp)
+
+ rorq $5,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ rorq $4,%r13
+ addq %rdx,%r12
+ xorq %r8,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %rax,%r15
+ movq %r9,%rdx
+
+ rorq $6,%r14
+ xorq %rax,%r13
+ xorq %rcx,%r15
+
+ xorq %r10,%rdx
+ xorq %r8,%r14
+ addq %r15,%r12
+ movq %r9,%r15
+
+ rorq $14,%r13
+ andq %r8,%rdx
+ andq %r10,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rdx
+
+ addq %r12,%r11
+ addq %r12,%rdx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rdx
+
+ movq 112(%rsp),%r13
+ movq 88(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 48(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 104(%rsp),%r12
+ movq %r11,%r13
+ addq %r14,%r12
+ movq %rdx,%r14
+ rorq $23,%r13
+ movq %rax,%r15
+ movq %r12,104(%rsp)
+
+ rorq $5,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ rorq $4,%r13
+ addq %rcx,%r12
+ xorq %rdx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r11,%r15
+ movq %r8,%rcx
+
+ rorq $6,%r14
+ xorq %r11,%r13
+ xorq %rbx,%r15
+
+ xorq %r9,%rcx
+ xorq %rdx,%r14
+ addq %r15,%r12
+ movq %r8,%r15
+
+ rorq $14,%r13
+ andq %rdx,%rcx
+ andq %r9,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rcx
+
+ addq %r12,%r10
+ addq %r12,%rcx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rcx
+
+ movq 120(%rsp),%r13
+ movq 96(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 56(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 112(%rsp),%r12
+ movq %r10,%r13
+ addq %r14,%r12
+ movq %rcx,%r14
+ rorq $23,%r13
+ movq %r11,%r15
+ movq %r12,112(%rsp)
+
+ rorq $5,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ rorq $4,%r13
+ addq %rbx,%r12
+ xorq %rcx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r10,%r15
+ movq %rdx,%rbx
+
+ rorq $6,%r14
+ xorq %r10,%r13
+ xorq %rax,%r15
+
+ xorq %r8,%rbx
+ xorq %rcx,%r14
+ addq %r15,%r12
+ movq %rdx,%r15
+
+ rorq $14,%r13
+ andq %rcx,%rbx
+ andq %r8,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rbx
+
+ addq %r12,%r9
+ addq %r12,%rbx
+ leaq 1(%rdi),%rdi
+ addq %r14,%rbx
+
+ movq 0(%rsp),%r13
+ movq 104(%rsp),%r14
+ movq %r13,%r12
+ movq %r14,%r15
+
+ rorq $7,%r12
+ xorq %r13,%r12
+ shrq $7,%r13
+
+ rorq $1,%r12
+ xorq %r12,%r13
+ movq 64(%rsp),%r12
+
+ rorq $42,%r15
+ xorq %r14,%r15
+ shrq $6,%r14
+
+ rorq $19,%r15
+ addq %r13,%r12
+ xorq %r15,%r14
+
+ addq 120(%rsp),%r12
+ movq %r9,%r13
+ addq %r14,%r12
+ movq %rbx,%r14
+ rorq $23,%r13
+ movq %r10,%r15
+ movq %r12,120(%rsp)
+
+ rorq $5,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ rorq $4,%r13
+ addq %rax,%r12
+ xorq %rbx,%r14
+
+ addq (%rbp,%rdi,8),%r12
+ andq %r9,%r15
+ movq %rcx,%rax
+
+ rorq $6,%r14
+ xorq %r9,%r13
+ xorq %r11,%r15
+
+ xorq %rdx,%rax
+ xorq %rbx,%r14
+ addq %r15,%r12
+ movq %rcx,%r15
+
+ rorq $14,%r13
+ andq %rbx,%rax
+ andq %rdx,%r15
+
+ rorq $28,%r14
+ addq %r13,%r12
+ addq %r15,%rax
+
+ addq %r12,%r8
+ addq %r12,%rax
+ leaq 1(%rdi),%rdi
+ addq %r14,%rax
+
+ cmpq $80,%rdi
+ jb .Lrounds_16_xx
+
+ movq 128+0(%rsp),%rdi
+ leaq 128(%rsi),%rsi
+
+ addq 0(%rdi),%rax
+ addq 8(%rdi),%rbx
+ addq 16(%rdi),%rcx
+ addq 24(%rdi),%rdx
+ addq 32(%rdi),%r8
+ addq 40(%rdi),%r9
+ addq 48(%rdi),%r10
+ addq 56(%rdi),%r11
+
+ cmpq 128+16(%rsp),%rsi
+
+ movq %rax,0(%rdi)
+ movq %rbx,8(%rdi)
+ movq %rcx,16(%rdi)
+ movq %rdx,24(%rdi)
+ movq %r8,32(%rdi)
+ movq %r9,40(%rdi)
+ movq %r10,48(%rdi)
+ movq %r11,56(%rdi)
+ jb .Lloop
+
+ movq 128+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha512_block_data_order,.-sha512_block_data_order
+.align 64
+.type K512,@object
+K512:
+.quad 0x428a2f98d728ae22,0x7137449123ef65cd
+.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad 0x3956c25bf348b538,0x59f111f1b605d019
+.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad 0xd807aa98a3030242,0x12835b0145706fbe