lua-bcrypt

Secure password hashing for Lua
Log | Files | Refs

commit e01448b5ddae2a8c67706f2ae789e8fa522fdd12
parent 98b03e3aae6db9b613545e1e02a0349d8f3dcfa9
Author: Michael Savage <mikejsavage@gmail.com>
Date:   Thu, 11 Dec 2014 16:28:07 +0000

Rewrite!

Important changes:
- Support `$2b$` digests.
- Simplified interface to just digest/verify. Generating salts and
  setting random devices is not something anyone should have to worry
  about.
- Use OpenBSD's bcrypt. tedu's work on the new crypt API makes it much
  nicer to work with, and leaves less room for me to get things wrong.
- Pull in arc4random from libressl. OpenBSD's bcrypt requires this
  anyway, but it also fixes things like #3 for free and again, leaves
  less room for me to make mistakes.
- Support more build targets. I have tested on Linux and OpenBSD (5.6+),
  and it should work on OSX and FreeBSD.

Diffstat:
.gitignore | 2--
Makefile | 41+++++++++++------------------------------
Makefile.mess | 44++++++++++++++++++++++++++++++++++++++++++++
Makefile.obsd | 10++++++++++
README.md | 109++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
compat/arc4random/arc4random.c | 195+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random.h | 26++++++++++++++++++++++++++
compat/arc4random/arc4random_freebsd.h | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_linux.h | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_osx.h | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_solaris.h | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/arc4random_win.h | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/arc4random/chacha_private.h | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/bcrypt/bcrypt.c | 353+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/bcrypt/blowfish.c | 685+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_freebsd.c | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_linux.c | 548+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_osx.c | 429+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_solaris.c | 445+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/getentropy/getentropy_win.c | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/safebfuns.c | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha1-elf-x86_64.s | 2486+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha1-macosx-x86_64.s | 2486+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha1_one.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha1dgst.c | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha256-elf-x86_64.S | 1778+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha256-macosx-x86_64.S | 1778+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha256.c | 284+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512-elf-x86_64.S | 1802+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512-macosx-x86_64.S | 1802+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha512.c | 558+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha_dgst.c | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha_locl.h | 435+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/sha/sha_one.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
compat/strlcpy.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
include/bcrypt_hashspace.h | 1+
include/blf.h | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/machine/endian.h | 40++++++++++++++++++++++++++++++++++++++++
include/pwd.h | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/stdlib.h | 29+++++++++++++++++++++++++++++
include/string.h | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
include/sys/types.h | 21+++++++++++++++++++++
include/unistd.h | 19+++++++++++++++++++
lib/bcrypt/LINKS | 29-----------------------------
lib/bcrypt/Makefile | 68--------------------------------------------------------------------
lib/bcrypt/PERFORMANCE | 30------------------------------
lib/bcrypt/README | 68--------------------------------------------------------------------
lib/bcrypt/crypt.h | 24------------------------
lib/bcrypt/crypt_blowfish.c | 902-------------------------------------------------------------------------------
lib/bcrypt/crypt_blowfish.h | 27---------------------------
lib/bcrypt/crypt_gensalt.c | 124-------------------------------------------------------------------------------
lib/bcrypt/crypt_gensalt.h | 30------------------------------
lib/bcrypt/ow-crypt.h | 43-------------------------------------------
lib/bcrypt/wrapper.c | 545-------------------------------------------------------------------------------
lib/bcrypt/x86.S | 203-------------------------------------------------------------------------------
src/main.c | 189+++++--------------------------------------------------------------------------
test-chroot.lua | 11+++++++++++
test-digest.lua | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tests.lua | 90-------------------------------------------------------------------------------
tune.lua | 28++++++++++++++++++++++++++++
60 files changed, 17928 insertions(+), 2440 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,6 +1,4 @@ .tags bcrypt.so -crypt_test - *.o diff --git a/Makefile b/Makefile @@ -1,39 +1,20 @@ -SOURCES = $(wildcard src/**/*.c src/*.c) -OBJECTS = $(patsubst %.c,%.o,$(SOURCES)) +include Makefile.mess -TARGET = bcrypt.so +SRCS += src/main.c -CFLAGS = -O2 -fPIC -std=c99 -D_GNU_SOURCE -Wall -Wextra -Wno-nonnull -Wwrite-strings -Wformat=2 -DNDEBUG -Ilib/bcrypt +CFLAGS += -Wall -Wno-pointer-sign +CFLAGS += -O2 -fPIC -DNDEBUG -BCRYPT_OBJECTS = lib/bcrypt/crypt_blowfish.o lib/bcrypt/x86.o lib/bcrypt/crypt_gensalt.o lib/bcrypt/wrapper.o +OBJS := $(patsubst %.c,%.o,$(SRCS)) -ifdef LUA_INCDIR - CFLAGS += -I$(LUA_INCDIR) -endif +all: bcrypt.so -ifeq ($(shell uname -s),Darwin) - CFLAGS += -bundle -undefined dynamic_lookup -else - CFLAGS += -shared -endif - -.PHONY: debug test clean - -all: $(TARGET) - -debug: CFLAGS += -ggdb -UNDEBUG +debug: CFLAGS += -ggdb3 -UNDEBUG debug: all -lib/bcrypt/%.o: - make -C lib/bcrypt - -$(TARGET): $(OBJECTS) $(BCRYPT_OBJECTS) - $(CC) -o $(TARGET) $^ $(CFLAGS) - -test: - make -C lib/bcrypt check - @lua tests.lua +bcrypt.so: $(OBJS) + $(CC) -o bcrypt.so $(LDFLAGS) $(OBJS) clean: - make -C lib/bcrypt clean - rm -f $(OBJECTS) $(TARGET) + rm -f bcrypt.so + rm -f $(OBJS) diff --git a/Makefile.mess b/Makefile.mess @@ -0,0 +1,44 @@ +# This is more or less what libressl does +# See http://openbsd.cs.toronto.edu/cgi-bin/cvsweb/src/lib/libssl/src/crypto/mem_clr.c?rev=1.4&content-type=text/x-cvsweb-markup +CFLAGS += -DOPENSSL_cleanse=explicit_bzero + +# Make pwd.h define bcrypt_newnhash/bcrypt_checkpass +CFLAGS += -D__BSD_VISIBLE + +# GCC whines without this. Assume everyone has strndup anyway +CFLAGS += -DHAVE_STRNDUP + +# Let Luarocks point us to the right headers +ifdef LUA_INCDIR + CFLAGS += -I$(LUA_INCDIR) +endif + +# OS detection +uname := $(shell uname -s) + +ifneq ($(uname),Darwin) + LDFLAGS += -shared +else + LDFLAGS += -bundle -undefined dynamic_lookup +endif + +CFLAGS += -Iinclude + +SRCS += compat/safebfuns.c +SRCS += compat/bcrypt/bcrypt.c +SRCS += compat/bcrypt/blowfish.c +SRCS += compat/arc4random/arc4random.c +SRCS += compat/strlcpy.c +SRCS += compat/sha/sha512.c + +ifeq ($(uname),Linux) + SRCS += compat/getentropy/getentropy_linux.c +endif + +ifeq ($(uname),Darwin) + SRCS += compat/getentropy/getentropy_osx.c +endif + +ifeq ($(uname),FreeBSD) + SRCS += compat/getentropy/getentropy_freebsd.c +endif diff --git a/Makefile.obsd b/Makefile.obsd @@ -0,0 +1,10 @@ +LIB = bcrypt +SRCS = src/main.c + +LDADD = -lcrypto +CFLAGS += `pkg-config --cflags lua51` + +SHLIB_MAJOR = 2 +SHLIB_MINOR = 0 + +.include <bsd.lib.mk> diff --git a/README.md b/README.md @@ -1,4 +1,4 @@ -A Lua wrapper for [crypt_blowfish](http://www.openwall.com/crypt/). +A Lua wrapper for OpenBSD's bcrypt. Requirements @@ -17,68 +17,81 @@ Usage ----- local bcrypt = require( "bcrypt" ) - local rounds = 5 -- tune this as appropriate - local digest = bcrypt.digest( "password", rounds ) - assert( bcrypt.verify( "password", digest ) ) - -You can also explicitly pass a salt to `bcrypt.digest`: - - local bcrypt = require( "bcrypt" ) - local rounds = 5 -- tune this as appropriate - - local salt = bcrypt.salt( rounds ) - local digest = bcrypt.digest( "password", salt ) + -- Bigger numbers here will make your digest exponentially harder to compute + local log_rounds = 9 + local digest = bcrypt.digest( "password", log_rounds ) assert( bcrypt.verify( "password", digest ) ) -If you want to use a different random device, you can use -`bcrypt.random`: - local bcrypt = require( "bcrypt" ) - bcrypt.random( "/dev/random" ) - local digest = bcrypt.digest( "password", rounds ) - - -- bcrypt.random( "/dev/null" ) - fails +Security concerns +----------------- +Lua will keep plaintext passwords around in memory as part of its string +interning mechanism. As far as I'm aware, there's nothing I can do about +this. -Chroot + +Tuning ------ -If you want to use `lua-bcrypt` from inside a chroot, `/dev/urandom` -must still exist when `require( "bcrypt" )` is called, even if you are -planning to use a different random device. The reasoning behind this is -that chroots are a less common use case of this library, and we should -fail as early as possible on errors in the common use case. +If you would like to automatically tune the number of rounds to your +hardware, you can include a function like: -Therefore, if you wish to use chroot, you should run: + function bcrypt.tune( t ) + local SAMPLES = 10 + local rounds = 5 + + while true do + local total = 0 + + for i = 1, SAMPLES do + local start = os.clock() + bcrypt.digest( "asdf", rounds ) + local delta = os.clock() - start + + total = total + delta + end + + if ( total / SAMPLES ) * 1000 >= t then + return rounds - 1 + end + + rounds = rounds + 1 + end + end - mknod -m 644 /path/to/chroot/dev/urandom c 1 9 +This function returns the largest load factor such that `bcrypt.digest( +str, work )` takes less than `t` milliseconds (assuming your CPU isn't +dodgy). +Note that this will take at least `2 * SAMPLES * t` ms to evaluate. -Security --------- -You might have noticed that `luabcrypt_verify` doesn't use a constant -time comparison and `luabcrypt_digest` doesn't take steps to zero out -the entropy buffer. I don't think either of these are a problem. +Chroot +------ -I haven't used a constant time comparison because I'm not sure how to -write one and then guarantee the compiler won't optimise it out. I don't -think this is a big deal because strong hash functions, such as bcrypt, -have good preimage resistance. That said, the code is currently relying -on this holding forever, and if you aren't happy with that you shouldn't -use it. +[lua-setuid]: https://github.com/mikejsavage/lua-setuid +[test-chroot]: https://github.com/mikejsavage/lua-bcrypt/blob/master/test-chroot.lua -I don't zero out the entropy buffer after generating a salt for the same -reason. There are two ways in which this can be obtained: +Some operating systems do not provide a method for reliably getting +random data from inside a chroot. One workaround for this is to chroot +after initialising lua-bcrypt, for example by using +[lua-setuid][lua-setuid]. -* another program calls `malloc` and gets handed memory that was - previously used as an entropy buffer -* the entropy buffer is read directly from memory + local setuid = require( "setuid" ) + local bcrypt = require( "bcrypt" ) + + assert( setuid.chroot( "." ) ) + assert( not io.open( "/etc/passwd", "r" ) ) + + print( bcrypt.digest( "adsf", 5 ) ) + +There are also operating system specific workarounds. On +non-bleeding-edge (earlier than 3.17) Linux kernels, you can run: + + mkdir /path/to/chroot/dev + mknod -m 644 /path/to/chroot/dev/urandom c 1 9 -I don't think it matters because you shouldn't be able to predict the -output of a CSPRNG even given previous outputs. If you are worried about -the second case, note that Lua interns short strings which will include -passwords handled by your application so an attacker can read those -directly instead. +I have included a test script in [`test-chroot.lua`][test-chroot]. diff --git a/compat/arc4random/arc4random.c b/compat/arc4random/arc4random.c @@ -0,0 +1,195 @@ +/* $OpenBSD: arc4random.c,v 1.49 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * ChaCha based random number generator for OpenBSD. + */ + +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/time.h> + +#define KEYSTREAM_ONLY +#include "chacha_private.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#ifdef __GNUC__ +#define inline __inline +#else /* !__GNUC__ */ +#define inline +#endif /* !__GNUC__ */ + +#define KEYSZ 32 +#define IVSZ 8 +#define BLOCKSZ 64 +#define RSBUFSZ (16*BLOCKSZ) + +/* Marked MAP_INHERIT_ZERO, so zero'd out in fork children. */ +static struct _rs { + size_t rs_have; /* valid bytes at end of rs_buf */ + size_t rs_count; /* bytes till reseed */ +} *rs; + +/* Maybe be preserved in fork children, if _rs_allocate() decides. */ +static struct _rsx { + chacha_ctx rs_chacha; /* chacha context for random keystream */ + u_char rs_buf[RSBUFSZ]; /* keystream blocks */ +} *rsx; + +static inline int _rs_allocate(struct _rs **, struct _rsx **); +static inline void _rs_forkdetect(void); +#include "arc4random.h" + +static inline void _rs_rekey(u_char *dat, size_t datlen); + +static inline void +_rs_init(u_char *buf, size_t n) +{ + if (n < KEYSZ + IVSZ) + return; + + if (rs == NULL) { + if (_rs_allocate(&rs, &rsx) == -1) + abort(); + } + + chacha_keysetup(&rsx->rs_chacha, buf, KEYSZ * 8, 0); + chacha_ivsetup(&rsx->rs_chacha, buf + KEYSZ); +} + +static void +_rs_stir(void) +{ + u_char rnd[KEYSZ + IVSZ]; + + if (getentropy(rnd, sizeof rnd) == -1) + _getentropy_fail(); + + if (!rs) + _rs_init(rnd, sizeof(rnd)); + else + _rs_rekey(rnd, sizeof(rnd)); + explicit_bzero(rnd, sizeof(rnd)); /* discard source seed */ + + /* invalidate rs_buf */ + rs->rs_have = 0; + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); + + rs->rs_count = 1600000; +} + +static inline void +_rs_stir_if_needed(size_t len) +{ + _rs_forkdetect(); + if (!rs || rs->rs_count <= len) + _rs_stir(); + if (rs->rs_count <= len) + rs->rs_count = 0; + else + rs->rs_count -= len; +} + +static inline void +_rs_rekey(u_char *dat, size_t datlen) +{ +#ifndef KEYSTREAM_ONLY + memset(rsx->rs_buf, 0, sizeof(rsx->rs_buf)); +#endif + /* fill rs_buf with the keystream */ + chacha_encrypt_bytes(&rsx->rs_chacha, rsx->rs_buf, + rsx->rs_buf, sizeof(rsx->rs_buf)); + /* mix in optional user provided data */ + if (dat) { + size_t i, m; + + m = min(datlen, KEYSZ + IVSZ); + for (i = 0; i < m; i++) + rsx->rs_buf[i] ^= dat[i]; + } + /* immediately reinit for backtracking resistance */ + _rs_init(rsx->rs_buf, KEYSZ + IVSZ); + memset(rsx->rs_buf, 0, KEYSZ + IVSZ); + rs->rs_have = sizeof(rsx->rs_buf) - KEYSZ - IVSZ; +} + +static inline void +_rs_random_buf(void *_buf, size_t n) +{ + u_char *buf = (u_char *)_buf; + u_char *keystream; + size_t m; + + _rs_stir_if_needed(n); + while (n > 0) { + if (rs->rs_have > 0) { + m = min(n, rs->rs_have); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) + - rs->rs_have; + memcpy(buf, keystream, m); + memset(keystream, 0, m); + buf += m; + n -= m; + rs->rs_have -= m; + } + if (rs->rs_have == 0) + _rs_rekey(NULL, 0); + } +} + +static inline void +_rs_random_u32(uint32_t *val) +{ + u_char *keystream; + + _rs_stir_if_needed(sizeof(*val)); + if (rs->rs_have < sizeof(*val)) + _rs_rekey(NULL, 0); + keystream = rsx->rs_buf + sizeof(rsx->rs_buf) - rs->rs_have; + memcpy(val, keystream, sizeof(*val)); + memset(keystream, 0, sizeof(*val)); + rs->rs_have -= sizeof(*val); +} + +uint32_t +arc4random(void) +{ + uint32_t val; + + _ARC4_LOCK(); + _rs_random_u32(&val); + _ARC4_UNLOCK(); + return val; +} + +void +arc4random_buf(void *buf, size_t n) +{ + _ARC4_LOCK(); + _rs_random_buf(buf, n); + _ARC4_UNLOCK(); +} diff --git a/compat/arc4random/arc4random.h b/compat/arc4random/arc4random.h @@ -0,0 +1,26 @@ +#ifndef LIBCRYPTOCOMPAT_ARC4RANDOM_H +#define LIBCRYPTOCOMPAT_ARC4RANDOM_H + +#include <sys/param.h> + +#if defined(__FreeBSD__) +#include "arc4random_freebsd.h" + +#elif defined(__linux__) +#include "arc4random_linux.h" + +#elif defined(__APPLE__) +#include "arc4random_osx.h" + +#elif defined(__sun) +#include "arc4random_solaris.h" + +#elif defined(_WIN32) +#include "arc4random_win.h" + +#else +#error "No arc4random hooks defined for this platform." + +#endif + +#endif diff --git a/compat/arc4random/arc4random_freebsd.h b/compat/arc4random/arc4random_freebsd.h @@ -0,0 +1,85 @@ +/* $OpenBSD: arc4random_freebsd.h,v 1.1 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +/* + * Unfortunately, pthread_atfork() is broken on FreeBSD (at least 9 and 10) if + * a program does not link to -lthr. Callbacks registered with pthread_atfork() + * appear to fail silently. So, it is not always possible to detect a PID + * wraparound. + */ +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return -1; + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return -1; + } + + _ARC4_ATFORK(_rs_forkhandler); + return 0; +} diff --git a/compat/arc4random/arc4random_linux.h b/compat/arc4random/arc4random_linux.h @@ -0,0 +1,85 @@ +/* $OpenBSD: arc4random_linux.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#ifdef __GLIBC__ +extern void *__dso_handle; +extern int __register_atfork(void (*)(void), void(*)(void), void (*)(void), void *); +#define _ARC4_ATFORK(f) __register_atfork(NULL, NULL, (f), __dso_handle) +#else +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) +#endif + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/compat/arc4random/arc4random_osx.h b/compat/arc4random/arc4random_osx.h @@ -0,0 +1,79 @@ +/* $OpenBSD: arc4random_osx.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return -1; + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return -1; + } + + _ARC4_ATFORK(_rs_forkhandler); + return 0; +} diff --git a/compat/arc4random/arc4random_solaris.h b/compat/arc4random/arc4random_solaris.h @@ -0,0 +1,79 @@ +/* $OpenBSD: arc4random_solaris.h,v 1.7 2014/07/20 20:51:13 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <sys/mman.h> + +#include <pthread.h> +#include <signal.h> + +static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; +#define _ARC4_LOCK() pthread_mutex_lock(&arc4random_mtx) +#define _ARC4_UNLOCK() pthread_mutex_unlock(&arc4random_mtx) + +#define _ARC4_ATFORK(f) pthread_atfork(NULL, NULL, (f)) + +static inline void +_getentropy_fail(void) +{ + raise(SIGKILL); +} + +static volatile sig_atomic_t _rs_forked; + +static inline void +_rs_forkhandler(void) +{ + _rs_forked = 1; +} + +static inline void +_rs_forkdetect(void) +{ + static pid_t _rs_pid = 0; + pid_t pid = getpid(); + + if (_rs_pid == 0 || _rs_pid != pid || _rs_forked) { + _rs_pid = pid; + _rs_forked = 0; + if (rs) + memset(rs, 0, sizeof(*rs)); + } +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + if ((*rsp = mmap(NULL, sizeof(**rsp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) + return (-1); + + if ((*rsxp = mmap(NULL, sizeof(**rsxp), PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { + munmap(*rsp, sizeof(**rsp)); + return (-1); + } + + _ARC4_ATFORK(_rs_forkhandler); + return (0); +} diff --git a/compat/arc4random/arc4random_win.h b/compat/arc4random/arc4random_win.h @@ -0,0 +1,74 @@ +/* $OpenBSD: arc4random_win.h,v 1.3 2014/07/20 16:59:31 bcook Exp $ */ + +/* + * Copyright (c) 1996, David Mazieres <dm@uun.org> + * Copyright (c) 2008, Damien Miller <djm@openbsd.org> + * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Stub functions for portability. + */ + +#include <windows.h> + +static volatile HANDLE arc4random_mtx = NULL; + +/* + * Initialize the mutex on the first lock attempt. On collision, each thread + * will attempt to allocate a mutex and compare-and-swap it into place as the + * global mutex. On failure to swap in the global mutex, the mutex is closed. + */ +#define _ARC4_LOCK() { \ + if (!arc4random_mtx) { \ + HANDLE p = CreateMutex(NULL, FALSE, NULL); \ + if (InterlockedCompareExchangePointer((void **)&arc4random_mtx, (void *)p, NULL)) \ + CloseHandle(p); \ + } \ + WaitForSingleObject(arc4random_mtx, INFINITE); \ +} \ + +#define _ARC4_UNLOCK() ReleaseMutex(arc4random_mtx) + +static inline void +_getentropy_fail(void) +{ + TerminateProcess(GetCurrentProcess(), 0); +} + +static inline int +_rs_allocate(struct _rs **rsp, struct _rsx **rsxp) +{ + *rsp = calloc(1, sizeof(**rsp)); + if (*rsp == NULL) + return (-1); + + *rsxp = calloc(1, sizeof(**rsxp)); + if (*rsxp == NULL) { + free(*rsp); + return (-1); + } + return (0); +} + +static inline void +_rs_forkhandler(void) +{ +} + +static inline void +_rs_forkdetect(void) +{ +} diff --git a/compat/arc4random/chacha_private.h b/compat/arc4random/chacha_private.h @@ -0,0 +1,222 @@ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. +*/ + +/* $OpenBSD$ */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x,const u8 *iv) +{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + +#ifndef KEYSTREAM_ONLY + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); +#endif + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + return; + } + bytes -= 64; + c += 64; +#ifndef KEYSTREAM_ONLY + m += 64; +#endif + } +} diff --git a/compat/bcrypt/bcrypt.c b/compat/bcrypt/bcrypt.c @@ -0,0 +1,353 @@ +/* $OpenBSD: bcrypt.c,v 1.45 2014/07/20 04:22:34 guenther Exp $ */ + +/* + * Copyright (c) 2014 Ted Unangst <tedu@openbsd.org> + * Copyright (c) 1997 Niels Provos <provos@umich.edu> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* This password hashing algorithm was designed by David Mazieres + * <dm@lcs.mit.edu> and works as follows: + * + * 1. state := InitState () + * 2. state := ExpandKey (state, salt, password) + * 3. REPEAT rounds: + * state := ExpandKey (state, 0, password) + * state := ExpandKey (state, 0, salt) + * 4. ctext := "OrpheanBeholderScryDoubt" + * 5. REPEAT 64: + * ctext := Encrypt_ECB (state, ctext); + * 6. RETURN Concatenate (salt, ctext); + * + */ + +#include <sys/types.h> +#include <blf.h> +#include <ctype.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* This implementation is adaptable to current computing power. + * You can have up to 2^31 rounds which should be enough for some + * time to come. + */ + +#define BCRYPT_VERSION '2' +#define BCRYPT_MAXSALT 16 /* Precomputation is just so nice */ +#define BCRYPT_BLOCKS 6 /* Ciphertext blocks */ +#define BCRYPT_MINLOGROUNDS 4 /* we have log2(rounds) in salt */ + +#define BCRYPT_SALTSPACE (7 + (BCRYPT_MAXSALT * 4 + 2) / 3 + 1) +#define BCRYPT_HASHSPACE 61 + +char *bcrypt_gensalt(u_int8_t); + +static int encode_base64(char *, const u_int8_t *, size_t); +static int decode_base64(u_int8_t *, size_t, const char *); + +/* + * Generates a salt for this version of crypt. + */ +static int +bcrypt_initsalt(int log_rounds, uint8_t *salt, size_t saltbuflen) +{ + uint8_t csalt[BCRYPT_MAXSALT]; + + if (saltbuflen < BCRYPT_SALTSPACE) + return -1; + + arc4random_buf(csalt, sizeof(csalt)); + + if (log_rounds < 4) + log_rounds = 4; + else if (log_rounds > 31) + log_rounds = 31; + + snprintf(salt, saltbuflen, "$2b$%2.2u$", log_rounds); + encode_base64(salt + 7, csalt, sizeof(csalt)); + + return 0; +} + +/* + * the core bcrypt function + */ +static int +bcrypt_hashpass(const char *key, const char *salt, char *encrypted, + size_t encryptedlen) +{ + blf_ctx state; + u_int32_t rounds, i, k; + u_int16_t j; + size_t key_len; + u_int8_t salt_len, logr, minor; + u_int8_t ciphertext[4 * BCRYPT_BLOCKS] = "OrpheanBeholderScryDoubt"; + u_int8_t csalt[BCRYPT_MAXSALT]; + u_int32_t cdata[BCRYPT_BLOCKS]; + + if (encryptedlen < BCRYPT_HASHSPACE) + return -1; + + /* Check and discard "$" identifier */ + if (salt[0] != '$') + return -1; + salt += 1; + + if (salt[0] != BCRYPT_VERSION) + return -1; + + /* Check for minor versions */ + switch ((minor = salt[1])) { + case 'a': + key_len = (u_int8_t)(strlen(key) + 1); + break; + case 'b': + /* strlen() returns a size_t, but the function calls + * below result in implicit casts to a narrower integer + * type, so cap key_len at the actual maximum supported + * length here to avoid integer wraparound */ + key_len = strlen(key); + if (key_len > 72) + key_len = 72; + key_len++; /* include the NUL */ + break; + default: + return -1; + } + if (salt[2] != '$') + return -1; + /* Discard version + "$" identifier */ + salt += 3; + + /* Check and parse num rounds */ + if (!isdigit((unsigned char)salt[0]) || + !isdigit((unsigned char)salt[1]) || salt[2] != '$') + return -1; + logr = atoi(salt); + if (logr < BCRYPT_MINLOGROUNDS || logr > 31) + return -1; + /* Computer power doesn't increase linearly, 2^x should be fine */ + rounds = 1U << logr; + + /* Discard num rounds + "$" identifier */ + salt += 3; + + if (strlen(salt) * 3 / 4 < BCRYPT_MAXSALT) + return -1; + + /* We dont want the base64 salt but the raw data */ + if (decode_base64(csalt, BCRYPT_MAXSALT, salt)) + return -1; + salt_len = BCRYPT_MAXSALT; + + /* Setting up S-Boxes and Subkeys */ + Blowfish_initstate(&state); + Blowfish_expandstate(&state, csalt, salt_len, + (u_int8_t *) key, key_len); + for (k = 0; k < rounds; k++) { + Blowfish_expand0state(&state, (u_int8_t *) key, key_len); + Blowfish_expand0state(&state, csalt, salt_len); + } + + /* This can be precomputed later */ + j = 0; + for (i = 0; i < BCRYPT_BLOCKS; i++) + cdata[i] = Blowfish_stream2word(ciphertext, 4 * BCRYPT_BLOCKS, &j); + + /* Now do the encryption */ + for (k = 0; k < 64; k++) + blf_enc(&state, cdata, BCRYPT_BLOCKS / 2); + + for (i = 0; i < BCRYPT_BLOCKS; i++) { + ciphertext[4 * i + 3] = cdata[i] & 0xff; + cdata[i] = cdata[i] >> 8; + ciphertext[4 * i + 2] = cdata[i] & 0xff; + cdata[i] = cdata[i] >> 8; + ciphertext[4 * i + 1] = cdata[i] & 0xff; + cdata[i] = cdata[i] >> 8; + ciphertext[4 * i + 0] = cdata[i] & 0xff; + } + + + snprintf(encrypted, 8, "$2%c$%2.2u$", minor, logr); + encode_base64(encrypted + 7, csalt, BCRYPT_MAXSALT); + encode_base64(encrypted + 7 + 22, ciphertext, 4 * BCRYPT_BLOCKS - 1); + explicit_bzero(&state, sizeof(state)); + explicit_bzero(ciphertext, sizeof(ciphertext)); + explicit_bzero(csalt, sizeof(csalt)); + explicit_bzero(cdata, sizeof(cdata)); + return 0; +} + +/* + * user friendly functions + */ +int +bcrypt_newhash(const char *pass, int log_rounds, char *hash, size_t hashlen) +{ + char salt[BCRYPT_SALTSPACE]; + + if (bcrypt_initsalt(log_rounds, salt, sizeof(salt)) != 0) + return -1; + + if (bcrypt_hashpass(pass, salt, hash, hashlen) != 0) + return -1; + + explicit_bzero(salt, sizeof(salt)); + return 0; +} + +int +bcrypt_checkpass(const char *pass, const char *goodhash) +{ + char hash[BCRYPT_HASHSPACE]; + + if (bcrypt_hashpass(pass, goodhash, hash, sizeof(hash)) != 0) + return -1; + if (strlen(hash) != strlen(goodhash) || + timingsafe_bcmp(hash, goodhash, strlen(goodhash)) != 0) + return -1; + + explicit_bzero(hash, sizeof(hash)); + return 0; +} + +/* + * internal utilities + */ +static const u_int8_t Base64Code[] = +"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + +static const u_int8_t index_64[128] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 0, 1, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 255, 255, + 255, 255, 255, 255, 255, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 255, 255, 255, 255, 255, 255, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 255, 255, 255, 255, 255 +}; +#define CHAR64(c) ( (c) > 127 ? 255 : index_64[(c)]) + +/* + * read buflen (after decoding) bytes of data from b64data + */ +static int +decode_base64(u_int8_t *buffer, size_t len, const char *b64data) +{ + u_int8_t *bp = buffer; + const u_int8_t *p = b64data; + u_int8_t c1, c2, c3, c4; + + while (bp < buffer + len) { + c1 = CHAR64(*p); + /* Invalid data */ + if (c1 == 255) + return -1; + + c2 = CHAR64(*(p + 1)); + if (c2 == 255) + return -1; + + *bp++ = (c1 << 2) | ((c2 & 0x30) >> 4); + if (bp >= buffer + len) + break; + + c3 = CHAR64(*(p + 2)); + if (c3 == 255) + return -1; + + *bp++ = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2); + if (bp >= buffer + len) + break; + + c4 = CHAR64(*(p + 3)); + if (c4 == 255) + return -1; + *bp++ = ((c3 & 0x03) << 6) | c4; + + p += 4; + } + return 0; +} + +/* + * Turn len bytes of data into base64 encoded data. + * This works without = padding. + */ +static int +encode_base64(char *b64buffer, const u_int8_t *data, size_t len) +{ + u_int8_t *bp = b64buffer; + const u_int8_t *p = data; + u_int8_t c1, c2; + + while (p < data + len) { + c1 = *p++; + *bp++ = Base64Code[(c1 >> 2)]; + c1 = (c1 & 0x03) << 4; + if (p >= data + len) { + *bp++ = Base64Code[c1]; + break; + } + c2 = *p++; + c1 |= (c2 >> 4) & 0x0f; + *bp++ = Base64Code[c1]; + c1 = (c2 & 0x0f) << 2; + if (p >= data + len) { + *bp++ = Base64Code[c1]; + break; + } + c2 = *p++; + c1 |= (c2 >> 6) & 0x03; + *bp++ = Base64Code[c1]; + *bp++ = Base64Code[c2 & 0x3f]; + } + *bp = '\0'; + return 0; +} + +/* + * classic interface + */ +char * +bcrypt_gensalt(u_int8_t log_rounds) +{ + static char gsalt[BCRYPT_SALTSPACE]; + + bcrypt_initsalt(log_rounds, gsalt, sizeof(gsalt)); + + return gsalt; +} + +char * +bcrypt(const char *pass, const char *salt) +{ + static char gencrypted[BCRYPT_HASHSPACE]; + static char gerror[2]; + + /* How do I handle errors ? Return ':' */ + strlcpy(gerror, ":", sizeof(gerror)); + if (bcrypt_hashpass(pass, salt, gencrypted, sizeof(gencrypted)) != 0) + return gerror; + + return gencrypted; +} diff --git a/compat/bcrypt/blowfish.c b/compat/bcrypt/blowfish.c @@ -0,0 +1,685 @@ +/* $OpenBSD: blowfish.c,v 1.17 2003/04/09 21:46:02 markus Exp $ */ +/* + * Blowfish block cipher for OpenBSD + * Copyright 1997 Niels Provos <provos@physnet.uni-hamburg.de> + * All rights reserved. + * + * Implementation advice by David Mazieres <dm@lcs.mit.edu>. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Niels Provos. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This code is derived from section 14.3 and the given source + * in section V of Applied Cryptography, second edition. + * Blowfish is an unpatented fast block cipher designed by + * Bruce Schneier. + */ + +#if 0 +#include <stdio.h> /* used for debugging */ +#include <string.h> +#endif + +#include <sys/types.h> +#include <blf.h> + +#undef inline +#ifdef __GNUC__ +#define inline __inline +#else /* !__GNUC__ */ +#define inline +#endif /* !__GNUC__ */ + +/* Function for Feistel Networks */ + +#define F(s, x) ((((s)[ (((x)>>24)&0xFF)] \ + + (s)[0x100 + (((x)>>16)&0xFF)]) \ + ^ (s)[0x200 + (((x)>> 8)&0xFF)]) \ + + (s)[0x300 + ( (x) &0xFF)]) + +#define BLFRND(s,p,i,j,n) (i ^= F(s,j) ^ (p)[n]) + +void +Blowfish_encipher(blf_ctx *c, u_int32_t *xl, u_int32_t *xr) +{ + u_int32_t Xl; + u_int32_t Xr; + u_int32_t *s = c->S[0]; + u_int32_t *p = c->P; + + Xl = *xl; + Xr = *xr; + + Xl ^= p[0]; + BLFRND(s, p, Xr, Xl, 1); BLFRND(s, p, Xl, Xr, 2); + BLFRND(s, p, Xr, Xl, 3); BLFRND(s, p, Xl, Xr, 4); + BLFRND(s, p, Xr, Xl, 5); BLFRND(s, p, Xl, Xr, 6); + BLFRND(s, p, Xr, Xl, 7); BLFRND(s, p, Xl, Xr, 8); + BLFRND(s, p, Xr, Xl, 9); BLFRND(s, p, Xl, Xr, 10); + BLFRND(s, p, Xr, Xl, 11); BLFRND(s, p, Xl, Xr, 12); + BLFRND(s, p, Xr, Xl, 13); BLFRND(s, p, Xl, Xr, 14); + BLFRND(s, p, Xr, Xl, 15); BLFRND(s, p, Xl, Xr, 16); + + *xl = Xr ^ p[17]; + *xr = Xl; +} + +void +Blowfish_decipher(blf_ctx *c, u_int32_t *xl, u_int32_t *xr) +{ + u_int32_t Xl; + u_int32_t Xr; + u_int32_t *s = c->S[0]; + u_int32_t *p = c->P; + + Xl = *xl; + Xr = *xr; + + Xl ^= p[17]; + BLFRND(s, p, Xr, Xl, 16); BLFRND(s, p, Xl, Xr, 15); + BLFRND(s, p, Xr, Xl, 14); BLFRND(s, p, Xl, Xr, 13); + BLFRND(s, p, Xr, Xl, 12); BLFRND(s, p, Xl, Xr, 11); + BLFRND(s, p, Xr, Xl, 10); BLFRND(s, p, Xl, Xr, 9); + BLFRND(s, p, Xr, Xl, 8); BLFRND(s, p, Xl, Xr, 7); + BLFRND(s, p, Xr, Xl, 6); BLFRND(s, p, Xl, Xr, 5); + BLFRND(s, p, Xr, Xl, 4); BLFRND(s, p, Xl, Xr, 3); + BLFRND(s, p, Xr, Xl, 2); BLFRND(s, p, Xl, Xr, 1); + + *xl = Xr ^ p[0]; + *xr = Xl; +} + +void +Blowfish_initstate(blf_ctx *c) +{ + /* P-box and S-box tables initialized with digits of Pi */ + + static const blf_ctx initstate = + { { + { + 0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, + 0xb8e1afed, 0x6a267e96, 0xba7c9045, 0xf12c7f99, + 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16, + 0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, + 0x0d95748f, 0x728eb658, 0x718bcd58, 0x82154aee, + 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013, + 0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, + 0x8e79dcb0, 0x603a180e, 0x6c9e0e8b, 0xb01e8a3e, + 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60, + 0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, + 0x55ca396a, 0x2aab10b6, 0xb4cc5c34, 0x1141e8ce, + 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a, + 0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, + 0xafd6ba33, 0x6c24cf5c, 0x7a325381, 0x28958677, + 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193, + 0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, + 0xef845d5d, 0xe98575b1, 0xdc262302, 0xeb651b88, + 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239, + 0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, + 0x21c66842, 0xf6e96c9a, 0x670c9c61, 0xabd388f0, + 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3, + 0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, + 0xa1f1651d, 0x39af0176, 0x66ca593e, 0x82430e88, + 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe, + 0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, + 0x4ed3aa62, 0x363f7706, 0x1bfedf72, 0x429b023d, + 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b, + 0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, + 0xe3fe501a, 0xb6794c3b, 0x976ce0bd, 0x04c006ba, + 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463, + 0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, + 0x6dfc511f, 0x9b30952c, 0xcc814544, 0xaf5ebd09, + 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3, + 0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, + 0x5579c0bd, 0x1a60320a, 0xd6a100c6, 0x402c7279, + 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8, + 0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, + 0x323db5fa, 0xfd238760, 0x53317b48, 0x3e00df82, + 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db, + 0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, + 0x695b27b0, 0xbbca58c8, 0xe1ffa35d, 0xb8f011a0, + 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b, + 0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, + 0xe1ddf2da, 0xa4cb7e33, 0x62fb1341, 0xcee4c6e8, + 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4, + 0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, + 0xd08ed1d0, 0xafc725e0, 0x8e3c5b2f, 0x8e7594b7, + 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c, + 0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, + 0x2f2f2218, 0xbe0e1777, 0xea752dfe, 0x8b021fa1, + 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299, + 0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, + 0x165fa266, 0x80957705, 0x93cc7314, 0x211a1477, + 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf, + 0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, + 0x00250e2d, 0x2071b35e, 0x226800bb, 0x57b8e0af, + 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa, + 0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, + 0x83260376, 0x6295cfa9, 0x11c81968, 0x4e734a41, + 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915, + 0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, + 0x08ba6fb5, 0x571be91f, 0xf296ec6b, 0x2a0dd915, + 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664, + 0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a}, + { + 0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, + 0xad6ea6b0, 0x49a7df7d, 0x9cee60b8, 0x8fedb266, + 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1, + 0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, + 0x3f54989a, 0x5b429d65, 0x6b8fe4d6, 0x99f73fd6, + 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1, + 0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, + 0x09686b3f, 0x3ebaefc9, 0x3c971814, 0x6b6a70a1, + 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737, + 0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, + 0xb03ada37, 0xf0500c0d, 0xf01c1f04, 0x0200b3ff, + 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd, + 0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, + 0x3ae5e581, 0x37c2dadc, 0xc8b57634, 0x9af3dda7, + 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41, + 0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, + 0x4e548b38, 0x4f6db908, 0x6f420d03, 0xf60a04bf, + 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af, + 0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, + 0x5512721f, 0x2e6b7124, 0x501adde6, 0x9f84cd87, + 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c, + 0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, + 0xef1c1847, 0x3215d908, 0xdd433b37, 0x24c2ba16, + 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd, + 0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, + 0x043556f1, 0xd7a3c76b, 0x3c11183b, 0x5924a509, + 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e, + 0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, + 0x771fe71c, 0x4e3d06fa, 0x2965dcb9, 0x99e71d0f, + 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a, + 0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, + 0xf2f74ea7, 0x361d2b3d, 0x1939260f, 0x19c27960, + 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66, + 0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, + 0xc332ddef, 0xbe6c5aa5, 0x65582185, 0x68ab9802, + 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84, + 0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, + 0x13cca830, 0xeb61bd96, 0x0334fe1e, 0xaa0363cf, + 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14, + 0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, + 0x648b1eaf, 0x19bdf0ca, 0xa02369b9, 0x655abb50, + 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7, + 0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, + 0xf837889a, 0x97e32d77, 0x11ed935f, 0x16681281, + 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99, + 0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, + 0xcdb30aeb, 0x532e3054, 0x8fd948e4, 0x6dbc3128, + 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73, + 0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, + 0x45eee2b6, 0xa3aaabea, 0xdb6c4f15, 0xfacb4fd0, + 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105, + 0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, + 0xcf62a1f2, 0x5b8d2646, 0xfc8883a0, 0xc1c7b6a3, + 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285, + 0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, + 0x58428d2a, 0x0c55f5ea, 0x1dadf43e, 0x233f7061, + 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb, + 0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, + 0xa6078084, 0x19f8509e, 0xe8efd855, 0x61d99735, + 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc, + 0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, + 0xdb73dbd3, 0x105588cd, 0x675fda79, 0xe3674340, + 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20, + 0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7}, + { + 0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, + 0x411520f7, 0x7602d4f7, 0xbcf46b2e, 0xd4a20068, + 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af, + 0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, + 0x4d95fc1d, 0x96b591af, 0x70f4ddd3, 0x66a02f45, + 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504, + 0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, + 0x28507825, 0x530429f4, 0x0a2c86da, 0xe9b66dfb, + 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee, + 0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, + 0xaace1e7c, 0xd3375fec, 0xce78a399, 0x406b2a42, + 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b, + 0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, + 0x3a6efa74, 0xdd5b4332, 0x6841e7f7, 0xca7820fb, + 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527, + 0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, + 0x55a867bc, 0xa1159a58, 0xcca92963, 0x99e1db33, + 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c, + 0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, + 0x95c11548, 0xe4c66d22, 0x48c1133f, 0xc70f86dc, + 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17, + 0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, + 0x257b7834, 0x602a9c60, 0xdff8e8a3, 0x1f636c1b, + 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115, + 0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, + 0x85b2a20e, 0xe6ba0d99, 0xde720c8c, 0x2da2f728, + 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0, + 0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, + 0x0a476341, 0x992eff74, 0x3a6f6eab, 0xf4f8fd37, + 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d, + 0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, + 0xf1290dc7, 0xcc00ffa3, 0xb5390f92, 0x690fed0b, + 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3, + 0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, + 0x37392eb3, 0xcc115979, 0x8026e297, 0xf42e312d, + 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c, + 0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, + 0x1a6b1018, 0x11caedfa, 0x3d25bdd8, 0xe2e1c3c9, + 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a, + 0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, + 0x9dbc8057, 0xf0f7c086, 0x60787bf8, 0x6003604d, + 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc, + 0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, + 0x77a057be, 0xbde8ae24, 0x55464299, 0xbf582e61, + 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2, + 0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, + 0x7aeb2661, 0x8b1ddf84, 0x846a0e79, 0x915f95e2, + 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c, + 0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, + 0xb77f19b6, 0xe0a9dc09, 0x662d09a1, 0xc4324633, + 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10, + 0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, + 0xdcb7da83, 0x573906fe, 0xa1e2ce9b, 0x4fcd7f52, + 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027, + 0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, + 0xf0177a28, 0xc0f586e0, 0x006058aa, 0x30dc7d62, + 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634, + 0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, + 0x6f05e409, 0x4b7c0188, 0x39720a3d, 0x7c927c24, + 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc, + 0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, + 0x1e50ef5e, 0xb161e6f8, 0xa28514d9, 0x6c51133c, + 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837, + 0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0}, + { + 0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, + 0x5cb0679e, 0x4fa33742, 0xd3822740, 0x99bc9bbe, + 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b, + 0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, + 0x5748ab2f, 0xbc946e79, 0xc6a376d2, 0x6549c2c8, + 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6, + 0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, + 0xa1fad5f0, 0x6a2d519a, 0x63ef8ce2, 0x9a86ee22, + 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4, + 0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, + 0x2826a2f9, 0xa73a3ae1, 0x4ba99586, 0xef5562e9, + 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59, + 0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, + 0xe990fd5a, 0x9e34d797, 0x2cf0b7d9, 0x022b8b51, + 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28, + 0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, + 0xe029ac71, 0xe019a5e6, 0x47b0acfd, 0xed93fa9b, + 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28, + 0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, + 0x15056dd4, 0x88f46dba, 0x03a16125, 0x0564f0bd, + 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a, + 0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, + 0x7533d928, 0xb155fdf5, 0x03563482, 0x8aba3cbb, + 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f, + 0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, + 0xea7a90c2, 0xfb3e7bce, 0x5121ce64, 0x774fbe32, + 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680, + 0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, + 0xb39a460a, 0x6445c0dd, 0x586cdecf, 0x1c20c8ae, + 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb, + 0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, + 0x72eacea8, 0xfa6484bb, 0x8d6612ae, 0xbf3c6f47, + 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370, + 0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, + 0x4040cb08, 0x4eb4e2cc, 0x34d2466a, 0x0115af84, + 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048, + 0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, + 0x611560b1, 0xe7933fdc, 0xbb3a792b, 0x344525bd, + 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9, + 0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, + 0x1a908749, 0xd44fbd9a, 0xd0dadecb, 0xd50ada38, + 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f, + 0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, + 0xbf97222c, 0x15e6fc2a, 0x0f91fc71, 0x9b941525, + 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1, + 0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, + 0xe0ec6e0e, 0x1698db3b, 0x4c98a0be, 0x3278e964, + 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e, + 0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, + 0xdf359f8d, 0x9b992f2e, 0xe60b6f47, 0x0fe3f11d, + 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f, + 0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, + 0xf523f357, 0xa6327623, 0x93a83531, 0x56cccd02, + 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc, + 0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, + 0xe6c6c7bd, 0x327a140a, 0x45e1d006, 0xc3f27b9a, + 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6, + 0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, + 0x53113ec0, 0x1640e3d3, 0x38abbd60, 0x2547adf0, + 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060, + 0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, + 0x1948c25c, 0x02fb8a8c, 0x01c36ae4, 0xd6ebe1f9, + 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f, + 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6} + }, + { + 0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, + 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, + 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, + 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, + 0x9216d5d9, 0x8979fb1b + } }; + + *c = initstate; +} + +u_int32_t +Blowfish_stream2word(const u_int8_t *data, u_int16_t databytes, + u_int16_t *current) +{ + u_int8_t i; + u_int16_t j; + u_int32_t temp; + + temp = 0x00000000; + j = *current; + + for (i = 0; i < 4; i++, j++) { + if (j >= databytes) + j = 0; + temp = (temp << 8) | data[j]; + } + + *current = j; + return temp; +} + +void +Blowfish_expand0state(blf_ctx *c, const u_int8_t *key, u_int16_t keybytes) +{ + u_int16_t i; + u_int16_t j; + u_int16_t k; + u_int32_t temp; + u_int32_t datal; + u_int32_t datar; + + j = 0; + for (i = 0; i < BLF_N + 2; i++) { + /* Extract 4 int8 to 1 int32 from keystream */ + temp = Blowfish_stream2word(key, keybytes, &j); + c->P[i] = c->P[i] ^ temp; + } + + j = 0; + datal = 0x00000000; + datar = 0x00000000; + for (i = 0; i < BLF_N + 2; i += 2) { + Blowfish_encipher(c, &datal, &datar); + + c->P[i] = datal; + c->P[i + 1] = datar; + } + + for (i = 0; i < 4; i++) { + for (k = 0; k < 256; k += 2) { + Blowfish_encipher(c, &datal, &datar); + + c->S[i][k] = datal; + c->S[i][k + 1] = datar; + } + } +} + + +void +Blowfish_expandstate(blf_ctx *c, const u_int8_t *data, u_int16_t databytes, + const u_int8_t *key, u_int16_t keybytes) +{ + u_int16_t i; + u_int16_t j; + u_int16_t k; + u_int32_t temp; + u_int32_t datal; + u_int32_t datar; + + j = 0; + for (i = 0; i < BLF_N + 2; i++) { + /* Extract 4 int8 to 1 int32 from keystream */ + temp = Blowfish_stream2word(key, keybytes, &j); + c->P[i] = c->P[i] ^ temp; + } + + j = 0; + datal = 0x00000000; + datar = 0x00000000; + for (i = 0; i < BLF_N + 2; i += 2) { + datal ^= Blowfish_stream2word(data, databytes, &j); + datar ^= Blowfish_stream2word(data, databytes, &j); + Blowfish_encipher(c, &datal, &datar); + + c->P[i] = datal; + c->P[i + 1] = datar; + } + + for (i = 0; i < 4; i++) { + for (k = 0; k < 256; k += 2) { + datal ^= Blowfish_stream2word(data, databytes, &j); + datar ^= Blowfish_stream2word(data, databytes, &j); + Blowfish_encipher(c, &datal, &datar); + + c->S[i][k] = datal; + c->S[i][k + 1] = datar; + } + } + +} + +void +blf_key(blf_ctx *c, const u_int8_t *k, u_int16_t len) +{ + /* Initialize S-boxes and subkeys with Pi */ + Blowfish_initstate(c); + + /* Transform S-boxes and subkeys with key */ + Blowfish_expand0state(c, k, len); +} + +void +blf_enc(blf_ctx *c, u_int32_t *data, u_int16_t blocks) +{ + u_int32_t *d; + u_int16_t i; + + d = data; + for (i = 0; i < blocks; i++) { + Blowfish_encipher(c, d, d + 1); + d += 2; + } +} + +void +blf_dec(blf_ctx *c, u_int32_t *data, u_int16_t blocks) +{ + u_int32_t *d; + u_int16_t i; + + d = data; + for (i = 0; i < blocks; i++) { + Blowfish_decipher(c, d, d + 1); + d += 2; + } +} + +void +blf_ecb_encrypt(blf_ctx *c, u_int8_t *data, u_int32_t len) +{ + u_int32_t l, r; + u_int32_t i; + + for (i = 0; i < len; i += 8) { + l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7]; + Blowfish_encipher(c, &l, &r); + data[0] = l >> 24 & 0xff; + data[1] = l >> 16 & 0xff; + data[2] = l >> 8 & 0xff; + data[3] = l & 0xff; + data[4] = r >> 24 & 0xff; + data[5] = r >> 16 & 0xff; + data[6] = r >> 8 & 0xff; + data[7] = r & 0xff; + data += 8; + } +} + +void +blf_ecb_decrypt(blf_ctx *c, u_int8_t *data, u_int32_t len) +{ + u_int32_t l, r; + u_int32_t i; + + for (i = 0; i < len; i += 8) { + l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7]; + Blowfish_decipher(c, &l, &r); + data[0] = l >> 24 & 0xff; + data[1] = l >> 16 & 0xff; + data[2] = l >> 8 & 0xff; + data[3] = l & 0xff; + data[4] = r >> 24 & 0xff; + data[5] = r >> 16 & 0xff; + data[6] = r >> 8 & 0xff; + data[7] = r & 0xff; + data += 8; + } +} + +void +blf_cbc_encrypt(blf_ctx *c, u_int8_t *iv, u_int8_t *data, u_int32_t len) +{ + u_int32_t l, r; + u_int32_t i, j; + + for (i = 0; i < len; i += 8) { + for (j = 0; j < 8; j++) + data[j] ^= iv[j]; + l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7]; + Blowfish_encipher(c, &l, &r); + data[0] = l >> 24 & 0xff; + data[1] = l >> 16 & 0xff; + data[2] = l >> 8 & 0xff; + data[3] = l & 0xff; + data[4] = r >> 24 & 0xff; + data[5] = r >> 16 & 0xff; + data[6] = r >> 8 & 0xff; + data[7] = r & 0xff; + iv = data; + data += 8; + } +} + +void +blf_cbc_decrypt(blf_ctx *c, u_int8_t *iva, u_int8_t *data, u_int32_t len) +{ + u_int32_t l, r; + u_int8_t *iv; + u_int32_t i, j; + + iv = data + len - 16; + data = data + len - 8; + for (i = len - 8; i >= 8; i -= 8) { + l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7]; + Blowfish_decipher(c, &l, &r); + data[0] = l >> 24 & 0xff; + data[1] = l >> 16 & 0xff; + data[2] = l >> 8 & 0xff; + data[3] = l & 0xff; + data[4] = r >> 24 & 0xff; + data[5] = r >> 16 & 0xff; + data[6] = r >> 8 & 0xff; + data[7] = r & 0xff; + for (j = 0; j < 8; j++) + data[j] ^= iv[j]; + iv -= 8; + data -= 8; + } + l = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3]; + r = data[4] << 24 | data[5] << 16 | data[6] << 8 | data[7]; + Blowfish_decipher(c, &l, &r); + data[0] = l >> 24 & 0xff; + data[1] = l >> 16 & 0xff; + data[2] = l >> 8 & 0xff; + data[3] = l & 0xff; + data[4] = r >> 24 & 0xff; + data[5] = r >> 16 & 0xff; + data[6] = r >> 8 & 0xff; + data[7] = r & 0xff; + for (j = 0; j < 8; j++) + data[j] ^= iva[j]; +} + +#if 0 +void +report(u_int32_t data[], u_int16_t len) +{ + u_int16_t i; + for (i = 0; i < len; i += 2) + printf("Block %0hd: %08lx %08lx.\n", + i / 2, data[i], data[i + 1]); +} +void +main(void) +{ + + blf_ctx c; + char key[] = "AAAAA"; + char key2[] = "abcdefghijklmnopqrstuvwxyz"; + + u_int32_t data[10]; + u_int32_t data2[] = + {0x424c4f57l, 0x46495348l}; + + u_int16_t i; + + /* First test */ + for (i = 0; i < 10; i++) + data[i] = i; + + blf_key(&c, (u_int8_t *) key, 5); + blf_enc(&c, data, 5); + blf_dec(&c, data, 1); + blf_dec(&c, data + 2, 4); + printf("Should read as 0 - 9.\n"); + report(data, 10); + + /* Second test */ + blf_key(&c, (u_int8_t *) key2, strlen(key2)); + blf_enc(&c, data2, 1); + printf("\nShould read as: 0x324ed0fe 0xf413a203.\n"); + report(data2, 2); + blf_dec(&c, data2, 1); + report(data2, 2); +} +#endif diff --git a/compat/getentropy/getentropy_freebsd.c b/compat/getentropy/getentropy_freebsd.c @@ -0,0 +1,64 @@ +/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * Copyright (c) 2014 Brent Cook <bcook@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/sysctl.h> + +#include <errno.h> +#include <stddef.h> + +/* + * Derived from lib/libc/gen/arc4random.c from FreeBSD. + */ +static size_t +getentropy_sysctl(u_char *buf, size_t size) +{ + int mib[2]; + size_t len, done; + + mib[0] = CTL_KERN; + mib[1] = KERN_ARND; + done = 0; + + do { + len = size; + if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) + return (done); + done += len; + buf += len; + size -= len; + } while (size > 0); + + return (done); +} + +int +getentropy(void *buf, size_t len) +{ + if (len <= 256 && + getentropy_sysctl(buf, len) == len) { + return 0; + } + + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_linux.c b/compat/getentropy/getentropy_linux.c @@ -0,0 +1,548 @@ +/* $OpenBSD: getentropy_linux.c,v 1.35 2014/08/28 01:00:57 bcook Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#define _POSIX_C_SOURCE 199309L +#define _GNU_SOURCE 1 +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#ifdef HAVE_SYS_SYSCTL_H +#include <sys/sysctl.h> +#endif +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <link.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <openssl/sha.h> + +#include <linux/types.h> +#include <linux/random.h> +#include <linux/sysctl.h> +#ifdef HAVE_GETAUXVAL +#include <sys/auxv.h> +#endif +#include <sys/vfs.h> + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +#ifdef SYS_getrandom +static int getentropy_getrandom(void *buf, size_t len); +#endif +static int getentropy_urandom(void *buf, size_t len); +#ifdef SYS__sysctl +static int getentropy_sysctl(void *buf, size_t len); +#endif +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + +#ifdef SYS_getrandom + /* + * Try descriptor-less getrandom() + */ + ret = getentropy_getrandom(buf, len); + if (ret != -1) + return (ret); + if (errno != ENOSYS) + return (-1); +#endif + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + +#ifdef SYS__sysctl + /* + * Try to use sysctl CTL_KERN, KERN_RANDOM, RANDOM_UUID. + * sysctl is a failsafe API, so it guarantees a result. This + * should work inside a chroot, or when file descriptors are + * exhuasted. + * + * However this can fail if the Linux kernel removes support + * for sysctl. Starting in 2007, there have been efforts to + * deprecate the sysctl API/ABI, and push callers towards use + * of the chroot-unavailable fd-using /proc mechanism -- + * essentially the same problems as /dev/urandom. + * + * Numerous setbacks have been encountered in their deprecation + * schedule, so as of June 2014 the kernel ABI still exists on + * most Linux architectures. The sysctl() stub in libc is missing + * on some systems. There are also reports that some kernels + * spew messages to the console. + */ + ret = getentropy_sysctl(buf, len); + if (ret != -1) + return (ret); +#endif /* SYS__sysctl */ + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy. See the large + * comment block above. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Linux + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Linux should either retain their + * sysctl ABI, or consider providing a new failsafe API which + * works in a chroot or when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +#ifdef SYS_getrandom +static int +getentropy_getrandom(void *buf, size_t len) +{ + int pre_errno = errno; + int ret; + if (len > 256) + return (-1); + do { + ret = syscall(SYS_getrandom, buf, len, 0); + } while (ret == -1 && errno == EINTR); + + if (ret != len) + return (-1); + errno = pre_errno; + return (0); +} +#endif + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, cnt, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + if (ioctl(fd, RNDGETENTCNT, &cnt) == -1) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +#ifdef SYS__sysctl +static int +getentropy_sysctl(void *buf, size_t len) +{ + static int mib[] = { CTL_KERN, KERN_RANDOM, RANDOM_UUID }; + size_t i; + int save_errno = errno; + + for (i = 0; i < len; ) { + size_t chunk = min(len - i, 16); + + /* SYS__sysctl because some systems already removed sysctl() */ + struct __sysctl_args args = { + .name = mib, + .nlen = 3, + .oldval = (char *)buf + i, + .oldlenp = &chunk, + }; + if (syscall(SYS__sysctl, &args) != 0) + goto sysctlfailed; + i += chunk; + } + if (gotdata(buf, len) == 0) { + errno = save_errno; + return (0); /* satisfied */ + } +sysctlfailed: + errno = EIO; + return -1; +} +#endif /* SYS__sysctl */ + +static int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return 0; +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } +#ifdef HAVE_GETAUXVAL +#ifdef AT_RANDOM + /* Not as random as you think but we take what we are given */ + p = (char *) getauxval(AT_RANDOM); + if (p) + HR(p, 16); +#endif +#ifdef AT_SYSINFO_EHDR + p = (char *) getauxval(AT_SYSINFO_EHDR); + if (p) + HR(p, pgs); +#endif +#ifdef AT_BASE + p = (char *) getauxval(AT_BASE); + if (p) + HD(p); +#endif +#endif + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_osx.c b/compat/getentropy/getentropy_osx.c @@ -0,0 +1,429 @@ +/* $OpenBSD: getentropy_osx.c,v 1.7 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/sysctl.h> +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <mach/mach_time.h> +#include <mach/mach_host.h> +#include <mach/host_info.h> +#include <sys/socketvar.h> +#include <sys/vmmeter.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <netinet/ip_var.h> +#include <netinet/tcp_var.h> +#include <netinet/udp_var.h> +#include <CommonCrypto/CommonDigest.h> +#define SHA512_Update(a, b, c) (CC_SHA512_Update((a), (b), (c))) +#define SHA512_Init(xxx) (CC_SHA512_Init((xxx))) +#define SHA512_Final(xxx, yyy) (CC_SHA512_Final((xxx), (yyy))) +#define SHA512_CTX CC_SHA512_CTX +#define SHA512_DIGEST_LENGTH CC_SHA512_DIGEST_LENGTH + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +static int getentropy_urandom(void *buf, size_t len); +static int getentropy_fallback(void *buf, size_t len); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + + /* + * Try to get entropy with /dev/urandom + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom and sysctl have failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on OSX that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that OSX + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that OSX should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +static int +getentropy_urandom(void *buf, size_t len) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open("/dev/urandom", flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode)) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +static int tcpmib[] = { CTL_NET, AF_INET, IPPROTO_TCP, TCPCTL_STATS }; +static int udpmib[] = { CTL_NET, AF_INET, IPPROTO_UDP, UDPCTL_STATS }; +static int ipmib[] = { CTL_NET, AF_INET, IPPROTO_IP, IPCTL_STATS }; +static int kmib[] = { CTL_KERN, KERN_USRSTACK }; +static int hwmib[] = { CTL_HW, HW_USERMEM }; + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + struct tcpstat tcpstat; + struct udpstat udpstat; + struct ipstat ipstat; + u_int64_t mach_time; + unsigned int idata; + void *addr; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + mach_time = mach_absolute_time(); + HD(mach_time); + + ii = sizeof(addr); + HX(sysctl(kmib, sizeof(kmib) / sizeof(kmib[0]), + &addr, &ii, NULL, 0) == -1, addr); + + ii = sizeof(idata); + HX(sysctl(hwmib, sizeof(hwmib) / sizeof(hwmib[0]), + &idata, &ii, NULL, 0) == -1, idata); + + ii = sizeof(tcpstat); + HX(sysctl(tcpmib, sizeof(tcpmib) / sizeof(tcpmib[0]), + &tcpstat, &ii, NULL, 0) == -1, tcpstat); + + ii = sizeof(udpstat); + HX(sysctl(udpmib, sizeof(udpmib) / sizeof(udpmib[0]), + &udpstat, &ii, NULL, 0) == -1, udpstat); + + ii = sizeof(ipstat); + HX(sysctl(ipmib, sizeof(ipmib) / sizeof(ipmib[0]), + &ipstat, &ii, NULL, 0) == -1, ipstat); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + struct statfs stfs; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + mach_time = mach_absolute_time(); + HD(mach_time); + cnt += (int)mach_time; + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + HX(statfs(".", &stfs) == -1, stfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + HX(statfs("/", &stfs) == -1, stfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX(fstatfs(0, &stfs) == -1, + stfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_solaris.c b/compat/getentropy/getentropy_solaris.c @@ -0,0 +1,445 @@ +/* $OpenBSD: getentropy_solaris.c,v 1.8 2014/07/19 16:12:00 deraadt Exp $ */ + +/* + * Copyright (c) 2014 Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014 Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/syscall.h> +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <link.h> +#include <termios.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <time.h> +#include <sys/sha2.h> +#define SHA512_Init SHA512Init +#define SHA512_Update SHA512Update +#define SHA512_Final SHA512Final + +#include <sys/vfs.h> +#include <sys/statfs.h> +#include <sys/loadavg.h> + +#define REPEAT 5 +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#define HX(a, b) \ + do { \ + if ((a)) \ + HD(errno); \ + else \ + HD(b); \ + } while (0) + +#define HR(x, l) (SHA512_Update(&ctx, (char *)(x), (l))) +#define HD(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (x))) +#define HF(x) (SHA512_Update(&ctx, (char *)&(x), sizeof (void*))) + +int getentropy(void *buf, size_t len); + +static int gotdata(char *buf, size_t len); +static int getentropy_urandom(void *buf, size_t len, const char *path, + int devfscheck); +static int getentropy_fallback(void *buf, size_t len); +static int getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data); + +int +getentropy(void *buf, size_t len) +{ + int ret = -1; + + if (len > 256) { + errno = EIO; + return -1; + } + + /* + * Try to get entropy with /dev/urandom + * + * Solaris provides /dev/urandom as a symbolic link to + * /devices/pseudo/random@0:urandom which is provided by + * a devfs filesystem. Best practice is to use O_NOFOLLOW, + * so we must try the unpublished name directly. + * + * This can fail if the process is inside a chroot which lacks + * the devfs mount, or if file descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, + "/devices/pseudo/random@0:urandom", 1); + if (ret != -1) + return (ret); + + /* + * Unfortunately, chroot spaces on Solaris are sometimes setup + * with direct device node of the well-known /dev/urandom name + * (perhaps to avoid dragging all of devfs into the space). + * + * This can fail if the process is inside a chroot or if file + * descriptors are exhausted. + */ + ret = getentropy_urandom(buf, len, "/dev/urandom", 0); + if (ret != -1) + return (ret); + + /* + * Entropy collection via /dev/urandom has failed. + * + * No other API exists for collecting entropy, and we have + * no failsafe way to get it on Solaris that is not sensitive + * to resource exhaustion. + * + * We have very few options: + * - Even syslog_r is unsafe to call at this low level, so + * there is no way to alert the user or program. + * - Cannot call abort() because some systems have unsafe + * corefiles. + * - Could raise(SIGKILL) resulting in silent program termination. + * - Return EIO, to hint that arc4random's stir function + * should raise(SIGKILL) + * - Do the best under the circumstances.... + * + * This code path exists to bring light to the issue that Solaris + * does not provide a failsafe API for entropy collection. + * + * We hope this demonstrates that Solaris should consider + * providing a new failsafe API which works in a chroot or + * when file descriptors are exhausted. + */ +#undef FAIL_INSTEAD_OF_TRYING_FALLBACK +#ifdef FAIL_INSTEAD_OF_TRYING_FALLBACK + raise(SIGKILL); +#endif + ret = getentropy_fallback(buf, len); + if (ret != -1) + return (ret); + + errno = EIO; + return (ret); +} + +/* + * Basic sanity checking; wish we could do better. + */ +static int +gotdata(char *buf, size_t len) +{ + char any_set = 0; + size_t i; + + for (i = 0; i < len; ++i) + any_set |= buf[i]; + if (any_set == 0) + return -1; + return 0; +} + +static int +getentropy_urandom(void *buf, size_t len, const char *path, int devfscheck) +{ + struct stat st; + size_t i; + int fd, flags; + int save_errno = errno; + +start: + + flags = O_RDONLY; +#ifdef O_NOFOLLOW + flags |= O_NOFOLLOW; +#endif +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + fd = open(path, flags, 0); + if (fd == -1) { + if (errno == EINTR) + goto start; + goto nodevrandom; + } +#ifndef O_CLOEXEC + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); +#endif + + /* Lightly verify that the device node looks sane */ + if (fstat(fd, &st) == -1 || !S_ISCHR(st.st_mode) || + (devfscheck && (strcmp(st.st_fstype, "devfs") != 0))) { + close(fd); + goto nodevrandom; + } + for (i = 0; i < len; ) { + size_t wanted = len - i; + ssize_t ret = read(fd, (char *)buf + i, wanted); + + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) + continue; + close(fd); + goto nodevrandom; + } + i += ret; + } + close(fd); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } +nodevrandom: + errno = EIO; + return -1; +} + +static const int cl[] = { + CLOCK_REALTIME, +#ifdef CLOCK_MONOTONIC + CLOCK_MONOTONIC, +#endif +#ifdef CLOCK_MONOTONIC_RAW + CLOCK_MONOTONIC_RAW, +#endif +#ifdef CLOCK_TAI + CLOCK_TAI, +#endif +#ifdef CLOCK_VIRTUAL + CLOCK_VIRTUAL, +#endif +#ifdef CLOCK_UPTIME + CLOCK_UPTIME, +#endif +#ifdef CLOCK_PROCESS_CPUTIME_ID + CLOCK_PROCESS_CPUTIME_ID, +#endif +#ifdef CLOCK_THREAD_CPUTIME_ID + CLOCK_THREAD_CPUTIME_ID, +#endif +}; + +static int +getentropy_phdr(struct dl_phdr_info *info, size_t size, void *data) +{ + SHA512_CTX *ctx = data; + + SHA512_Update(ctx, &info->dlpi_addr, sizeof (info->dlpi_addr)); + return 0; +} + +static int +getentropy_fallback(void *buf, size_t len) +{ + uint8_t results[SHA512_DIGEST_LENGTH]; + int save_errno = errno, e, pgs = getpagesize(), faster = 0, repeat; + static int cnt; + struct timespec ts; + struct timeval tv; + double loadavg[3]; + struct rusage ru; + sigset_t sigset; + struct stat st; + SHA512_CTX ctx; + static pid_t lastpid; + pid_t pid; + size_t i, ii, m; + char *p; + + pid = getpid(); + if (lastpid == pid) { + faster = 1; + repeat = 2; + } else { + faster = 0; + lastpid = pid; + repeat = REPEAT; + } + for (i = 0; i < len; ) { + int j; + SHA512_Init(&ctx); + for (j = 0; j < repeat; j++) { + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + dl_iterate_phdr(getentropy_phdr, &ctx); + + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); ii++) + HX(clock_gettime(cl[ii], &ts) == -1, ts); + + HX((pid = getpid()) == -1, pid); + HX((pid = getsid(pid)) == -1, pid); + HX((pid = getppid()) == -1, pid); + HX((pid = getpgid(0)) == -1, pid); + HX((e = getpriority(0, 0)) == -1, e); + HX((getloadavg(loadavg, 3) == -1), loadavg); + + if (!faster) { + ts.tv_sec = 0; + ts.tv_nsec = 1; + (void) nanosleep(&ts, NULL); + } + + HX(sigpending(&sigset) == -1, sigset); + HX(sigprocmask(SIG_BLOCK, NULL, &sigset) == -1, + sigset); + + HF(getentropy); /* an addr in this library */ + HF(printf); /* an addr in libc */ + p = (char *)&p; + HD(p); /* an addr on stack */ + p = (char *)&errno; + HD(p); /* the addr of errno */ + + if (i == 0) { + struct sockaddr_storage ss; + struct statvfs stvfs; + struct termios tios; + socklen_t ssl; + off_t off; + + /* + * Prime-sized mappings encourage fragmentation; + * thus exposing some address entropy. + */ + struct mm { + size_t npg; + void *p; + } mm[] = { + { 17, MAP_FAILED }, { 3, MAP_FAILED }, + { 11, MAP_FAILED }, { 2, MAP_FAILED }, + { 5, MAP_FAILED }, { 3, MAP_FAILED }, + { 7, MAP_FAILED }, { 1, MAP_FAILED }, + { 57, MAP_FAILED }, { 3, MAP_FAILED }, + { 131, MAP_FAILED }, { 1, MAP_FAILED }, + }; + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + HX(mm[m].p = mmap(NULL, + mm[m].npg * pgs, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON, -1, + (off_t)0), mm[m].p); + if (mm[m].p != MAP_FAILED) { + size_t mo; + + /* Touch some memory... */ + p = mm[m].p; + mo = cnt % + (mm[m].npg * pgs - 1); + p[mo] = 1; + cnt += (int)((long)(mm[m].p) + / pgs); + } + + /* Check cnts and times... */ + for (ii = 0; ii < sizeof(cl)/sizeof(cl[0]); + ii++) { + HX((e = clock_gettime(cl[ii], + &ts)) == -1, ts); + if (e != -1) + cnt += (int)ts.tv_nsec; + } + + HX((e = getrusage(RUSAGE_SELF, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } + + for (m = 0; m < sizeof mm/sizeof(mm[0]); m++) { + if (mm[m].p != MAP_FAILED) + munmap(mm[m].p, mm[m].npg * pgs); + mm[m].p = MAP_FAILED; + } + + HX(stat(".", &st) == -1, st); + HX(statvfs(".", &stvfs) == -1, stvfs); + + HX(stat("/", &st) == -1, st); + HX(statvfs("/", &stvfs) == -1, stvfs); + + HX((e = fstat(0, &st)) == -1, st); + if (e == -1) { + if (S_ISREG(st.st_mode) || + S_ISFIFO(st.st_mode) || + S_ISSOCK(st.st_mode)) { + HX(fstatvfs(0, &stvfs) == -1, + stvfs); + HX((off = lseek(0, (off_t)0, + SEEK_CUR)) < 0, off); + } + if (S_ISCHR(st.st_mode)) { + HX(tcgetattr(0, &tios) == -1, + tios); + } else if (S_ISSOCK(st.st_mode)) { + memset(&ss, 0, sizeof ss); + ssl = sizeof(ss); + HX(getpeername(0, + (void *)&ss, &ssl) == -1, + ss); + } + } + + HX((e = getrusage(RUSAGE_CHILDREN, + &ru)) == -1, ru); + if (e != -1) { + cnt += (int)ru.ru_utime.tv_sec; + cnt += (int)ru.ru_utime.tv_usec; + } + } else { + /* Subsequent hashes absorb previous result */ + HD(results); + } + + HX((e = gettimeofday(&tv, NULL)) == -1, tv); + if (e != -1) { + cnt += (int)tv.tv_sec; + cnt += (int)tv.tv_usec; + } + + HD(cnt); + } + SHA512_Final(results, &ctx); + memcpy((char *)buf + i, results, min(sizeof(results), len - i)); + i += min(sizeof(results), len - i); + } + explicit_bzero(&ctx, sizeof ctx); + explicit_bzero(results, sizeof results); + if (gotdata(buf, len) == 0) { + errno = save_errno; + return 0; /* satisfied */ + } + errno = EIO; + return -1; +} diff --git a/compat/getentropy/getentropy_win.c b/compat/getentropy/getentropy_win.c @@ -0,0 +1,59 @@ +/* $OpenBSD: getentropy_win.c,v 1.2 2014/07/13 13:03:09 deraadt Exp $ */ + +/* + * Copyright (c) 2014, Theo de Raadt <deraadt@openbsd.org> + * Copyright (c) 2014, Bob Beck <beck@obtuse.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Emulation of getentropy(2) as documented at: + * http://www.openbsd.org/cgi-bin/man.cgi/OpenBSD-current/man2/getentropy.2 + */ + +#include <windows.h> +#include <errno.h> +#include <stdint.h> +#include <sys/types.h> +#include <wincrypt.h> +#include <process.h> + +int getentropy(void *buf, size_t len); + +/* + * On Windows, CryptGenRandom is supposed to be a well-seeded + * cryptographically strong random number generator. + */ +int +getentropy(void *buf, size_t len) +{ + HCRYPTPROV provider; + + if (len > 256) { + errno = EIO; + return -1; + } + + if (CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) == 0) + goto fail; + if (CryptGenRandom(provider, len, buf) == 0) { + CryptReleaseContext(provider, 0); + goto fail; + } + CryptReleaseContext(provider, 0); + return (0); + +fail: + errno = EIO; + return (-1); +} diff --git a/compat/safebfuns.c b/compat/safebfuns.c @@ -0,0 +1,106 @@ +/* Public domain */ + +#include <string.h> + +#if __clang__ + /* + * http://clang.llvm.org/docs/LanguageExtensions.html#feature-checking-macros + * https://www.mail-archive.com/cfe-commits@cs.uiuc.edu/msg97459.html + */ + #if __clang_major__ > 3 || ( __clang_major__ == 3 && __clang_minor__ >= 5 ) + #pragma clang optimize off + #define NOINLINE __attribute__ (( noinline )) + #else + #error "require clang >= 3.5" + #endif +#elif __GNUC__ + /* + * http://gcc.gnu.org/onlinedocs/gcc/Function-Specific-Option-Pragmas.html + * http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html + */ + #if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 ) + #pragma GCC push_options + #pragma GCC optimize ( "-O0" ) + #define NOINLINE __attribute__ (( noinline )) + #else + #error "require gcc >= 4.4" + #endif +#else + #error "unrecognised compiler" +#endif + +NOINLINE void explicit_bzero( void * const buf, const size_t n ) { + size_t i; + unsigned char * p = buf; + + for( i = 0; i < n; i++ ) { + p[ i ] = 0; + } +} + +NOINLINE int timingsafe_bcmp( const void * const b1, const void * const b2, const size_t n ) { + size_t i; + const unsigned char * const p1 = b1; + const unsigned char * const p2 = b2; + int result = 0; + + for( i = 0; i < n; i++ ) { + result |= p1[ i ] ^ p2[ i ]; + } + + return result != 0; +} + +/* $OpenBSD: timingsafe_memcmp.c,v 1.1 2014/06/13 02:12:17 matthew Exp $ */ +/* + * Copyright (c) 2014 Google Inc. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <limits.h> +#include <string.h> + +NOINLINE int timingsafe_memcmp( const void * const b1, const void * const b2, const size_t len ) { + const unsigned char * p1 = b1; + const unsigned char * p2 = b2; + size_t i; + int res = 0, done = 0; + + for( i = 0; i < len; i++ ) { + /* lt is -1 if p1[i] < p2[i]; else 0. */ + int lt = ( p1[ i ] - p2[ i ] ) >> CHAR_BIT; + + /* gt is -1 if p1[i] > p2[i]; else 0. */ + int gt = ( p2[ i ] - p1[ i ] ) >> CHAR_BIT; + + /* cmp is 1 if p1[i] > p2[i]; -1 if p1[i] < p2[i]; else 0. */ + int cmp = lt - gt; + + /* set res = cmp if !done. */ + res |= cmp & ~done; + + /* set done if p1[i] != p2[i]. */ + done |= lt | gt; + } + + return res; +} + +/* Public domain */ + +#ifdef __clang__ + #pragma clang optimize on +#else + #pragma GCC pop_options +#endif diff --git a/compat/sha/sha1-elf-x86_64.s b/compat/sha/sha1-elf-x86_64.s @@ -0,0 +1,2486 @@ +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl OPENSSL_ia32cap_P+0(%rip),%r9d + movl OPENSSL_ia32cap_P+4(%rip),%r8d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 diff --git a/compat/sha/sha1-macosx-x86_64.s b/compat/sha/sha1-macosx-x86_64.s @@ -0,0 +1,2486 @@ +.text + + +.globl _sha1_block_data_order + +.p2align 4 +_sha1_block_data_order: + movl _OPENSSL_ia32cap_P+0(%rip),%r9d + movl _OPENSSL_ia32cap_P+4(%rip),%r8d + testl $512,%r8d + jz L$ialu + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + addl 0(%rsp),%ebp + xorl %edx,%ecx + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + roll $5,%eax + paddd %xmm3,%xmm9 + andl %ecx,%esi + xorl %edx,%ecx + psrldq $4,%xmm8 + xorl %edx,%esi + addl %eax,%ebp + pxor %xmm0,%xmm4 + rorl $2,%ebx + addl %esi,%ebp + pxor %xmm2,%xmm8 + addl 4(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pxor %xmm8,%xmm4 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm9,48(%rsp) + xorl %ecx,%edi + addl %ebp,%edx + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + rorl $7,%eax + addl %edi,%edx + addl 8(%rsp),%ecx + xorl %ebx,%eax + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + roll $5,%edx + andl %eax,%esi + xorl %ebx,%eax + psrld $31,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + movdqa %xmm10,%xmm9 + rorl $7,%ebp + addl %esi,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + addl 12(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + andl %ebp,%edi + xorl %eax,%ebp + movdqa 0(%r11),%xmm10 + xorl %eax,%edi + addl %ecx,%ebx + pxor %xmm9,%xmm4 + rorl $7,%edx + addl %edi,%ebx + movdqa %xmm2,%xmm5 + addl 16(%rsp),%eax + xorl %ebp,%edx + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + roll $5,%ebx + paddd %xmm4,%xmm10 + andl %edx,%esi + xorl %ebp,%edx + psrldq $4,%xmm9 + xorl %ebp,%esi + addl %ebx,%eax + pxor %xmm1,%xmm5 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm3,%xmm9 + addl 20(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pxor %xmm9,%xmm5 + andl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm10,0(%rsp) + xorl %edx,%edi + addl %eax,%ebp + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + rorl $7,%ebx + addl %edi,%ebp + addl 24(%rsp),%edx + xorl %ecx,%ebx + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + roll $5,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + psrld $31,%xmm9 + xorl %ecx,%esi + addl %ebp,%edx + movdqa %xmm8,%xmm10 + rorl $7,%eax + addl %esi,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + addl 28(%rsp),%ecx + xorl %ebx,%eax + movl %edx,%esi + roll $5,%edx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + andl %eax,%edi + xorl %ebx,%eax + movdqa 16(%r11),%xmm8 + xorl %ebx,%edi + addl %edx,%ecx + pxor %xmm10,%xmm5 + rorl $7,%ebp + addl %edi,%ecx + movdqa %xmm3,%xmm6 + addl 32(%rsp),%ebx + xorl %eax,%ebp + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm5,%xmm8 + andl %ebp,%esi + xorl %eax,%ebp + psrldq $4,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + pxor %xmm2,%xmm6 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm4,%xmm10 + addl 36(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + pxor %xmm10,%xmm6 + andl %edx,%edi + xorl %ebp,%edx + movdqa %xmm8,16(%rsp) + xorl %ebp,%edi + addl %ebx,%eax + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + rorl $7,%ecx + addl %edi,%eax + addl 40(%rsp),%ebp + xorl %edx,%ecx + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + roll $5,%eax + andl %ecx,%esi + xorl %edx,%ecx + psrld $31,%xmm10 + xorl %edx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + addl 44(%rsp),%edx + xorl %ecx,%ebx + movl %ebp,%esi + roll $5,%ebp + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + andl %ebx,%edi + xorl %ecx,%ebx + movdqa 16(%r11),%xmm9 + xorl %ecx,%edi + addl %ebp,%edx + pxor %xmm8,%xmm6 + rorl $7,%eax + addl %edi,%edx + movdqa %xmm4,%xmm7 + addl 48(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + roll $5,%edx + paddd %xmm6,%xmm9 + andl %eax,%esi + xorl %ebx,%eax + psrldq $4,%xmm8 + xorl %ebx,%esi + addl %edx,%ecx + pxor %xmm3,%xmm7 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm5,%xmm8 + addl 52(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%esi + roll $5,%ecx + pxor %xmm8,%xmm7 + andl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm9,32(%rsp) + xorl %eax,%edi + addl %ecx,%ebx + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + rorl $7,%edx + addl %edi,%ebx + addl 56(%rsp),%eax + xorl %ebp,%edx + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + roll $5,%ebx + andl %edx,%esi + xorl %ebp,%edx + psrld $31,%xmm8 + xorl %ebp,%esi + addl %ebx,%eax + movdqa %xmm10,%xmm9 + rorl $7,%ecx + addl %esi,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + addl 60(%rsp),%ebp + xorl %edx,%ecx + movl %eax,%esi + roll $5,%eax + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + andl %ecx,%edi + xorl %edx,%ecx + movdqa 16(%r11),%xmm10 + xorl %edx,%edi + addl %eax,%ebp + pxor %xmm9,%xmm7 + rorl $7,%ebx + addl %edi,%ebp + movdqa %xmm7,%xmm9 + addl 0(%rsp),%edx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%ebx + movl %ebp,%edi + roll $5,%ebp + pxor %xmm1,%xmm0 + andl %ebx,%esi + xorl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + xorl %ecx,%esi + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%eax + addl %esi,%edx + addl 4(%rsp),%ecx + xorl %ebx,%eax + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + roll $5,%edx + andl %eax,%edi + xorl %ebx,%eax + pslld $2,%xmm0 + xorl %ebx,%edi + addl %edx,%ecx + psrld $30,%xmm9 + rorl $7,%ebp + addl %edi,%ecx + addl 8(%rsp),%ebx + xorl %eax,%ebp + movl %ecx,%edi + roll $5,%ecx + por %xmm9,%xmm0 + andl %ebp,%esi + xorl %eax,%ebp + movdqa %xmm0,%xmm10 + xorl %eax,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edx + movl %ebx,%esi + roll $5,%ebx + andl %edx,%edi + xorl %ebp,%edx + xorl %ebp,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %eax,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + xorl %edx,%esi + addl %ebx,%eax + movdqa 32(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %esi,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ecx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %ebx,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + xorl %ebp,%esi + addl %ecx,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %esi,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %edx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ecx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + xorl %eax,%esi + addl %edx,%ecx + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %esi,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %ebp,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %edx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + xorl %ebx,%esi + addl %ebp,%edx + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %esi,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %eax,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + xorl %ebp,%edi + movdqa %xmm5,%xmm9 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + movl %ecx,%edi + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + xorl %edx,%ecx + addl 32(%rsp),%ebp + andl %edx,%edi + pxor %xmm7,%xmm6 + andl %ecx,%esi + rorl $7,%ebx + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + addl %edi,%ebp + movl %eax,%edi + pxor %xmm9,%xmm6 + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + movl %ebx,%esi + xorl %ecx,%ebx + addl 36(%rsp),%edx + andl %ecx,%esi + pslld $2,%xmm6 + andl %ebx,%edi + rorl $7,%eax + psrld $30,%xmm9 + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + por %xmm9,%xmm6 + movl %eax,%edi + xorl %ebx,%eax + movdqa %xmm6,%xmm10 + addl 40(%rsp),%ecx + andl %ebx,%edi + andl %eax,%esi + rorl $7,%ebp + addl %edi,%ecx + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%esi + xorl %eax,%ebp + addl 44(%rsp),%ebx + andl %eax,%esi + andl %ebp,%edi + rorl $7,%edx + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%edi + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + xorl %ebp,%edx + addl 48(%rsp),%eax + andl %ebp,%edi + pxor %xmm0,%xmm7 + andl %edx,%esi + rorl $7,%ecx + movdqa 48(%r11),%xmm9 + paddd %xmm6,%xmm8 + addl %edi,%eax + movl %ebx,%edi + pxor %xmm10,%xmm7 + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + movl %ecx,%esi + xorl %edx,%ecx + addl 52(%rsp),%ebp + andl %edx,%esi + pslld $2,%xmm7 + andl %ecx,%edi + rorl $7,%ebx + psrld $30,%xmm10 + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + por %xmm10,%xmm7 + movl %ebx,%edi + xorl %ecx,%ebx + movdqa %xmm7,%xmm8 + addl 56(%rsp),%edx + andl %ecx,%edi + andl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%esi + xorl %ebx,%eax + addl 60(%rsp),%ecx + andl %ebx,%esi + andl %eax,%edi + rorl $7,%ebp + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movl %ebp,%edi + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + xorl %eax,%ebp + addl 0(%rsp),%ebx + andl %eax,%edi + pxor %xmm1,%xmm0 + andl %ebp,%esi + rorl $7,%edx + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + addl %edi,%ebx + movl %ecx,%edi + pxor %xmm8,%xmm0 + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + movl %edx,%esi + xorl %ebp,%edx + addl 4(%rsp),%eax + andl %ebp,%esi + pslld $2,%xmm0 + andl %edx,%edi + rorl $7,%ecx + psrld $30,%xmm8 + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + por %xmm8,%xmm0 + movl %ecx,%edi + xorl %edx,%ecx + movdqa %xmm0,%xmm9 + addl 8(%rsp),%ebp + andl %edx,%edi + andl %ecx,%esi + rorl $7,%ebx + addl %edi,%ebp + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%esi + xorl %ecx,%ebx + addl 12(%rsp),%edx + andl %ecx,%esi + andl %ebx,%edi + rorl $7,%eax + addl %esi,%edx + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movl %eax,%edi + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + xorl %ebx,%eax + addl 16(%rsp),%ecx + andl %ebx,%edi + pxor %xmm2,%xmm1 + andl %eax,%esi + rorl $7,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + addl %edi,%ecx + movl %edx,%edi + pxor %xmm9,%xmm1 + roll $5,%edx + addl %esi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + movl %ebp,%esi + xorl %eax,%ebp + addl 20(%rsp),%ebx + andl %eax,%esi + pslld $2,%xmm1 + andl %ebp,%edi + rorl $7,%edx + psrld $30,%xmm9 + addl %esi,%ebx + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + por %xmm9,%xmm1 + movl %edx,%edi + xorl %ebp,%edx + movdqa %xmm1,%xmm10 + addl 24(%rsp),%eax + andl %ebp,%edi + andl %edx,%esi + rorl $7,%ecx + addl %edi,%eax + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %ebp,%edx + addl %ebx,%eax + movl %ecx,%esi + xorl %edx,%ecx + addl 28(%rsp),%ebp + andl %edx,%esi + andl %ecx,%edi + rorl $7,%ebx + addl %esi,%ebp + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %edx,%ecx + addl %eax,%ebp + movl %ebx,%edi + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + xorl %ecx,%ebx + addl 32(%rsp),%edx + andl %ecx,%edi + pxor %xmm3,%xmm2 + andl %ebx,%esi + rorl $7,%eax + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + addl %edi,%edx + movl %ebp,%edi + pxor %xmm10,%xmm2 + roll $5,%ebp + addl %esi,%edx + xorl %ecx,%ebx + addl %ebp,%edx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + movl %eax,%esi + xorl %ebx,%eax + addl 36(%rsp),%ecx + andl %ebx,%esi + pslld $2,%xmm2 + andl %eax,%edi + rorl $7,%ebp + psrld $30,%xmm10 + addl %esi,%ecx + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %ebx,%eax + addl %edx,%ecx + por %xmm10,%xmm2 + movl %ebp,%edi + xorl %eax,%ebp + movdqa %xmm2,%xmm8 + addl 40(%rsp),%ebx + andl %eax,%edi + andl %ebp,%esi + rorl $7,%edx + addl %edi,%ebx + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %eax,%ebp + addl %ecx,%ebx + movl %edx,%esi + xorl %ebp,%edx + addl 44(%rsp),%eax + andl %ebp,%esi + andl %edx,%edi + rorl $7,%ecx + addl %esi,%eax + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %ebp,%edx + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + xorl %ecx,%esi + addl %eax,%ebp + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %esi,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + movdqa %xmm10,48(%rsp) + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 4(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 8(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 12(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa 0(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %eax,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + paddd %xmm9,%xmm0 + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + movdqa %xmm0,0(%rsp) + addl 20(%rsp),%eax + xorl %ebp,%edi + psubd %xmm9,%xmm0 + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + paddd %xmm9,%xmm1 + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + movdqa %xmm1,16(%rsp) + addl 36(%rsp),%ebx + xorl %eax,%edi + psubd %xmm9,%xmm1 + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + paddd %xmm9,%xmm2 + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + movdqa %xmm2,32(%rsp) + addl 52(%rsp),%ecx + xorl %ebx,%edi + psubd %xmm9,%xmm2 + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 20(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 24(%rsp),%ebp + xorl %edx,%esi + movl %eax,%edi + roll $5,%eax + xorl %ecx,%esi + addl %eax,%ebp + rorl $7,%ebx + addl %esi,%ebp + addl 28(%rsp),%edx + xorl %ecx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ebx,%edi + addl %ebp,%edx + rorl $7,%eax + addl %edi,%edx + addl 32(%rsp),%ecx + xorl %ebx,%esi + movl %edx,%edi + roll $5,%edx + xorl %eax,%esi + addl %edx,%ecx + rorl $7,%ebp + addl %esi,%ecx + addl 36(%rsp),%ebx + xorl %eax,%edi + movl %ecx,%esi + roll $5,%ecx + xorl %ebp,%edi + addl %ecx,%ebx + rorl $7,%edx + addl %edi,%ebx + addl 40(%rsp),%eax + xorl %ebp,%esi + movl %ebx,%edi + roll $5,%ebx + xorl %edx,%esi + addl %ebx,%eax + rorl $7,%ecx + addl %esi,%eax + addl 44(%rsp),%ebp + xorl %edx,%edi + movl %eax,%esi + roll $5,%eax + xorl %ecx,%edi + addl %eax,%ebp + rorl $7,%ebx + addl %edi,%ebp + addl 48(%rsp),%edx + xorl %ecx,%esi + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%esi + addl %ebp,%edx + rorl $7,%eax + addl %esi,%edx + addl 52(%rsp),%ecx + xorl %ebx,%edi + movl %edx,%esi + roll $5,%edx + xorl %eax,%edi + addl %edx,%ecx + rorl $7,%ebp + addl %edi,%ecx + addl 56(%rsp),%ebx + xorl %eax,%esi + movl %ecx,%edi + roll $5,%ecx + xorl %ebp,%esi + addl %ecx,%ebx + rorl $7,%edx + addl %esi,%ebx + addl 60(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + xorl %edx,%edi + addl %ebx,%eax + rorl $7,%ecx + addl %edi,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 diff --git a/compat/sha/sha1_one.c b/compat/sha/sha1_one.c @@ -0,0 +1,81 @@ +/* $OpenBSD: sha1_one.c,v 1.10 2014/06/12 15:49:30 deraadt Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include <stdio.h> +#include <string.h> + +#include <openssl/opensslconf.h> + +#include <openssl/crypto.h> +#include <openssl/sha.h> + +#ifndef OPENSSL_NO_SHA1 +unsigned char *SHA1(const unsigned char *d, size_t n, unsigned char *md) + { + SHA_CTX c; + static unsigned char m[SHA_DIGEST_LENGTH]; + + if (md == NULL) md=m; + if (!SHA1_Init(&c)) + return NULL; + SHA1_Update(&c,d,n); + SHA1_Final(md,&c); + OPENSSL_cleanse(&c,sizeof(c)); + return(md); + } +#endif diff --git a/compat/sha/sha1dgst.c b/compat/sha/sha1dgst.c @@ -0,0 +1,75 @@ +/* $OpenBSD: sha1dgst.c,v 1.12 2014/07/09 11:10:51 bcook Exp $ */ +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +#include <openssl/opensslconf.h> + +#include <openssl/crypto.h> + +#if !defined(OPENSSL_NO_SHA1) && !defined(OPENSSL_NO_SHA) + +#undef SHA_0 +#define SHA_1 + +#include <openssl/opensslv.h> + +/* The implementation is in ../md32_common.h */ + +#include "sha_locl.h" + +#endif + diff --git a/compat/sha/sha256-elf-x86_64.S b/compat/sha/sha256-elf-x86_64.S @@ -0,0 +1,1778 @@ +.text + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/compat/sha/sha256-macosx-x86_64.S b/compat/sha/sha256-macosx-x86_64.S @@ -0,0 +1,1778 @@ +.text + +.globl _sha256_block_data_order + +.p2align 4 +_sha256_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue: + + leaq K256(%rip),%rbp + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + xorq %rdi,%rdi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 36(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,0(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 8(%rsp),%r13d + movl 60(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 40(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,4(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 12(%rsp),%r13d + movl 0(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 44(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,8(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 16(%rsp),%r13d + movl 4(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 48(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,12(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 20(%rsp),%r13d + movl 8(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 52(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,16(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 24(%rsp),%r13d + movl 12(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 56(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,20(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 28(%rsp),%r13d + movl 16(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 60(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,24(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 32(%rsp),%r13d + movl 20(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 0(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,28(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + movl 36(%rsp),%r13d + movl 24(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 4(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r14d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + movl %r12d,32(%rsp) + + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %eax,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r8d,%r15d + movl %ebx,%r11d + + rorl $11,%r14d + xorl %r8d,%r13d + xorl %r10d,%r15d + + xorl %ecx,%r11d + xorl %eax,%r14d + addl %r15d,%r12d + movl %ebx,%r15d + + rorl $6,%r13d + andl %eax,%r11d + andl %ecx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r11d + + addl %r12d,%edx + addl %r12d,%r11d + leaq 1(%rdi),%rdi + addl %r14d,%r11d + + movl 40(%rsp),%r13d + movl 28(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 8(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %r14d,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%r15d + movl %r12d,36(%rsp) + + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r11d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %edx,%r15d + movl %eax,%r10d + + rorl $11,%r14d + xorl %edx,%r13d + xorl %r9d,%r15d + + xorl %ebx,%r10d + xorl %r11d,%r14d + addl %r15d,%r12d + movl %eax,%r15d + + rorl $6,%r13d + andl %r11d,%r10d + andl %ebx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r10d + + addl %r12d,%ecx + addl %r12d,%r10d + leaq 1(%rdi),%rdi + addl %r14d,%r10d + + movl 44(%rsp),%r13d + movl 32(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 12(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r14d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + movl %r12d,40(%rsp) + + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r10d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ecx,%r15d + movl %r11d,%r9d + + rorl $11,%r14d + xorl %ecx,%r13d + xorl %r8d,%r15d + + xorl %eax,%r9d + xorl %r10d,%r14d + addl %r15d,%r12d + movl %r11d,%r15d + + rorl $6,%r13d + andl %r10d,%r9d + andl %eax,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r9d + + addl %r12d,%ebx + addl %r12d,%r9d + leaq 1(%rdi),%rdi + addl %r14d,%r9d + + movl 48(%rsp),%r13d + movl 36(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 16(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %r14d,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%r15d + movl %r12d,44(%rsp) + + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + rorl $5,%r13d + addl %r8d,%r12d + xorl %r9d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %ebx,%r15d + movl %r10d,%r8d + + rorl $11,%r14d + xorl %ebx,%r13d + xorl %edx,%r15d + + xorl %r11d,%r8d + xorl %r9d,%r14d + addl %r15d,%r12d + movl %r10d,%r15d + + rorl $6,%r13d + andl %r9d,%r8d + andl %r11d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%r8d + + addl %r12d,%eax + addl %r12d,%r8d + leaq 1(%rdi),%rdi + addl %r14d,%r8d + + movl 52(%rsp),%r13d + movl 40(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 20(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r14d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + movl %r12d,48(%rsp) + + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %r8d,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %eax,%r15d + movl %r9d,%edx + + rorl $11,%r14d + xorl %eax,%r13d + xorl %ecx,%r15d + + xorl %r10d,%edx + xorl %r8d,%r14d + addl %r15d,%r12d + movl %r9d,%r15d + + rorl $6,%r13d + andl %r8d,%edx + andl %r10d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%edx + + addl %r12d,%r11d + addl %r12d,%edx + leaq 1(%rdi),%rdi + addl %r14d,%edx + + movl 56(%rsp),%r13d + movl 44(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 24(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %r14d,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%r15d + movl %r12d,52(%rsp) + + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + rorl $5,%r13d + addl %ecx,%r12d + xorl %edx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r11d,%r15d + movl %r8d,%ecx + + rorl $11,%r14d + xorl %r11d,%r13d + xorl %ebx,%r15d + + xorl %r9d,%ecx + xorl %edx,%r14d + addl %r15d,%r12d + movl %r8d,%r15d + + rorl $6,%r13d + andl %edx,%ecx + andl %r9d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ecx + + addl %r12d,%r10d + addl %r12d,%ecx + leaq 1(%rdi),%rdi + addl %r14d,%ecx + + movl 60(%rsp),%r13d + movl 48(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 28(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r14d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + movl %r12d,56(%rsp) + + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %ecx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r10d,%r15d + movl %edx,%ebx + + rorl $11,%r14d + xorl %r10d,%r13d + xorl %eax,%r15d + + xorl %r8d,%ebx + xorl %ecx,%r14d + addl %r15d,%r12d + movl %edx,%r15d + + rorl $6,%r13d + andl %ecx,%ebx + andl %r8d,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%ebx + + addl %r12d,%r9d + addl %r12d,%ebx + leaq 1(%rdi),%rdi + addl %r14d,%ebx + + movl 0(%rsp),%r13d + movl 52(%rsp),%r14d + movl %r13d,%r12d + movl %r14d,%r15d + + rorl $11,%r12d + xorl %r13d,%r12d + shrl $3,%r13d + + rorl $7,%r12d + xorl %r12d,%r13d + movl 32(%rsp),%r12d + + rorl $2,%r15d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + addl %r13d,%r12d + xorl %r15d,%r14d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %r14d,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%r15d + movl %r12d,60(%rsp) + + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + rorl $5,%r13d + addl %eax,%r12d + xorl %ebx,%r14d + + addl (%rbp,%rdi,4),%r12d + andl %r9d,%r15d + movl %ecx,%eax + + rorl $11,%r14d + xorl %r9d,%r13d + xorl %r11d,%r15d + + xorl %edx,%eax + xorl %ebx,%r14d + addl %r15d,%r12d + movl %ecx,%r15d + + rorl $6,%r13d + andl %ebx,%eax + andl %edx,%r15d + + rorl $2,%r14d + addl %r13d,%r12d + addl %r15d,%eax + + addl %r12d,%r8d + addl %r12d,%eax + leaq 1(%rdi),%rdi + addl %r14d,%eax + + cmpq $64,%rdi + jb L$rounds_16_xx + + movq 64+0(%rsp),%rdi + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/compat/sha/sha256.c b/compat/sha/sha256.c @@ -0,0 +1,284 @@ +/* $OpenBSD: sha256.c,v 1.7 2014/07/10 22:45:58 jsing Exp $ */ +/* ==================================================================== + * Copyright (c) 2004 The OpenSSL Project. All rights reserved + * according to the OpenSSL license [found in ../../LICENSE]. + * ==================================================================== + */ + +#include <openssl/opensslconf.h> + +#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA256) + +#include <machine/endian.h> + +#include <stdlib.h> +#include <string.h> + +#include <openssl/crypto.h> +#include <openssl/sha.h> +#include <openssl/opensslv.h> + +int SHA224_Init(SHA256_CTX *c) + { + memset (c,0,sizeof(*c)); + c->h[0]=0xc1059ed8UL; c->h[1]=0x367cd507UL; + c->h[2]=0x3070dd17UL; c->h[3]=0xf70e5939UL; + c->h[4]=0xffc00b31UL; c->h[5]=0x68581511UL; + c->h[6]=0x64f98fa7UL; c->h[7]=0xbefa4fa4UL; + c->md_len=SHA224_DIGEST_LENGTH; + return 1; + } + +int SHA256_Init(SHA256_CTX *c) + { + memset (c,0,sizeof(*c)); + c->h[0]=0x6a09e667UL; c->h[1]=0xbb67ae85UL; + c->h[2]=0x3c6ef372UL; c->h[3]=0xa54ff53aUL; + c->h[4]=0x510e527fUL; c->h[5]=0x9b05688cUL; + c->h[6]=0x1f83d9abUL; c->h[7]=0x5be0cd19UL; + c->md_len=SHA256_DIGEST_LENGTH; + return 1; + } + +unsigned char *SHA224(const unsigned char *d, size_t n, unsigned char *md) + { + SHA256_CTX c; + static unsigned char m[SHA224_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA224_Init(&c); + SHA256_Update(&c,d,n); + SHA256_Final(md,&c); + OPENSSL_cleanse(&c,sizeof(c)); + return(md); + } + +unsigned char *SHA256(const unsigned char *d, size_t n, unsigned char *md) + { + SHA256_CTX c; + static unsigned char m[SHA256_DIGEST_LENGTH]; + + if (md == NULL) md=m; + SHA256_Init(&c); + SHA256_Update(&c,d,n); + SHA256_Final(md,&c); + OPENSSL_cleanse(&c,sizeof(c)); + return(md); + } + +int SHA224_Update(SHA256_CTX *c, const void *data, size_t len) +{ return SHA256_Update (c,data,len); } +int SHA224_Final (unsigned char *md, SHA256_CTX *c) +{ return SHA256_Final (md,c); } + +#define DATA_ORDER_IS_BIG_ENDIAN + +#define HASH_LONG SHA_LONG +#define HASH_CTX SHA256_CTX +#define HASH_CBLOCK SHA_CBLOCK +/* + * Note that FIPS180-2 discusses "Truncation of the Hash Function Output." + * default: case below covers for it. It's not clear however if it's + * permitted to truncate to amount of bytes not divisible by 4. I bet not, + * but if it is, then default: case shall be extended. For reference. + * Idea behind separate cases for pre-defined lenghts is to let the + * compiler decide if it's appropriate to unroll small loops. + */ +#define HASH_MAKE_STRING(c,s) do { \ + unsigned long ll; \ + unsigned int nn; \ + switch ((c)->md_len) \ + { case SHA224_DIGEST_LENGTH: \ + for (nn=0;nn<SHA224_DIGEST_LENGTH/4;nn++) \ + { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \ + break; \ + case SHA256_DIGEST_LENGTH: \ + for (nn=0;nn<SHA256_DIGEST_LENGTH/4;nn++) \ + { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \ + break; \ + default: \ + if ((c)->md_len > SHA256_DIGEST_LENGTH) \ + return 0; \ + for (nn=0;nn<(c)->md_len/4;nn++) \ + { ll=(c)->h[nn]; HOST_l2c(ll,(s)); } \ + break; \ + } \ + } while (0) + +#define HASH_UPDATE SHA256_Update +#define HASH_TRANSFORM SHA256_Transform +#define HASH_FINAL SHA256_Final +#define HASH_BLOCK_DATA_ORDER sha256_block_data_order +#ifndef SHA256_ASM +static +#endif +void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num); + +#include "md32_common.h" + +#ifndef SHA256_ASM +static const SHA_LONG K256[64] = { + 0x428a2f98UL,0x71374491UL,0xb5c0fbcfUL,0xe9b5dba5UL, + 0x3956c25bUL,0x59f111f1UL,0x923f82a4UL,0xab1c5ed5UL, + 0xd807aa98UL,0x12835b01UL,0x243185beUL,0x550c7dc3UL, + 0x72be5d74UL,0x80deb1feUL,0x9bdc06a7UL,0xc19bf174UL, + 0xe49b69c1UL,0xefbe4786UL,0x0fc19dc6UL,0x240ca1ccUL, + 0x2de92c6fUL,0x4a7484aaUL,0x5cb0a9dcUL,0x76f988daUL, + 0x983e5152UL,0xa831c66dUL,0xb00327c8UL,0xbf597fc7UL, + 0xc6e00bf3UL,0xd5a79147UL,0x06ca6351UL,0x14292967UL, + 0x27b70a85UL,0x2e1b2138UL,0x4d2c6dfcUL,0x53380d13UL, + 0x650a7354UL,0x766a0abbUL,0x81c2c92eUL,0x92722c85UL, + 0xa2bfe8a1UL,0xa81a664bUL,0xc24b8b70UL,0xc76c51a3UL, + 0xd192e819UL,0xd6990624UL,0xf40e3585UL,0x106aa070UL, + 0x19a4c116UL,0x1e376c08UL,0x2748774cUL,0x34b0bcb5UL, + 0x391c0cb3UL,0x4ed8aa4aUL,0x5b9cca4fUL,0x682e6ff3UL, + 0x748f82eeUL,0x78a5636fUL,0x84c87814UL,0x8cc70208UL, + 0x90befffaUL,0xa4506cebUL,0xbef9a3f7UL,0xc67178f2UL }; + +/* + * FIPS specification refers to right rotations, while our ROTATE macro + * is left one. This is why you might notice that rotation coefficients + * differ from those observed in FIPS document by 32-N... + */ +#define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10)) +#define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7)) +#define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3)) +#define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10)) + +#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#ifdef OPENSSL_SMALL_FOOTPRINT + +static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num) + { + unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2; + SHA_LONG X[16],l; + int i; + const unsigned char *data=in; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + for (i=0;i<16;i++) + { + HOST_c2l(data,l); T1 = X[i] = l; + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + for (;i<64;i++) + { + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); + + T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf]; + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; + T2 = Sigma0(a) + Maj(a,b,c); + h = g; g = f; f = e; e = d + T1; + d = c; c = b; b = a; a = T1 + T2; + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + } +} + +#else + +#define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \ + T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i]; \ + h = Sigma0(a) + Maj(a,b,c); \ + d += T1; h += T1; } while (0) + +#define ROUND_16_63(i,a,b,c,d,e,f,g,h,X) do { \ + s0 = X[(i+1)&0x0f]; s0 = sigma0(s0); \ + s1 = X[(i+14)&0x0f]; s1 = sigma1(s1); \ + T1 = X[(i)&0x0f] += s0 + s1 + X[(i+9)&0x0f]; \ + ROUND_00_15(i,a,b,c,d,e,f,g,h); } while (0) + +static void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num) + { + unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1; + SHA_LONG X[16]; + int i; + const unsigned char *data=in; + + while (num--) { + + a = ctx->h[0]; b = ctx->h[1]; c = ctx->h[2]; d = ctx->h[3]; + e = ctx->h[4]; f = ctx->h[5]; g = ctx->h[6]; h = ctx->h[7]; + + if (BYTE_ORDER != LITTLE_ENDIAN && + sizeof(SHA_LONG)==4 && ((size_t)in%4)==0) + { + const SHA_LONG *W=(const SHA_LONG *)data; + + T1 = X[0] = W[0]; ROUND_00_15(0,a,b,c,d,e,f,g,h); + T1 = X[1] = W[1]; ROUND_00_15(1,h,a,b,c,d,e,f,g); + T1 = X[2] = W[2]; ROUND_00_15(2,g,h,a,b,c,d,e,f); + T1 = X[3] = W[3]; ROUND_00_15(3,f,g,h,a,b,c,d,e); + T1 = X[4] = W[4]; ROUND_00_15(4,e,f,g,h,a,b,c,d); + T1 = X[5] = W[5]; ROUND_00_15(5,d,e,f,g,h,a,b,c); + T1 = X[6] = W[6]; ROUND_00_15(6,c,d,e,f,g,h,a,b); + T1 = X[7] = W[7]; ROUND_00_15(7,b,c,d,e,f,g,h,a); + T1 = X[8] = W[8]; ROUND_00_15(8,a,b,c,d,e,f,g,h); + T1 = X[9] = W[9]; ROUND_00_15(9,h,a,b,c,d,e,f,g); + T1 = X[10] = W[10]; ROUND_00_15(10,g,h,a,b,c,d,e,f); + T1 = X[11] = W[11]; ROUND_00_15(11,f,g,h,a,b,c,d,e); + T1 = X[12] = W[12]; ROUND_00_15(12,e,f,g,h,a,b,c,d); + T1 = X[13] = W[13]; ROUND_00_15(13,d,e,f,g,h,a,b,c); + T1 = X[14] = W[14]; ROUND_00_15(14,c,d,e,f,g,h,a,b); + T1 = X[15] = W[15]; ROUND_00_15(15,b,c,d,e,f,g,h,a); + + data += SHA256_CBLOCK; + } + else + { + SHA_LONG l; + + HOST_c2l(data,l); T1 = X[0] = l; ROUND_00_15(0,a,b,c,d,e,f,g,h); + HOST_c2l(data,l); T1 = X[1] = l; ROUND_00_15(1,h,a,b,c,d,e,f,g); + HOST_c2l(data,l); T1 = X[2] = l; ROUND_00_15(2,g,h,a,b,c,d,e,f); + HOST_c2l(data,l); T1 = X[3] = l; ROUND_00_15(3,f,g,h,a,b,c,d,e); + HOST_c2l(data,l); T1 = X[4] = l; ROUND_00_15(4,e,f,g,h,a,b,c,d); + HOST_c2l(data,l); T1 = X[5] = l; ROUND_00_15(5,d,e,f,g,h,a,b,c); + HOST_c2l(data,l); T1 = X[6] = l; ROUND_00_15(6,c,d,e,f,g,h,a,b); + HOST_c2l(data,l); T1 = X[7] = l; ROUND_00_15(7,b,c,d,e,f,g,h,a); + HOST_c2l(data,l); T1 = X[8] = l; ROUND_00_15(8,a,b,c,d,e,f,g,h); + HOST_c2l(data,l); T1 = X[9] = l; ROUND_00_15(9,h,a,b,c,d,e,f,g); + HOST_c2l(data,l); T1 = X[10] = l; ROUND_00_15(10,g,h,a,b,c,d,e,f); + HOST_c2l(data,l); T1 = X[11] = l; ROUND_00_15(11,f,g,h,a,b,c,d,e); + HOST_c2l(data,l); T1 = X[12] = l; ROUND_00_15(12,e,f,g,h,a,b,c,d); + HOST_c2l(data,l); T1 = X[13] = l; ROUND_00_15(13,d,e,f,g,h,a,b,c); + HOST_c2l(data,l); T1 = X[14] = l; ROUND_00_15(14,c,d,e,f,g,h,a,b); + HOST_c2l(data,l); T1 = X[15] = l; ROUND_00_15(15,b,c,d,e,f,g,h,a); + } + + for (i=16;i<64;i+=8) + { + ROUND_16_63(i+0,a,b,c,d,e,f,g,h,X); + ROUND_16_63(i+1,h,a,b,c,d,e,f,g,X); + ROUND_16_63(i+2,g,h,a,b,c,d,e,f,X); + ROUND_16_63(i+3,f,g,h,a,b,c,d,e,X); + ROUND_16_63(i+4,e,f,g,h,a,b,c,d,X); + ROUND_16_63(i+5,d,e,f,g,h,a,b,c,X); + ROUND_16_63(i+6,c,d,e,f,g,h,a,b,X); + ROUND_16_63(i+7,b,c,d,e,f,g,h,a,X); + } + + ctx->h[0] += a; ctx->h[1] += b; ctx->h[2] += c; ctx->h[3] += d; + ctx->h[4] += e; ctx->h[5] += f; ctx->h[6] += g; ctx->h[7] += h; + + } + } + +#endif +#endif /* SHA256_ASM */ + +#endif /* OPENSSL_NO_SHA256 */ diff --git a/compat/sha/sha512-elf-x86_64.S b/compat/sha/sha512-elf-x86_64.S @@ -0,0 +1,1802 @@ +.text + +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %r11,128+24(%rsp) +.Lprologue: + + leaq K512(%rip),%rbp + + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop + +.align 16 +.Lloop: + xorq %rdi,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 72(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,0(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 16(%rsp),%r13 + movq 120(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 80(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,8(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 24(%rsp),%r13 + movq 0(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 88(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,16(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 32(%rsp),%r13 + movq 8(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 96(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,24(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 40(%rsp),%r13 + movq 16(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 104(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,32(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 48(%rsp),%r13 + movq 24(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 112(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,40(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 56(%rsp),%r13 + movq 32(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 120(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,48(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 64(%rsp),%r13 + movq 40(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 0(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,56(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + movq 72(%rsp),%r13 + movq 48(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 8(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r14,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + movq %r12,64(%rsp) + + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %rax,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r8,%r15 + movq %rbx,%r11 + + rorq $6,%r14 + xorq %r8,%r13 + xorq %r10,%r15 + + xorq %rcx,%r11 + xorq %rax,%r14 + addq %r15,%r12 + movq %rbx,%r15 + + rorq $14,%r13 + andq %rax,%r11 + andq %rcx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r11 + + addq %r12,%rdx + addq %r12,%r11 + leaq 1(%rdi),%rdi + addq %r14,%r11 + + movq 80(%rsp),%r13 + movq 56(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 16(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %r14,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%r15 + movq %r12,72(%rsp) + + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + rorq $4,%r13 + addq %r10,%r12 + xorq %r11,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rdx,%r15 + movq %rax,%r10 + + rorq $6,%r14 + xorq %rdx,%r13 + xorq %r9,%r15 + + xorq %rbx,%r10 + xorq %r11,%r14 + addq %r15,%r12 + movq %rax,%r15 + + rorq $14,%r13 + andq %r11,%r10 + andq %rbx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r10 + + addq %r12,%rcx + addq %r12,%r10 + leaq 1(%rdi),%rdi + addq %r14,%r10 + + movq 88(%rsp),%r13 + movq 64(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 24(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r14,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + movq %r12,80(%rsp) + + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r10,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rcx,%r15 + movq %r11,%r9 + + rorq $6,%r14 + xorq %rcx,%r13 + xorq %r8,%r15 + + xorq %rax,%r9 + xorq %r10,%r14 + addq %r15,%r12 + movq %r11,%r15 + + rorq $14,%r13 + andq %r10,%r9 + andq %rax,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r9 + + addq %r12,%rbx + addq %r12,%r9 + leaq 1(%rdi),%rdi + addq %r14,%r9 + + movq 96(%rsp),%r13 + movq 72(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 32(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %r14,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%r15 + movq %r12,88(%rsp) + + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + rorq $4,%r13 + addq %r8,%r12 + xorq %r9,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rbx,%r15 + movq %r10,%r8 + + rorq $6,%r14 + xorq %rbx,%r13 + xorq %rdx,%r15 + + xorq %r11,%r8 + xorq %r9,%r14 + addq %r15,%r12 + movq %r10,%r15 + + rorq $14,%r13 + andq %r9,%r8 + andq %r11,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%r8 + + addq %r12,%rax + addq %r12,%r8 + leaq 1(%rdi),%rdi + addq %r14,%r8 + + movq 104(%rsp),%r13 + movq 80(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 40(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r14,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + movq %r12,96(%rsp) + + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %r8,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %rax,%r15 + movq %r9,%rdx + + rorq $6,%r14 + xorq %rax,%r13 + xorq %rcx,%r15 + + xorq %r10,%rdx + xorq %r8,%r14 + addq %r15,%r12 + movq %r9,%r15 + + rorq $14,%r13 + andq %r8,%rdx + andq %r10,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rdx + + addq %r12,%r11 + addq %r12,%rdx + leaq 1(%rdi),%rdi + addq %r14,%rdx + + movq 112(%rsp),%r13 + movq 88(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 48(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %r14,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%r15 + movq %r12,104(%rsp) + + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rdx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r11,%r15 + movq %r8,%rcx + + rorq $6,%r14 + xorq %r11,%r13 + xorq %rbx,%r15 + + xorq %r9,%rcx + xorq %rdx,%r14 + addq %r15,%r12 + movq %r8,%r15 + + rorq $14,%r13 + andq %rdx,%rcx + andq %r9,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rcx + + addq %r12,%r10 + addq %r12,%rcx + leaq 1(%rdi),%rdi + addq %r14,%rcx + + movq 120(%rsp),%r13 + movq 96(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 56(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r14,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + movq %r12,112(%rsp) + + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rcx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r10,%r15 + movq %rdx,%rbx + + rorq $6,%r14 + xorq %r10,%r13 + xorq %rax,%r15 + + xorq %r8,%rbx + xorq %rcx,%r14 + addq %r15,%r12 + movq %rdx,%r15 + + rorq $14,%r13 + andq %rcx,%rbx + andq %r8,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rbx + + addq %r12,%r9 + addq %r12,%rbx + leaq 1(%rdi),%rdi + addq %r14,%rbx + + movq 0(%rsp),%r13 + movq 104(%rsp),%r14 + movq %r13,%r12 + movq %r14,%r15 + + rorq $7,%r12 + xorq %r13,%r12 + shrq $7,%r13 + + rorq $1,%r12 + xorq %r12,%r13 + movq 64(%rsp),%r12 + + rorq $42,%r15 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + addq %r13,%r12 + xorq %r15,%r14 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %r14,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%r15 + movq %r12,120(%rsp) + + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + rorq $4,%r13 + addq %rax,%r12 + xorq %rbx,%r14 + + addq (%rbp,%rdi,8),%r12 + andq %r9,%r15 + movq %rcx,%rax + + rorq $6,%r14 + xorq %r9,%r13 + xorq %r11,%r15 + + xorq %rdx,%rax + xorq %rbx,%r14 + addq %r15,%r12 + movq %rcx,%r15 + + rorq $14,%r13 + andq %rbx,%rax + andq %rdx,%r15 + + rorq $28,%r14 + addq %r13,%r12 + addq %r15,%rax + + addq %r12,%r8 + addq %r12,%rax + leaq 1(%rdi),%rdi + addq %r14,%rax + + cmpq $80,%rdi + jb .Lrounds_16_xx + + movq 128+0(%rsp),%rdi + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + + movq 128+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha512_block_data_order,.-sha512_block_data_order +.align 64 +.type K512,@object +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe