mudgangster

Tiny, scriptable MUD client

tracy_lz4.cpp (96258B)


      1 /*
      2    LZ4 - Fast LZ compression algorithm
      3    Copyright (C) 2011-present, Yann Collet.
      4 
      5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
      6 
      7    Redistribution and use in source and binary forms, with or without
      8    modification, are permitted provided that the following conditions are
      9    met:
     10 
     11        * Redistributions of source code must retain the above copyright
     12    notice, this list of conditions and the following disclaimer.
     13        * Redistributions in binary form must reproduce the above
     14    copyright notice, this list of conditions and the following disclaimer
     15    in the documentation and/or other materials provided with the
     16    distribution.
     17 
     18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30    You can contact the author at :
     31     - LZ4 homepage : http://www.lz4.org
     32     - LZ4 source repository : https://github.com/lz4/lz4
     33 */
     34 
     35 /*-************************************
     36 *  Tuning parameters
     37 **************************************/
     38 /*
     39  * LZ4_HEAPMODE :
     40  * Selects how the default compression functions allocate memory for their hash table:
     41  * on the stack (0: default, fastest) or on the heap (1: requires malloc()).
     42  */
     43 #ifndef LZ4_HEAPMODE
     44 #  define LZ4_HEAPMODE 0
     45 #endif
     46 
     47 /*
     48  * ACCELERATION_DEFAULT :
     49  * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
     50  */
     51 #define ACCELERATION_DEFAULT 1
     52 
     53 
     54 /*-************************************
     55 *  CPU Feature Detection
     56 **************************************/
     57 /* LZ4_FORCE_MEMORY_ACCESS
     58  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
     59  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
     60  * The switch below allows selecting a different access method for improved performance.
     61  * Method 0 (default) : use `memcpy()`. Safe and portable.
     62  * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
     63  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
     64  * Method 2 : direct access. This method is portable but violates the C standard.
     65  *            It can generate buggy code on targets whose assembly generation depends on alignment.
     66  *            But in some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6).
     67  * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
     68  * Prefer these methods in priority order (0 > 1 > 2)
     69  */
     70 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
     71 #  if defined(__GNUC__) && \
     72   ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
     73   || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
     74 #    define LZ4_FORCE_MEMORY_ACCESS 2
     75 #  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
     76 #    define LZ4_FORCE_MEMORY_ACCESS 1
     77 #  endif
     78 #endif
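        /* Illustrative note (not part of upstream LZ4): since the macro above "can
         * be defined externally", a build may pin the access method itself, e.g.
         * compiling with -DLZ4_FORCE_MEMORY_ACCESS=0 forces the portable memcpy()
         * path regardless of this detection block. */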
     79 
     80 /*
     81  * LZ4_FORCE_SW_BITCOUNT
     82  * Define this parameter if your target system or compiler does not support hardware bit count
     83  */
     84 #if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
     85 #  define LZ4_FORCE_SW_BITCOUNT
     86 #endif
     87 
     88 
     89 
     90 /*-************************************
     91 *  Dependency
     92 **************************************/
     93 /*
     94  * LZ4_SRC_INCLUDED:
     95  * Amalgamation flag, whether lz4.c is included
     96  */
     97 #ifndef LZ4_SRC_INCLUDED
     98 #  define LZ4_SRC_INCLUDED 1
     99 #endif
    100 
    101 #ifndef LZ4_STATIC_LINKING_ONLY
    102 #define LZ4_STATIC_LINKING_ONLY
    103 #endif
    104 
    105 #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
    106 #define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
    107 #endif
    108 
    109 #include "tracy_lz4.hpp"
    110 /* see also "memory routines" below */
    111 
    112 
    113 /*-************************************
    114 *  Compiler Options
    115 **************************************/
    116 #ifdef _MSC_VER    /* Visual Studio */
    117 #  include <intrin.h>
    118 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
    119 #  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
    120 #endif  /* _MSC_VER */
    121 
    122 #ifndef LZ4_FORCE_INLINE
    123 #  ifdef _MSC_VER    /* Visual Studio */
    124 #    define LZ4_FORCE_INLINE static __forceinline
    125 #  else
    126 #    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
    127 #      ifdef __GNUC__
    128 #        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
    129 #      else
    130 #        define LZ4_FORCE_INLINE static inline
    131 #      endif
    132 #    else
    133 #      define LZ4_FORCE_INLINE static
    134 #    endif /* __STDC_VERSION__ */
    135 #  endif  /* _MSC_VER */
    136 #endif /* LZ4_FORCE_INLINE */
    137 
    138 /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
    139  * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
    140  * together with a simple 8-byte copy loop as a fall-back path.
    141  * However, this optimization hurts the decompression speed by >30%,
    142  * because the execution does not go to the optimized loop
    143  * for typical compressible data, and all of the preamble checks
    144  * before going to the fall-back path become useless overhead.
    145  * This optimization happens only with the -O3 flag, and -O2 generates
    146  * a simple 8-byte copy loop.
    147  * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
    148  * functions are annotated with __attribute__((optimize("O2"))),
    149  * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
    150  * of LZ4_wildCopy8 does not affect the compression speed.
    151  */
    152 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
    153 #  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
    154 #  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
    155 #else
    156 #  define LZ4_FORCE_O2_GCC_PPC64LE
    157 #  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
    158 #endif
    159 
    160 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
    161 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
    162 #else
    163 #  define expect(expr,value)    (expr)
    164 #endif
    165 
    166 #ifndef likely
    167 #define likely(expr)     expect((expr) != 0, 1)
    168 #endif
    169 #ifndef unlikely
    170 #define unlikely(expr)   expect((expr) != 0, 0)
    171 #endif
    172 
    173 
    174 /*-************************************
    175 *  Memory routines
    176 **************************************/
    177 #include <stdlib.h>   /* malloc, calloc, free */
    178 #define ALLOC(s)          malloc(s)
    179 #define ALLOC_AND_ZERO(s) calloc(1,s)
    180 #define FREEMEM(p)        free(p)
    181 #include <string.h>   /* memset, memcpy */
    182 #define MEM_INIT(p,v,s)   memset((p),(v),(s))
    183 
    184 
    185 /*-************************************
    186 *  Types
    187 **************************************/
    188 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
    189 # include <stdint.h>
    190   typedef  uint8_t BYTE;
    191   typedef uint16_t U16;
    192   typedef uint32_t U32;
    193   typedef  int32_t S32;
    194   typedef uint64_t U64;
    195   typedef uintptr_t uptrval;
    196 #else
    197   typedef unsigned char       BYTE;
    198   typedef unsigned short      U16;
    199   typedef unsigned int        U32;
    200   typedef   signed int        S32;
    201   typedef unsigned long long  U64;
    202   typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
    203 #endif
    204 
    205 #if defined(__x86_64__)
    206   typedef U64    reg_t;   /* 64-bits in x32 mode */
    207 #else
    208   typedef size_t reg_t;   /* 32-bits in x32 mode */
    209 #endif
    210 
    211 namespace tracy
    212 {
    213 
    214 typedef enum {
    215     notLimited = 0,
    216     limitedOutput = 1,
    217     fillOutput = 2
    218 } limitedOutput_directive;
    219 
    220 
    221 /*-************************************
    222 *  Reading and writing into memory
    223 **************************************/
    224 static unsigned LZ4_isLittleEndian(void)
    225 {
    226     const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    227     return one.c[0];
    228 }
    229 
    230 
    231 #if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
    232 /* lie to the compiler about data alignment; use with caution */
    233 
    234 static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
    235 static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
    236 static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }
    237 
    238 static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
    239 static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
    240 
    241 #elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)
    242 
    243 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
    244 /* currently only defined for gcc and icc */
    245 typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;
    246 
    247 static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
    248 static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
    249 static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; }
    250 
    251 static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
    252 static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
    253 
    254 #else  /* safe and portable access using memcpy() */
    255 
    256 static U16 LZ4_read16(const void* memPtr)
    257 {
    258     U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
    259 }
    260 
    261 static U32 LZ4_read32(const void* memPtr)
    262 {
    263     U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
    264 }
    265 
    266 static reg_t LZ4_read_ARCH(const void* memPtr)
    267 {
    268     reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
    269 }
    270 
    271 static void LZ4_write16(void* memPtr, U16 value)
    272 {
    273     memcpy(memPtr, &value, sizeof(value));
    274 }
    275 
    276 static void LZ4_write32(void* memPtr, U32 value)
    277 {
    278     memcpy(memPtr, &value, sizeof(value));
    279 }
    280 
    281 #endif /* LZ4_FORCE_MEMORY_ACCESS */
    282 
    283 
    284 static U16 LZ4_readLE16(const void* memPtr)
    285 {
    286     if (LZ4_isLittleEndian()) {
    287         return LZ4_read16(memPtr);
    288     } else {
    289         const BYTE* p = (const BYTE*)memPtr;
    290         return (U16)((U16)p[0] + (p[1]<<8));
    291     }
    292 }
    293 
    294 static void LZ4_writeLE16(void* memPtr, U16 value)
    295 {
    296     if (LZ4_isLittleEndian()) {
    297         LZ4_write16(memPtr, value);
    298     } else {
    299         BYTE* p = (BYTE*)memPtr;
    300         p[0] = (BYTE) value;
    301         p[1] = (BYTE)(value>>8);
    302     }
    303 }
    304 
    305 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
    306 LZ4_FORCE_O2_INLINE_GCC_PPC64LE
    307 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
    308 {
    309     BYTE* d = (BYTE*)dstPtr;
    310     const BYTE* s = (const BYTE*)srcPtr;
    311     BYTE* const e = (BYTE*)dstEnd;
    312 
    313     do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
    314 }
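        /* Illustrative caller contract (spelled out as an assumption, not upstream
         * documentation): the loop above rounds the copy up to 8-byte chunks, so it
         * may overwrite up to WILDCOPYLENGTH (8) bytes beyond dstEnd, and callers
         * must reserve that much slack in the output buffer, e.g.
         *
         *     LZ4_wildCopy8(op, anchor, op + litLength);   // as in LZ4_compress_generic()
         */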
    315 
    316 static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
    317 static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
    318 
    319 
    320 #ifndef LZ4_FAST_DEC_LOOP
    321 #  if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    322 #    define LZ4_FAST_DEC_LOOP 1
    323 #  else
    324 #    define LZ4_FAST_DEC_LOOP 0
    325 #  endif
    326 #endif
    327 
    328 #if LZ4_FAST_DEC_LOOP
    329 
    330 LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
    331 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
    332 {
    333     if (offset < 8) {
    334         dstPtr[0] = srcPtr[0];
    335         dstPtr[1] = srcPtr[1];
    336         dstPtr[2] = srcPtr[2];
    337         dstPtr[3] = srcPtr[3];
    338         srcPtr += inc32table[offset];
    339         memcpy(dstPtr+4, srcPtr, 4);
    340         srcPtr -= dec64table[offset];
    341         dstPtr += 8;
    342     } else {
    343         memcpy(dstPtr, srcPtr, 8);
    344         dstPtr += 8;
    345         srcPtr += 8;
    346     }
    347 
    348     LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
    349 }
    350 
    351 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
    352  * this version copies 16 bytes twice (instead of 32 bytes once)
    353  * because it must be compatible with offsets >= 16. */
    354 LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
    355 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
    356 {
    357     BYTE* d = (BYTE*)dstPtr;
    358     const BYTE* s = (const BYTE*)srcPtr;
    359     BYTE* const e = (BYTE*)dstEnd;
    360 
    361     do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
    362 }
    363 
    364 LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
    365 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
    366 {
    367     BYTE v[8];
    368     switch(offset) {
    369     case 1:
    370         memset(v, *srcPtr, 8);
    371         goto copy_loop;
    372     case 2:
    373         memcpy(v, srcPtr, 2);
    374         memcpy(&v[2], srcPtr, 2);
    375         memcpy(&v[4], &v[0], 4);
    376         goto copy_loop;
    377     case 4:
    378         memcpy(v, srcPtr, 4);
    379         memcpy(&v[4], srcPtr, 4);
    380         goto copy_loop;
    381     default:
    382         LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
    383         return;
    384     }
    385 
    386  copy_loop:
    387     memcpy(dstPtr, v, 8);
    388     dstPtr += 8;
    389     while (dstPtr < dstEnd) {
    390         memcpy(dstPtr, v, 8);
    391         dstPtr += 8;
    392     }
    393 }
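        /* Illustrative note (not in upstream lz4.c): small offsets mean the match
         * overlaps the bytes currently being written, so a straight 8/16-byte memcpy
         * would read data that has not been produced yet.  The switch above first
         * materialises an 8-byte repeating pattern, e.g. for offset==2:
         *
         *     BYTE v[8];
         *     memcpy(v, srcPtr, 2); memcpy(&v[2], srcPtr, 2); memcpy(&v[4], &v[0], 4);
         *     // v now holds the 2-byte pattern repeated four times
         */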
    394 #endif
    395 
    396 
    397 /*-************************************
    398 *  Common Constants
    399 **************************************/
    400 #define MINMATCH 4
    401 
    402 #define WILDCOPYLENGTH 8
    403 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    404 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    405 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
    406 #define FASTLOOP_SAFE_DISTANCE 64
    407 static const int LZ4_minLength = (MFLIMIT+1);
    408 
    409 #define KB *(1 <<10)
    410 #define MB *(1 <<20)
    411 #define GB *(1U<<30)
    412 
    413 #ifndef LZ4_DISTANCE_MAX   /* can be user-defined at compile time */
    414 #  define LZ4_DISTANCE_MAX 65535
    415 #endif
    416 
    417 #if (LZ4_DISTANCE_MAX > 65535)   /* max supported by LZ4 format */
    418 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
    419 #endif
    420 
    421 #define ML_BITS  4
    422 #define ML_MASK  ((1U<<ML_BITS)-1)
    423 #define RUN_BITS (8-ML_BITS)
    424 #define RUN_MASK ((1U<<RUN_BITS)-1)
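        /* Illustrative sketch (not upstream documentation): a sequence token packs
         * the literal-run length in its high ML_BITS bits and (matchLength - MINMATCH)
         * in its low ML_BITS bits; a field equal to RUN_MASK / ML_MASK (15) means
         * additional length bytes follow.
         *
         *     BYTE token = (BYTE)((litLength << ML_BITS) | (matchLength - MINMATCH));
         *     // valid only while both fields are < 15; see LZ4_compress_generic() below
         */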
    425 
    426 
    427 /*-************************************
    428 *  Error detection
    429 **************************************/
    430 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
    431 #  include <assert.h>
    432 #else
    433 #  ifndef assert
    434 #    define assert(condition) ((void)0)
    435 #  endif
    436 #endif
    437 
    438 #define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
    439 
    440 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
    441 #  include <stdio.h>
    442 static int g_debuglog_enable = 1;
    443 #  define DEBUGLOG(l, ...) {                                  \
    444                 if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
    445                     fprintf(stderr, __FILE__ ": ");           \
    446                     fprintf(stderr, __VA_ARGS__);             \
    447                     fprintf(stderr, " \n");                   \
    448             }   }
    449 #else
    450 #  define DEBUGLOG(l, ...)      {}    /* disabled */
    451 #endif
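        /* Illustrative note (an assumption about the build, not upstream
         * documentation): compiling this translation unit with -DLZ4_DEBUG=6 pulls in
         * <assert.h> and enables the DEBUGLOG() traces up to level 6, e.g. the
         * per-sequence messages emitted from LZ4_compress_generic(). */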
    452 
    453 
    454 /*-************************************
    455 *  Common functions
    456 **************************************/
    457 static unsigned LZ4_NbCommonBytes (reg_t val)
    458 {
    459     if (LZ4_isLittleEndian()) {
    460         if (sizeof(val)==8) {
    461 #       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
    462             unsigned long r = 0;
    463             _BitScanForward64( &r, (U64)val );
    464             return (int)(r>>3);
    465 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
    466             return (__builtin_ctzll((U64)val) >> 3);
    467 #       else
    468             static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
    469                                                      0, 3, 1, 3, 1, 4, 2, 7,
    470                                                      0, 2, 3, 6, 1, 5, 3, 5,
    471                                                      1, 3, 4, 4, 2, 5, 6, 7,
    472                                                      7, 0, 1, 2, 3, 3, 4, 6,
    473                                                      2, 6, 5, 5, 3, 4, 5, 6,
    474                                                      7, 1, 2, 4, 6, 4, 4, 5,
    475                                                      7, 2, 6, 5, 7, 6, 7, 7 };
    476             return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
    477 #       endif
    478         } else /* 32 bits */ {
    479 #       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    480             unsigned long r;
    481             _BitScanForward( &r, (U32)val );
    482             return (int)(r>>3);
    483 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
    484             return (__builtin_ctz((U32)val) >> 3);
    485 #       else
    486             static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
    487                                                      3, 2, 2, 1, 3, 2, 0, 1,
    488                                                      3, 3, 1, 2, 2, 2, 2, 0,
    489                                                      3, 1, 2, 0, 1, 0, 1, 1 };
    490             return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
    491 #       endif
    492         }
    493     } else   /* Big Endian CPU */ {
    494         if (sizeof(val)==8) {   /* 64-bits */
    495 #       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
    496             unsigned long r = 0;
    497             _BitScanReverse64( &r, val );
    498             return (unsigned)(r>>3);
    499 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
    500             return (__builtin_clzll((U64)val) >> 3);
    501 #       else
    502             static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
    503                 Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
    504                 Note that this code path is never triggered in 32-bits mode. */
    505             unsigned r;
    506             if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
    507             if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
    508             r += (!val);
    509             return r;
    510 #       endif
    511         } else /* 32 bits */ {
    512 #       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
    513             unsigned long r = 0;
    514             _BitScanReverse( &r, (unsigned long)val );
    515             return (unsigned)(r>>3);
    516 #       elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
    517             return (__builtin_clz((U32)val) >> 3);
    518 #       else
    519             unsigned r;
    520             if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
    521             r += (!val);
    522             return r;
    523 #       endif
    524         }
    525     }
    526 }
    527 
    528 #define STEPSIZE sizeof(reg_t)
    529 LZ4_FORCE_INLINE
    530 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
    531 {
    532     const BYTE* const pStart = pIn;
    533 
    534     if (likely(pIn < pInLimit-(STEPSIZE-1))) {
    535         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
    536         if (!diff) {
    537             pIn+=STEPSIZE; pMatch+=STEPSIZE;
    538         } else {
    539             return LZ4_NbCommonBytes(diff);
    540     }   }
    541 
    542     while (likely(pIn < pInLimit-(STEPSIZE-1))) {
    543         reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
    544         if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
    545         pIn += LZ4_NbCommonBytes(diff);
    546         return (unsigned)(pIn - pStart);
    547     }
    548 
    549     if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    550     if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    551     if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    552     return (unsigned)(pIn - pStart);
    553 }
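        /* Illustrative example (not in upstream lz4.c): LZ4_count() returns the length
         * of the common prefix of pIn and pMatch, bounded by pInLimit, e.g.
         *
         *     pIn    -> "abcdefXY..."
         *     pMatch -> "abcdefQZ..."
         *     LZ4_count(pIn, pMatch, pInLimit) == 6
         *
         * The compressor calls it at ip+MINMATCH, so the result counts the bytes
         * matched beyond the 4 already verified. */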
    554 
    555 
    556 #ifndef LZ4_COMMONDEFS_ONLY
    557 /*-************************************
    558 *  Local Constants
    559 **************************************/
    560 static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
    561 static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression runs slower on incompressible data */
    562 
    563 
    564 /*-************************************
    565 *  Local Structures and types
    566 **************************************/
    567 typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
    568 
    569 /**
    570  * This enum distinguishes several different modes of accessing previous
    571  * content in the stream.
    572  *
    573  * - noDict        : There is no preceding content.
    574  * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
    575  *                   being compressed are valid and refer to the preceding
    576  *                   content (of length ctx->dictSize), which is available
    577  *                   contiguously preceding in memory the content currently
    578  *                   being compressed.
    579  * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
    580  *                   else in memory, starting at ctx->dictionary with length
    581  *                   ctx->dictSize.
    582  * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
    583  *                   content is in a separate context, pointed to by
    584  *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
    585  *                   entries in the current context that refer to positions
    586  *                   preceding the beginning of the current compression are
    587  *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
    588  *                   ->dictSize describe the location and size of the preceding
    589  *                   content, and matches are found by looking in the ctx
    590  *                   ->dictCtx->hashTable.
    591  */
    592 typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
    593 typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
    594 
    595 
    596 /*-************************************
    597 *  Local Utils
    598 **************************************/
    599 int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
    600 const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
    601 int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
    602 int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
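        /* Illustrative usage sketch (not part of this file; SRC_SIZE and src are
         * hypothetical caller-side names): LZ4_compressBound() gives the worst-case
         * compressed size, so a destination buffer of that capacity cannot be too small.
         *
         *     char dst[LZ4_COMPRESSBOUND(SRC_SIZE)];
         *     int n = LZ4_compress_default(src, dst, SRC_SIZE, (int)sizeof(dst));
         *     // n > 0 : compressed size ; n == 0 : compression failed
         */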
    603 
    604 
    605 /*-************************************
    606 *  Internal Definitions used in Tests
    607 **************************************/
    608 
    609 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
    610 
    611 int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
    612 
    613 /*-******************************
    614 *  Compression functions
    615 ********************************/
    616 static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
    617 {
    618     if (tableType == byU16)
    619         return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    620     else
    621         return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
    622 }
    623 
    624 static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
    625 {
    626     const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
    627     if (LZ4_isLittleEndian()) {
    628         const U64 prime5bytes = 889523592379ULL;
    629         return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
    630     } else {
    631         const U64 prime8bytes = 11400714785074694791ULL;
    632         return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
    633     }
    634 }
    635 
    636 LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
    637 {
    638     if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);
    639     return LZ4_hash4(LZ4_read32(p), tableType);
    640 }
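        /* Illustrative note (derived from the defaults in the header, stated as an
         * assumption): with LZ4_MEMORY_USAGE == 14, LZ4_HASHLOG is 12, so byU32 tables
         * hold 2^12 four-byte entries while byU16 tables use one extra hash bit for
         * 2^13 two-byte entries -- both fit the same 16 KB footprint. */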
    641 
    642 static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
    643 {
    644     switch (tableType)
    645     {
    646     default: /* fallthrough */
    647     case clearedTable: /* fallthrough */
    648     case byPtr: { /* illegal! */ assert(0); return; }
    649     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
    650     case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
    651     }
    652 }
    653 
    654 static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
    655                                   void* tableBase, tableType_t const tableType,
    656                             const BYTE* srcBase)
    657 {
    658     switch (tableType)
    659     {
    660     case clearedTable: { /* illegal! */ assert(0); return; }
    661     case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
    662     case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
    663     case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
    664     }
    665 }
    666 
    667 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
    668 {
    669     U32 const h = LZ4_hashPosition(p, tableType);
    670     LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
    671 }
    672 
    673 /* LZ4_getIndexOnHash() :
    674  * Index of match position registered in hash table.
    675  * hash position must be calculated by using base+index, or dictBase+index.
    676  * Assumption 1 : only valid if tableType == byU32 or byU16.
    677  * Assumption 2 : h is presumed valid (within limits of hash table)
    678  */
    679 static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
    680 {
    681     LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    682     if (tableType == byU32) {
    683         const U32* const hashTable = (const U32*) tableBase;
    684         assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
    685         return hashTable[h];
    686     }
    687     if (tableType == byU16) {
    688         const U16* const hashTable = (const U16*) tableBase;
    689         assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
    690         return hashTable[h];
    691     }
    692     assert(0); return 0;  /* forbidden case */
    693 }
    694 
    695 static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase)
    696 {
    697     if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
    698     if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; }
    699     { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; }   /* default, to ensure a return */
    700 }
    701 
    702 LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
    703                                              const void* tableBase, tableType_t tableType,
    704                                              const BYTE* srcBase)
    705 {
    706     U32 const h = LZ4_hashPosition(p, tableType);
    707     return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
    708 }
    709 
    710 LZ4_FORCE_INLINE void LZ4_prepareTable(
    711         LZ4_stream_t_internal* const cctx,
    712         const int inputSize,
    713         const tableType_t tableType) {
    714     /* If compression failed during the previous step, then the context
    715      * is marked as dirty and therefore has to be fully reset.
    716      */
    717     if (cctx->dirty) {
    718         DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
    719         MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
    720         return;
    721     }
    722 
    723     /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
    724      * therefore safe to use no matter what mode we're in. Otherwise, we figure
    725      * out if it's safe to leave as is or whether it needs to be reset.
    726      */
    727     if (cctx->tableType != clearedTable) {
    728         if (cctx->tableType != tableType
    729           || (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
    730           || (tableType == byU32 && cctx->currentOffset > 1 GB)
    731           || tableType == byPtr
    732           || inputSize >= 4 KB)
    733         {
    734             DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
    735             MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
    736             cctx->currentOffset = 0;
    737             cctx->tableType = clearedTable;
    738         } else {
    739             DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
    740         }
    741     }
    742 
    743     /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster
    744      * than compressing without a gap. However, compressing with
    745      * currentOffset == 0 is faster still, so we preserve that case.
    746      */
    747     if (cctx->currentOffset != 0 && tableType == byU32) {
    748         DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
    749         cctx->currentOffset += 64 KB;
    750     }
    751 
    752     /* Finally, clear history */
    753     cctx->dictCtx = NULL;
    754     cctx->dictionary = NULL;
    755     cctx->dictSize = 0;
    756 }
    757 
    758 /** LZ4_compress_generic() :
    759     inlined, to ensure branches are decided at compilation time */
    760 LZ4_FORCE_INLINE int LZ4_compress_generic(
    761                  LZ4_stream_t_internal* const cctx,
    762                  const char* const source,
    763                  char* const dest,
    764                  const int inputSize,
    765                  int *inputConsumed, /* only written when outputDirective == fillOutput */
    766                  const int maxOutputSize,
    767                  const limitedOutput_directive outputDirective,
    768                  const tableType_t tableType,
    769                  const dict_directive dictDirective,
    770                  const dictIssue_directive dictIssue,
    771                  const int acceleration)
    772 {
    773     int result;
    774     const BYTE* ip = (const BYTE*) source;
    775 
    776     U32 const startIndex = cctx->currentOffset;
    777     const BYTE* base = (const BYTE*) source - startIndex;
    778     const BYTE* lowLimit;
    779 
    780     const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    781     const BYTE* const dictionary =
    782         dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    783     const U32 dictSize =
    784         dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    785     const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with index in current context */
    786 
    787     int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    788     U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    789     const BYTE* const dictEnd = dictionary + dictSize;
    790     const BYTE* anchor = (const BYTE*) source;
    791     const BYTE* const iend = ip + inputSize;
    792     const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    793     const BYTE* const matchlimit = iend - LASTLITERALS;
    794 
    795     /* the dictCtx currentOffset is indexed on the start of the dictionary,
    796      * while a dictionary in the current context precedes the currentOffset */
    797     const BYTE* dictBase = (dictDirective == usingDictCtx) ?
    798                             dictionary + dictSize - dictCtx->currentOffset :
    799                             dictionary + dictSize - startIndex;
    800 
    801     BYTE* op = (BYTE*) dest;
    802     BYTE* const olimit = op + maxOutputSize;
    803 
    804     U32 offset = 0;
    805     U32 forwardH;
    806 
    807     DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
    808     /* If init conditions are not met, we don't have to mark stream
    809      * as having dirty context, since no action was taken yet */
    810     if (outputDirective == fillOutput && maxOutputSize < 1) return 0;   /* Impossible to store anything */
    811     if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;           /* Unsupported inputSize, too large (or negative) */
    812     if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0;  /* Size too large (not within 64K limit) */
    813     if (tableType==byPtr) assert(dictDirective==noDict);      /* only supported use case with byPtr */
    814     assert(acceleration >= 1);
    815 
    816     lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);
    817 
    818     /* Update context state */
    819     if (dictDirective == usingDictCtx) {
    820         /* Subsequent linked blocks can't use the dictionary. */
    821         /* Instead, they use the block we just compressed. */
    822         cctx->dictCtx = NULL;
    823         cctx->dictSize = (U32)inputSize;
    824     } else {
    825         cctx->dictSize += (U32)inputSize;
    826     }
    827     cctx->currentOffset += (U32)inputSize;
    828     cctx->tableType = (U16)tableType;
    829 
    830     if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */
    831 
    832     /* First Byte */
    833     LZ4_putPosition(ip, cctx->hashTable, tableType, base);
    834     ip++; forwardH = LZ4_hashPosition(ip, tableType);
    835 
    836     /* Main Loop */
    837     for ( ; ; ) {
    838         const BYTE* match;
    839         BYTE* token;
    840 
    841         /* Find a match */
    842         if (tableType == byPtr) {
    843             const BYTE* forwardIp = ip;
    844             int step = 1;
    845             int searchMatchNb = acceleration << LZ4_skipTrigger;
    846             do {
    847                 U32 const h = forwardH;
    848                 ip = forwardIp;
    849                 forwardIp += step;
    850                 step = (searchMatchNb++ >> LZ4_skipTrigger);
    851 
    852                 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
    853                 assert(ip < mflimitPlusOne);
    854 
    855                 match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base);
    856                 forwardH = LZ4_hashPosition(forwardIp, tableType);
    857                 LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base);
    858 
    859             } while ( (match+LZ4_DISTANCE_MAX < ip)
    860                    || (LZ4_read32(match) != LZ4_read32(ip)) );
    861 
    862         } else {   /* byU32, byU16 */
    863 
    864             const BYTE* forwardIp = ip;
    865             int step = 1;
    866             int searchMatchNb = acceleration << LZ4_skipTrigger;
    867             do {
    868                 U32 const h = forwardH;
    869                 U32 const current = (U32)(forwardIp - base);
    870                 U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
    871                 assert(matchIndex <= current);
    872                 assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
    873                 ip = forwardIp;
    874                 forwardIp += step;
    875                 step = (searchMatchNb++ >> LZ4_skipTrigger);
    876 
    877                 if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
    878                 assert(ip < mflimitPlusOne);
    879 
    880                 if (dictDirective == usingDictCtx) {
    881                     if (matchIndex < startIndex) {
    882                         /* there was no match, try the dictionary */
    883                         assert(tableType == byU32);
    884                         matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
    885                         match = dictBase + matchIndex;
    886                         matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
    887                         lowLimit = dictionary;
    888                     } else {
    889                         match = base + matchIndex;
    890                         lowLimit = (const BYTE*)source;
    891                     }
    892                 } else if (dictDirective==usingExtDict) {
    893                     if (matchIndex < startIndex) {
    894                         DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
    895                         assert(startIndex - matchIndex >= MINMATCH);
    896                         match = dictBase + matchIndex;
    897                         lowLimit = dictionary;
    898                     } else {
    899                         match = base + matchIndex;
    900                         lowLimit = (const BYTE*)source;
    901                     }
    902                 } else {   /* single continuous memory segment */
    903                     match = base + matchIndex;
    904                 }
    905                 forwardH = LZ4_hashPosition(forwardIp, tableType);
    906                 LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
    907 
    908                 if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue;    /* match outside of valid area */
    909                 assert(matchIndex < current);
    910                 if ((tableType != byU16) && (matchIndex+LZ4_DISTANCE_MAX < current)) continue;  /* too far */
    911                 if (tableType == byU16) assert((current - matchIndex) <= LZ4_DISTANCE_MAX);     /* too_far presumed impossible with byU16 */
    912 
    913                 if (LZ4_read32(match) == LZ4_read32(ip)) {
    914                     if (maybe_extMem) offset = current - matchIndex;
    915                     break;   /* match found */
    916                 }
    917 
    918             } while(1);
    919         }
    920 
    921         /* Catch up */
    922         while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
    923 
    924         /* Encode Literals */
    925         {   unsigned const litLength = (unsigned)(ip - anchor);
    926             token = op++;
    927             if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
    928                 (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) )
    929                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
    930 
    931             if ((outputDirective == fillOutput) &&
    932                 (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
    933                 op--;
    934                 goto _last_literals;
    935             }
    936             if (litLength >= RUN_MASK) {
    937                 int len = (int)(litLength - RUN_MASK);
    938                 *token = (RUN_MASK<<ML_BITS);
    939                 for(; len >= 255 ; len-=255) *op++ = 255;
    940                 *op++ = (BYTE)len;
    941             }
    942             else *token = (BYTE)(litLength<<ML_BITS);
    943 
    944             /* Copy Literals */
    945             LZ4_wildCopy8(op, anchor, op+litLength);
    946             op+=litLength;
    947             DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
    948                         (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
    949         }
    950 
    951 _next_match:
    952         /* at this stage, the following variables must be correctly set :
    953          * - ip : at start of LZ operation
    954          * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
    955          * - offset : if maybe_ext_memSegment==1 (constant)
    956          * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
    957          * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
    958          */
    959 
    960         if ((outputDirective == fillOutput) &&
    961             (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
    962             /* the match was too close to the end, rewind and go to last literals */
    963             op = token;
    964             goto _last_literals;
    965         }
    966 
    967         /* Encode Offset */
    968         if (maybe_extMem) {   /* static test */
    969             DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
    970             assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
    971             LZ4_writeLE16(op, (U16)offset); op+=2;
    972         } else  {
    973             DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
    974             assert(ip-match <= LZ4_DISTANCE_MAX);
    975             LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
    976         }
    977 
    978         /* Encode MatchLength */
    979         {   unsigned matchCode;
    980 
    981             if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
    982               && (lowLimit==dictionary) /* match within extDict */ ) {
    983                 const BYTE* limit = ip + (dictEnd-match);
    984                 assert(dictEnd > match);
    985                 if (limit > matchlimit) limit = matchlimit;
    986                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
    987                 ip += (size_t)matchCode + MINMATCH;
    988                 if (ip==limit) {
    989                     unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
    990                     matchCode += more;
    991                     ip += more;
    992                 }
    993                 DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
    994             } else {
    995                 matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
    996                 ip += (size_t)matchCode + MINMATCH;
    997                 DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
    998             }
    999 
   1000             if ((outputDirective) &&    /* Check output buffer overflow */
   1001                 (unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
   1002                 if (outputDirective == fillOutput) {
   1003                     /* Match description too long : reduce it */
   1004                     U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
   1005                     ip -= matchCode - newMatchCode;
   1006                     matchCode = newMatchCode;
   1007                 } else {
   1008                     assert(outputDirective == limitedOutput);
   1009                     return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
   1010                 }
   1011             }
   1012             if (matchCode >= ML_MASK) {
   1013                 *token += ML_MASK;
   1014                 matchCode -= ML_MASK;
   1015                 LZ4_write32(op, 0xFFFFFFFF);
   1016                 while (matchCode >= 4*255) {
   1017                     op+=4;
   1018                     LZ4_write32(op, 0xFFFFFFFF);
   1019                     matchCode -= 4*255;
   1020                 }
   1021                 op += matchCode / 255;
   1022                 *op++ = (BYTE)(matchCode % 255);
   1023             } else
   1024                 *token += (BYTE)(matchCode);
   1025         }
   1026 
   1027         anchor = ip;
   1028 
   1029         /* Test end of chunk */
   1030         if (ip >= mflimitPlusOne) break;
   1031 
   1032         /* Fill table */
   1033         LZ4_putPosition(ip-2, cctx->hashTable, tableType, base);
   1034 
   1035         /* Test next position */
   1036         if (tableType == byPtr) {
   1037 
   1038             match = LZ4_getPosition(ip, cctx->hashTable, tableType, base);
   1039             LZ4_putPosition(ip, cctx->hashTable, tableType, base);
   1040             if ( (match+LZ4_DISTANCE_MAX >= ip)
   1041               && (LZ4_read32(match) == LZ4_read32(ip)) )
   1042             { token=op++; *token=0; goto _next_match; }
   1043 
   1044         } else {   /* byU32, byU16 */
   1045 
   1046             U32 const h = LZ4_hashPosition(ip, tableType);
   1047             U32 const current = (U32)(ip-base);
   1048             U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
   1049             assert(matchIndex < current);
   1050             if (dictDirective == usingDictCtx) {
   1051                 if (matchIndex < startIndex) {
   1052                     /* there was no match, try the dictionary */
   1053                     matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
   1054                     match = dictBase + matchIndex;
   1055                     lowLimit = dictionary;   /* required for match length counter */
   1056                     matchIndex += dictDelta;
   1057                 } else {
   1058                     match = base + matchIndex;
   1059                     lowLimit = (const BYTE*)source;  /* required for match length counter */
   1060                 }
   1061             } else if (dictDirective==usingExtDict) {
   1062                 if (matchIndex < startIndex) {
   1063                     match = dictBase + matchIndex;
   1064                     lowLimit = dictionary;   /* required for match length counter */
   1065                 } else {
   1066                     match = base + matchIndex;
   1067                     lowLimit = (const BYTE*)source;   /* required for match length counter */
   1068                 }
   1069             } else {   /* single memory segment */
   1070                 match = base + matchIndex;
   1071             }
   1072             LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
   1073             assert(matchIndex < current);
   1074             if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
   1075               && ((tableType==byU16) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
   1076               && (LZ4_read32(match) == LZ4_read32(ip)) ) {
   1077                 token=op++;
   1078                 *token=0;
   1079                 if (maybe_extMem) offset = current - matchIndex;
   1080                 DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
   1081                             (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
   1082                 goto _next_match;
   1083             }
   1084         }
   1085 
   1086         /* Prepare next loop */
   1087         forwardH = LZ4_hashPosition(++ip, tableType);
   1088 
   1089     }
   1090 
   1091 _last_literals:
   1092     /* Encode Last Literals */
   1093     {   size_t lastRun = (size_t)(iend - anchor);
   1094         if ( (outputDirective) &&  /* Check output buffer overflow */
   1095             (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
   1096             if (outputDirective == fillOutput) {
   1097                 /* adapt lastRun to fill 'dst' */
   1098                 assert(olimit >= op);
   1099                 lastRun  = (size_t)(olimit-op) - 1;
   1100                 lastRun -= (lastRun+240)/255;
   1101             } else {
   1102                 assert(outputDirective == limitedOutput);
   1103                 return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
   1104             }
   1105         }
   1106         if (lastRun >= RUN_MASK) {
   1107             size_t accumulator = lastRun - RUN_MASK;
   1108             *op++ = RUN_MASK << ML_BITS;
   1109             for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
   1110             *op++ = (BYTE) accumulator;
   1111         } else {
   1112             *op++ = (BYTE)(lastRun<<ML_BITS);
   1113         }
   1114         memcpy(op, anchor, lastRun);
   1115         ip = anchor + lastRun;
   1116         op += lastRun;
   1117     }
   1118 
   1119     if (outputDirective == fillOutput) {
   1120         *inputConsumed = (int) (((const char*)ip)-source);
   1121     }
   1122     DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
   1123     result = (int)(((char*)op) - dest);
   1124     assert(result > 0);
   1125     return result;
   1126 }
   1127 
   1128 
   1129 int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
   1130 {
   1131     LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
   1132     assert(ctx != NULL);
   1133     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
   1134     if (maxOutputSize >= LZ4_compressBound(inputSize)) {
   1135         if (inputSize < LZ4_64Klimit) {
   1136             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
   1137         } else {
   1138             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1139             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1140         }
   1141     } else {
   1142         if (inputSize < LZ4_64Klimit) {
   1143             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
   1144         } else {
   1145             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1146             return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1147         }
   1148     }
   1149 }
   1150 
   1151 /**
   1152  * LZ4_compress_fast_extState_fastReset() :
   1153  * A variant of LZ4_compress_fast_extState().
   1154  *
   1155  * Using this variant avoids an expensive initialization step. It is only safe
   1156  * to call if the state buffer is known to be correctly initialized already
   1157  * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
   1158  * "correctly initialized").
   1159  */
   1160 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
   1161 {
   1162     LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
   1163     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
   1164 
   1165     if (dstCapacity >= LZ4_compressBound(srcSize)) {
   1166         if (srcSize < LZ4_64Klimit) {
   1167             const tableType_t tableType = byU16;
   1168             LZ4_prepareTable(ctx, srcSize, tableType);
   1169             if (ctx->currentOffset) {
   1170                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
   1171             } else {
   1172                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1173             }
   1174         } else {
   1175             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1176             LZ4_prepareTable(ctx, srcSize, tableType);
   1177             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1178         }
   1179     } else {
   1180         if (srcSize < LZ4_64Klimit) {
   1181             const tableType_t tableType = byU16;
   1182             LZ4_prepareTable(ctx, srcSize, tableType);
   1183             if (ctx->currentOffset) {
   1184                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
   1185             } else {
   1186                 return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1187             }
   1188         } else {
   1189             const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1190             LZ4_prepareTable(ctx, srcSize, tableType);
   1191             return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1192         }
   1193     }
   1194 }
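
/* Illustrative usage sketch (not part of LZ4): the intended pattern for the
 * _fastReset() variant -- pay for LZ4_initStream() once, then reuse the same
 * state for many independent blocks without a full re-initialization.
 * The function name and the (blocks, blockSizes, outs) layout are hypothetical. */
#if 0
static void exampleCompressManyBlocks(LZ4_stream_t* state,
                                      const char* const* blocks, const int* blockSizes,
                                      char* const* outs, int outCapacity, int nbBlocks)
{
    int i;
    LZ4_initStream(state, sizeof(*state));   /* full initialization, done once */
    for (i = 0; i < nbBlocks; i++) {
        /* only the cheap table preparation inside _fastReset happens per call */
        (void)LZ4_compress_fast_extState_fastReset(state, blocks[i], outs[i],
                                                   blockSizes[i], outCapacity, 1);
    }
}
#endif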
   1195 
   1196 
   1197 int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
   1198 {
   1199     int result;
   1200 #if (LZ4_HEAPMODE)
   1201     LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1202     if (ctxPtr == NULL) return 0;
   1203 #else
   1204     LZ4_stream_t ctx;
   1205     LZ4_stream_t* const ctxPtr = &ctx;
   1206 #endif
   1207     result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
   1208 
   1209 #if (LZ4_HEAPMODE)
   1210     FREEMEM(ctxPtr);
   1211 #endif
   1212     return result;
   1213 }
   1214 
   1215 
   1216 int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize)
   1217 {
   1218     return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1);
   1219 }
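
/* Illustrative usage sketch (not part of LZ4): a minimal one-shot round trip
 * through LZ4_compress_default() and LZ4_decompress_safe(). The helper name
 * exampleRoundTrip and its buffer parameters are hypothetical. */
#if 0
static int exampleRoundTrip(const char* src, int srcSize,
                            char* compressed, int compressedCapacity,
                            char* restored, int restoredCapacity)
{
    if (compressedCapacity < LZ4_compressBound(srcSize)) return -1;  /* guarantees compression succeeds */
    {   int const cSize = LZ4_compress_default(src, compressed, srcSize, compressedCapacity);
        if (cSize <= 0) return -1;
        {   int const dSize = LZ4_decompress_safe(compressed, restored, cSize, restoredCapacity);
            return (dSize == srcSize) ? 0 : -1;   /* must restore exactly srcSize bytes */
    }   }
}
#endif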
   1220 
   1221 
   1222 /* hidden debug function */
   1223 /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
   1224 int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
   1225 {
   1226     LZ4_stream_t ctx;
   1227     LZ4_initStream(&ctx, sizeof(ctx));
   1228 
   1229     if (srcSize < LZ4_64Klimit) {
   1230         return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16,    noDict, noDictIssue, acceleration);
   1231     } else {
   1232         tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;
   1233         return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
   1234     }
   1235 }
   1236 
   1237 
   1238 /* Note!: This function leaves the stream in an unclean/broken state!
   1239  * It is not safe to subsequently use the same state with a _fastReset() or
   1240  * _continue() call without resetting it. */
   1241 static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
   1242 {
   1243     void* const s = LZ4_initStream(state, sizeof (*state));
   1244     assert(s != NULL); (void)s;
   1245 
   1246     if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
   1247         return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
   1248     } else {
   1249         if (*srcSizePtr < LZ4_64Klimit) {
   1250             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1);
   1251         } else {
   1252             tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1253             return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1);
   1254     }   }
   1255 }
   1256 
   1257 
   1258 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
   1259 {
   1260 #if (LZ4_HEAPMODE)
   1261     LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1262     if (ctx == NULL) return 0;
   1263 #else
   1264     LZ4_stream_t ctxBody;
   1265     LZ4_stream_t* ctx = &ctxBody;
   1266 #endif
   1267 
   1268     int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
   1269 
   1270 #if (LZ4_HEAPMODE)
   1271     FREEMEM(ctx);
   1272 #endif
   1273     return result;
   1274 }
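
/* Illustrative usage sketch (not part of LZ4): LZ4_compress_destSize() fills a
 * fixed-capacity destination and reports, through its in/out pointer, how many
 * source bytes were actually consumed. Names here are hypothetical. */
#if 0
static int exampleFillPacket(const char* src, int srcSize,
                             char* packet, int packetCapacity, int* consumed)
{
    *consumed = srcSize;   /* in : number of source bytes available */
    {   int const written = LZ4_compress_destSize(src, packet, consumed, packetCapacity);
        /* out : *consumed now holds how many source bytes fit into the packet */
        return written;    /* number of compressed bytes written, or 0 on error */
    }
}
#endif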
   1275 
   1276 
   1277 
   1278 /*-******************************
   1279 *  Streaming functions
   1280 ********************************/
   1281 
   1282 LZ4_stream_t* LZ4_createStream(void)
   1283 {
   1284     LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
   1285     LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal));    /* A compilation error here means LZ4_STREAMSIZE is not large enough */
   1286     DEBUGLOG(4, "LZ4_createStream %p", lz4s);
   1287     if (lz4s == NULL) return NULL;
   1288     LZ4_initStream(lz4s, sizeof(*lz4s));
   1289     return lz4s;
   1290 }
   1291 
   1292 #ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
   1293                      it reports an alignment of 8 bytes,
   1294                      while actually aligning LZ4_stream_t on 4 bytes. */
   1295 static size_t LZ4_stream_t_alignment(void)
   1296 {
   1297     struct { char c; LZ4_stream_t t; } t_a;
   1298     return sizeof(t_a) - sizeof(t_a.t);
   1299 }
   1300 #endif
   1301 
   1302 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
   1303 {
   1304     DEBUGLOG(5, "LZ4_initStream");
   1305     if (buffer == NULL) return NULL;
   1306     if (size < sizeof(LZ4_stream_t)) return NULL;
   1307 #ifndef _MSC_VER  /* for some reason, Visual fails the alignment test on 32-bit x86 :
   1308                      it reports an alignment of 8 bytes,
   1309                      while actually aligning LZ4_stream_t on 4 bytes. */
   1310     if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) return NULL;  /* alignment check */
   1311 #endif
   1312     MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
   1313     return (LZ4_stream_t*)buffer;
   1314 }
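
/* Illustrative usage sketch (not part of LZ4): turning caller-provided storage
 * into a usable LZ4_stream_t with LZ4_initStream(). The union guarantees the
 * alignment that the function checks for; names are hypothetical. */
#if 0
static LZ4_stream_t* exampleMakeStream(void)
{
    static union { LZ4_stream_t s; char bytes[sizeof(LZ4_stream_t)]; } storage;
    /* returns NULL if the buffer is too small or misaligned */
    return LZ4_initStream(&storage, sizeof(storage));
}
#endif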
   1315 
   1316 /* resetStream is now deprecated,
   1317  * prefer initStream() which is more general */
   1318 void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
   1319 {
   1320     DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
   1321     MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
   1322 }
   1323 
   1324 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
   1325     LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
   1326 }
   1327 
   1328 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
   1329 {
   1330     if (!LZ4_stream) return 0;   /* support free on NULL */
   1331     DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
   1332     FREEMEM(LZ4_stream);
   1333     return (0);
   1334 }
   1335 
   1336 
   1337 #define HASH_UNIT sizeof(reg_t)
   1338 int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
   1339 {
   1340     LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse;
   1341     const tableType_t tableType = byU32;
   1342     const BYTE* p = (const BYTE*)dictionary;
   1343     const BYTE* const dictEnd = p + dictSize;
   1344     const BYTE* base;
   1345 
   1346     DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);
   1347 
   1348     /* It's necessary to reset the context,
   1349      * and not just continue it with prepareTable()
   1350      * to avoid any risk of generating overflowing matchIndex
   1351      * when compressing using this dictionary */
   1352     LZ4_resetStream(LZ4_dict);
   1353 
   1354     /* We always increment the offset by 64 KB, since, if the dict is longer,
   1355      * we truncate it to the last 64 KB, and if it's shorter, we still want to
   1356      * advance by a whole window length so we can provide the guarantee that
   1357      * there are only valid offsets in the window, which allows an optimization
   1358      * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
   1359      * dictionary isn't a full 64k. */
   1360 
   1361     if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
   1362     base = dictEnd - 64 KB - dict->currentOffset;
   1363     dict->dictionary = p;
   1364     dict->dictSize = (U32)(dictEnd - p);
   1365     dict->currentOffset += 64 KB;
   1366     dict->tableType = tableType;
   1367 
   1368     if (dictSize < (int)HASH_UNIT) {
   1369         return 0;
   1370     }
   1371 
   1372     while (p <= dictEnd-HASH_UNIT) {
   1373         LZ4_putPosition(p, dict->hashTable, tableType, base);
   1374         p+=3;
   1375     }
   1376 
   1377     return (int)dict->dictSize;
   1378 }
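
/* Illustrative usage sketch (not part of LZ4): loading a dictionary into a
 * stream, then compressing a block against it with LZ4_compress_fast_continue().
 * The dictionary buffer must stay valid for the duration of the compression.
 * Names are hypothetical. */
#if 0
static int exampleCompressWithDict(const char* dict, int dictSize,
                                   const char* src, int srcSize,
                                   char* dst, int dstCapacity)
{
    LZ4_stream_t stream;
    LZ4_initStream(&stream, sizeof(stream));
    LZ4_loadDict(&stream, dict, dictSize);   /* stream keeps a pointer into dict */
    return LZ4_compress_fast_continue(&stream, src, dst, srcSize, dstCapacity, 1);
}
#endif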
   1379 
   1380 void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
   1381     /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
   1382      * erased by subsequent calls to LZ4_resetStream_fast() in case the stream
   1383      * was marked as having a dirty context, i.e. requiring a full reset.
   1384      */
   1385     LZ4_resetStream_fast(working_stream);
   1386 
   1387     if (dictionary_stream != NULL) {
   1388         /* If the current offset is zero, we will never look in the
   1389          * external dictionary context, since there is no value a table
   1390          * entry can take that would indicate a miss. In that case, we need
   1391          * to bump the offset to something non-zero.
   1392          */
   1393         if (working_stream->internal_donotuse.currentOffset == 0) {
   1394             working_stream->internal_donotuse.currentOffset = 64 KB;
   1395         }
   1396         working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
   1397     } else {
   1398         working_stream->internal_donotuse.dictCtx = NULL;
   1399     }
   1400 }
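
/* Illustrative usage sketch (not part of LZ4): sharing one pre-loaded dictionary
 * stream across working streams with LZ4_attach_dictionary(), which borrows the
 * dictionary's tables instead of re-hashing the dictionary for every context.
 * The working stream is assumed to be already correctly initialized; names are
 * hypothetical. */
#if 0
static int exampleAttachAndCompress(LZ4_stream_t* working,
                                    const LZ4_stream_t* sharedDictStream,
                                    const char* src, int srcSize,
                                    char* dst, int dstCapacity)
{
    LZ4_attach_dictionary(working, sharedDictStream);   /* resets `working`, then links the dict ctx */
    return LZ4_compress_fast_continue(working, src, dst, srcSize, dstCapacity, 1);
}
#endif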
   1401 
   1402 
   1403 static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
   1404 {
   1405     assert(nextSize >= 0);
   1406     if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
   1407         /* rescale hash table */
   1408         U32 const delta = LZ4_dict->currentOffset - 64 KB;
   1409         const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
   1410         int i;
   1411         DEBUGLOG(4, "LZ4_renormDictT");
   1412         for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
   1413             if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
   1414             else LZ4_dict->hashTable[i] -= delta;
   1415         }
   1416         LZ4_dict->currentOffset = 64 KB;
   1417         if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
   1418         LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
   1419     }
   1420 }
   1421 
   1422 
   1423 int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
   1424                                 const char* source, char* dest,
   1425                                 int inputSize, int maxOutputSize,
   1426                                 int acceleration)
   1427 {
   1428     const tableType_t tableType = byU32;
   1429     LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse;
   1430     const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize;
   1431 
   1432     DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
   1433 
   1434     if (streamPtr->dirty) return 0;   /* Uninitialized structure detected */
   1435     LZ4_renormDictT(streamPtr, inputSize);   /* avoid index overflow */
   1436     if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
   1437 
   1438     /* invalidate tiny dictionaries */
   1439     if ( (streamPtr->dictSize-1 < 4-1)   /* intentional underflow */
   1440       && (dictEnd != (const BYTE*)source) ) {
   1441         DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
   1442         streamPtr->dictSize = 0;
   1443         streamPtr->dictionary = (const BYTE*)source;
   1444         dictEnd = (const BYTE*)source;
   1445     }
   1446 
   1447     /* Check overlapping input/dictionary space */
   1448     {   const BYTE* sourceEnd = (const BYTE*) source + inputSize;
   1449         if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) {
   1450             streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
   1451             if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
   1452             if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
   1453             streamPtr->dictionary = dictEnd - streamPtr->dictSize;
   1454         }
   1455     }
   1456 
   1457     /* prefix mode : source data follows dictionary */
   1458     if (dictEnd == (const BYTE*)source) {
   1459         if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
   1460             return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
   1461         else
   1462             return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
   1463     }
   1464 
   1465     /* external dictionary mode */
   1466     {   int result;
   1467         if (streamPtr->dictCtx) {
   1468             /* We depend here on the fact that dictCtx'es (produced by
   1469              * LZ4_loadDict) guarantee that their tables contain no references
   1470              * to offsets between dictCtx->currentOffset - 64 KB and
   1471              * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
   1472              * to use noDictIssue even when the dict isn't a full 64 KB.
   1473              */
   1474             if (inputSize > 4 KB) {
   1475                 /* For compressing large blobs, it is faster to pay the setup
   1476                  * cost to copy the dictionary's tables into the active context,
   1477                  * so that the compression loop is only looking into one table.
   1478                  */
   1479                 memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
   1480                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
   1481             } else {
   1482                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
   1483             }
   1484         } else {
   1485             if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
   1486                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
   1487             } else {
   1488                 result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
   1489             }
   1490         }
   1491         streamPtr->dictionary = (const BYTE*)source;
   1492         streamPtr->dictSize = (U32)inputSize;
   1493         return result;
   1494     }
   1495 }
   1496 
   1497 
   1498 /* Hidden debug function, to force-test external dictionary mode */
   1499 int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
   1500 {
   1501     LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse;
   1502     int result;
   1503 
   1504     LZ4_renormDictT(streamPtr, srcSize);
   1505 
   1506     if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
   1507         result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
   1508     } else {
   1509         result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
   1510     }
   1511 
   1512     streamPtr->dictionary = (const BYTE*)source;
   1513     streamPtr->dictSize = (U32)srcSize;
   1514 
   1515     return result;
   1516 }
   1517 
   1518 
   1519 /*! LZ4_saveDict() :
   1520  *  If the previously compressed data block is not guaranteed to remain available at its memory location,
   1521  *  save it into a safer place (char* safeBuffer).
   1522  *  Note : you don't need to call LZ4_loadDict() afterwards,
   1523  *         the dictionary is immediately usable; you can therefore call LZ4_compress_fast_continue().
   1524  *  Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
   1525  */
   1526 int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
   1527 {
   1528     LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
   1529     const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
   1530 
   1531     if ((U32)dictSize > 64 KB) dictSize = 64 KB;   /* useless to define a dictionary > 64 KB */
   1532     if ((U32)dictSize > dict->dictSize) dictSize = (int)dict->dictSize;
   1533 
   1534     memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
   1535 
   1536     dict->dictionary = (const BYTE*)safeBuffer;
   1537     dict->dictSize = (U32)dictSize;
   1538 
   1539     return dictSize;
   1540 }
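
/* Illustrative usage sketch (not part of LZ4): the double-buffer streaming
 * pattern LZ4_saveDict() supports -- after compressing a block, copy the last
 * portion of history into a stable buffer so the next block can still reference
 * it once the input buffer is reused. Names are hypothetical; 64 KB is the
 * maximum useful dictionary size per the function above. */
#if 0
static int exampleStreamOneBlock(LZ4_stream_t* stream,
                                 const char* block, int blockSize,
                                 char* dst, int dstCapacity,
                                 char* dictBuffer /* at least 64 KB */)
{
    int const cSize = LZ4_compress_fast_continue(stream, block, dst, blockSize, dstCapacity, 1);
    if (cSize <= 0) return cSize;
    /* preserve up to 64 KB of history before the caller overwrites `block` */
    (void)LZ4_saveDict(stream, dictBuffer, 64 * 1024);
    return cSize;
}
#endif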
   1541 
   1542 
   1543 
   1544 /*-*******************************
   1545  *  Decompression functions
   1546  ********************************/
   1547 
   1548 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
   1549 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
   1550 
   1551 #undef MIN
   1552 #define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
   1553 
   1554 /* Read the variable-length literal or match length.
   1555  *
   1556  * ip - pointer to use as input.
   1557  * lencheck - end ip.  Return an error if ip advances >= lencheck.
   1558  * loop_check - check ip >= lencheck in body of loop.  Returns loop_error if so.
   1559  * initial_check - check ip >= lencheck before start of loop.  Returns initial_error if so.
   1560  * error (output) - error code.  Should be set to 0 before call.
   1561  */
   1562 typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
   1563 LZ4_FORCE_INLINE unsigned
   1564 read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
   1565 {
   1566   unsigned length = 0;
   1567   unsigned s;
   1568   if (initial_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
   1569     *error = initial_error;
   1570     return length;
   1571   }
   1572   do {
   1573     s = **ip;
   1574     (*ip)++;
   1575     length += s;
   1576     if (loop_check && unlikely((*ip) >= lencheck)) {    /* overflow detection */
   1577       *error = loop_error;
   1578       return length;
   1579     }
   1580   } while (s==255);
   1581 
   1582   return length;
   1583 }
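
/* Worked example (illustrative): a literal length of 567 is emitted as the
 * token nibble 15 (RUN_MASK) followed by the bytes 255, 255, 42.
 * read_variable_length() sums 255 + 255 + 42 = 552, which the caller adds to
 * the initial 15 to recover 567. */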
   1584 
   1585 /*! LZ4_decompress_generic() :
   1586  *  This generic decompression function covers all use cases.
   1587  *  It shall be instantiated several times, using different sets of directives.
   1588  *  Note that it is important for performance that this function really get inlined,
   1589  *  in order to remove useless branches during compilation optimization.
   1590  */
   1591 LZ4_FORCE_INLINE int
   1592 LZ4_decompress_generic(
   1593                  const char* const src,
   1594                  char* const dst,
   1595                  int srcSize,
   1596                  int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */
   1597 
   1598                  endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
   1599                  earlyEnd_directive partialDecoding,  /* full, partial */
   1600                  dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
   1601                  const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
   1602                  const BYTE* const dictStart,  /* only if dict==usingExtDict */
   1603                  const size_t dictSize         /* note : = 0 if noDict */
   1604                  )
   1605 {
   1606     if (src == NULL) return -1;
   1607 
   1608     {   const BYTE* ip = (const BYTE*) src;
   1609         const BYTE* const iend = ip + srcSize;
   1610 
   1611         BYTE* op = (BYTE*) dst;
   1612         BYTE* const oend = op + outputSize;
   1613         BYTE* cpy;
   1614 
   1615         const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;
   1616 
   1617         const int safeDecode = (endOnInput==endOnInputSize);
   1618         const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
   1619 
   1620 
   1621         /* Set up the "end" pointers for the shortcut. */
   1622         const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
   1623         const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
   1624 
   1625         const BYTE* match;
   1626         size_t offset;
   1627         unsigned token;
   1628         size_t length;
   1629 
   1630 
   1631         DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);
   1632 
   1633         /* Special cases */
   1634         assert(lowPrefix <= op);
   1635         if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1;  /* Empty output buffer */
   1636         if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
   1637         if ((endOnInput) && unlikely(srcSize==0)) return -1;
   1638 
   1639         /* Currently the fast loop shows a regression on Qualcomm ARM chips. */
   1640 #if LZ4_FAST_DEC_LOOP
   1641         if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
   1642             DEBUGLOG(6, "skip fast decode loop");
   1643             goto safe_decode;
   1644         }
   1645 
   1646         /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
   1647         while (1) {
   1648             /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
   1649             assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
   1650             if (endOnInput) assert(ip < iend);
   1651             token = *ip++;
   1652             length = token >> ML_BITS;  /* literal length */
   1653 
   1654             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
   1655 
   1656             /* decode literal length */
   1657             if (length == RUN_MASK) {
   1658                 variable_length_error error = ok;
   1659                 length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
   1660                 if (error == initial_error) goto _output_error;
   1661                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
   1662                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
   1663 
   1664                 /* copy literals */
   1665                 cpy = op+length;
   1666                 LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
   1667                 if (endOnInput) {  /* LZ4_decompress_safe() */
   1668                     if ((cpy>oend-32) || (ip+length>iend-32)) goto safe_literal_copy;
   1669                     LZ4_wildCopy32(op, ip, cpy);
   1670                 } else {   /* LZ4_decompress_fast() */
   1671                     if (cpy>oend-8) goto safe_literal_copy;
   1672                     LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
   1673                                                  * it doesn't know input length, and only relies on end-of-block properties */
   1674                 }
   1675                 ip += length; op = cpy;
   1676             } else {
   1677                 cpy = op+length;
   1678                 if (endOnInput) {  /* LZ4_decompress_safe() */
   1679                     DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
   1680                     /* We don't need to check oend, since we check it once for each loop below */
   1681                     if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) goto safe_literal_copy;
   1682                     /* Literals can only be up to 14 bytes here, but hope compilers optimize if we copy by a register size */
   1683                     memcpy(op, ip, 16);
   1684                 } else {  /* LZ4_decompress_fast() */
   1685                     /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
   1686                      * it doesn't know input length, and relies on end-of-block properties */
   1687                     memcpy(op, ip, 8);
   1688                     if (length > 8) memcpy(op+8, ip+8, 8);
   1689                 }
   1690                 ip += length; op = cpy;
   1691             }
   1692 
   1693             /* get offset */
   1694             offset = LZ4_readLE16(ip); ip+=2;
   1695             match = op - offset;
   1696 
   1697             /* get matchlength */
   1698             length = token & ML_MASK;
   1699 
   1700             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
   1701 
   1702             if (length == ML_MASK) {
   1703               variable_length_error error = ok;
   1704               length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
   1705               if (error != ok) goto _output_error;
   1706                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
   1707                 length += MINMATCH;
   1708                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
   1709                     goto safe_match_copy;
   1710                 }
   1711             } else {
   1712                 length += MINMATCH;
   1713                 if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
   1714                     goto safe_match_copy;
   1715                 }
   1716 
   1717                 /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
   1718                 if (!(dict == usingExtDict) || (match >= lowPrefix)) {
   1719                     if (offset >= 8) {
   1720                         memcpy(op, match, 8);
   1721                         memcpy(op+8, match+8, 8);
   1722                         memcpy(op+16, match+16, 2);
   1723                         op += length;
   1724                         continue;
   1725             }   }   }
   1726 
   1727             /* match starting within external dictionary */
   1728             if ((dict==usingExtDict) && (match < lowPrefix)) {
   1729                 if (unlikely(op+length > oend-LASTLITERALS)) {
   1730                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
   1731                     else goto _output_error;   /* doesn't respect parsing restriction */
   1732                 }
   1733 
   1734                 if (length <= (size_t)(lowPrefix-match)) {
   1735                     /* match fits entirely within external dictionary : just copy */
   1736                     memmove(op, dictEnd - (lowPrefix-match), length);
   1737                     op += length;
   1738                 } else {
   1739                     /* match stretches into both external dictionary and current block */
   1740                     size_t const copySize = (size_t)(lowPrefix - match);
   1741                     size_t const restSize = length - copySize;
   1742                     memcpy(op, dictEnd - copySize, copySize);
   1743                     op += copySize;
   1744                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
   1745                         BYTE* const endOfMatch = op + restSize;
   1746                         const BYTE* copyFrom = lowPrefix;
   1747                         while (op < endOfMatch) *op++ = *copyFrom++;
   1748                     } else {
   1749                         memcpy(op, lowPrefix, restSize);
   1750                         op += restSize;
   1751                 }   }
   1752                 continue;
   1753             }
   1754 
   1755             /* copy match within block */
   1756             cpy = op + length;
   1757 
   1758             assert((op <= oend) && (oend-op >= 32));
   1759             if (unlikely(offset<16)) {
   1760                 LZ4_memcpy_using_offset(op, match, cpy, offset);
   1761             } else {
   1762                 LZ4_wildCopy32(op, match, cpy);
   1763             }
   1764 
   1765             op = cpy;   /* wildcopy correction */
   1766         }
   1767     safe_decode:
   1768 #endif
   1769 
   1770         /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
   1771         while (1) {
   1772             token = *ip++;
   1773             length = token >> ML_BITS;  /* literal length */
   1774 
   1775             assert(!endOnInput || ip <= iend); /* ip < iend before the increment */
   1776 
   1777             /* A two-stage shortcut for the most common case:
   1778              * 1) If the literal length is 0..14, and there is enough space,
   1779              * enter the shortcut and copy 16 bytes on behalf of the literals
   1780              * (in the fast mode, only 8 bytes can be safely copied this way).
   1781              * 2) Further if the match length is 4..18, copy 18 bytes in a similar
   1782              * manner; but we ensure that there's enough space in the output for
   1783              * those 18 bytes earlier, upon entering the shortcut (in other words,
   1784              * there is a combined check for both stages).
   1785              */
   1786             if ( (endOnInput ? length != RUN_MASK : length <= 8)
   1787                 /* strictly "less than" on input, to re-enter the loop with at least one byte */
   1788               && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
   1789                 /* Copy the literals */
   1790                 memcpy(op, ip, endOnInput ? 16 : 8);
   1791                 op += length; ip += length;
   1792 
   1793                 /* The second stage: prepare for match copying, decode full info.
   1794                  * If it doesn't work out, the info won't be wasted. */
   1795                 length = token & ML_MASK; /* match length */
   1796                 offset = LZ4_readLE16(ip); ip += 2;
   1797                 match = op - offset;
   1798                 assert(match <= op); /* check overflow */
   1799 
   1800                 /* Do not deal with overlapping matches. */
   1801                 if ( (length != ML_MASK)
   1802                   && (offset >= 8)
   1803                   && (dict==withPrefix64k || match >= lowPrefix) ) {
   1804                     /* Copy the match. */
   1805                     memcpy(op + 0, match + 0, 8);
   1806                     memcpy(op + 8, match + 8, 8);
   1807                     memcpy(op +16, match +16, 2);
   1808                     op += length + MINMATCH;
   1809                     /* Both stages worked, load the next token. */
   1810                     continue;
   1811                 }
   1812 
   1813                 /* The second stage didn't work out, but the info is ready.
   1814                  * Propel it right to the point of match copying. */
   1815                 goto _copy_match;
   1816             }
   1817 
   1818             /* decode literal length */
   1819             if (length == RUN_MASK) {
   1820               variable_length_error error = ok;
   1821               length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
   1822               if (error == initial_error) goto _output_error;
   1823                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error;   /* overflow detection */
   1824                 if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error;   /* overflow detection */
   1825             }
   1826 
   1827             /* copy literals */
   1828             cpy = op+length;
   1829 #if LZ4_FAST_DEC_LOOP
   1830         safe_literal_copy:
   1831 #endif
   1832             LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
   1833             if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
   1834               || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
   1835             {
   1836                 if (partialDecoding) {
   1837                     if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); }  /* Partial decoding : stop in the middle of literal segment */
   1838                     if ((endOnInput) && (ip+length > iend)) goto _output_error;   /* Error : read attempt beyond end of input buffer */
   1839                 } else {
   1840                     if ((!endOnInput) && (cpy != oend)) goto _output_error;       /* Error : block decoding must stop exactly there */
   1841                     if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error;   /* Error : input must be consumed */
   1842                 }
   1843                 memcpy(op, ip, length);
   1844                 ip += length;
   1845                 op += length;
   1846                 if (!partialDecoding || (cpy == oend)) {
   1847                     /* Necessarily EOF, due to parsing restrictions */
   1848                     break;
   1849                 }
   1850 
   1851             } else {
   1852                 LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
   1853                 ip += length; op = cpy;
   1854             }
   1855 
   1856             /* get offset */
   1857             offset = LZ4_readLE16(ip); ip+=2;
   1858             match = op - offset;
   1859 
   1860             /* get matchlength */
   1861             length = token & ML_MASK;
   1862 
   1863     _copy_match:
   1864             if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
   1865             if (!partialDecoding) {
   1866                 assert(oend > op);
   1867                 assert(oend - op >= 4);
   1868                 LZ4_write32(op, 0);   /* silence an msan warning when offset==0; costs <1%; */
   1869             }   /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
   1870 
   1871             if (length == ML_MASK) {
   1872               variable_length_error error = ok;
   1873               length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
   1874               if (error != ok) goto _output_error;
   1875                 if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
   1876             }
   1877             length += MINMATCH;
   1878 
   1879 #if LZ4_FAST_DEC_LOOP
   1880         safe_match_copy:
   1881 #endif
   1882             /* match starting within external dictionary */
   1883             if ((dict==usingExtDict) && (match < lowPrefix)) {
   1884                 if (unlikely(op+length > oend-LASTLITERALS)) {
   1885                     if (partialDecoding) length = MIN(length, (size_t)(oend-op));
   1886                     else goto _output_error;   /* doesn't respect parsing restriction */
   1887                 }
   1888 
   1889                 if (length <= (size_t)(lowPrefix-match)) {
   1890                     /* match fits entirely within external dictionary : just copy */
   1891                     memmove(op, dictEnd - (lowPrefix-match), length);
   1892                     op += length;
   1893                 } else {
   1894                     /* match stretches into both external dictionary and current block */
   1895                     size_t const copySize = (size_t)(lowPrefix - match);
   1896                     size_t const restSize = length - copySize;
   1897                     memcpy(op, dictEnd - copySize, copySize);
   1898                     op += copySize;
   1899                     if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
   1900                         BYTE* const endOfMatch = op + restSize;
   1901                         const BYTE* copyFrom = lowPrefix;
   1902                         while (op < endOfMatch) *op++ = *copyFrom++;
   1903                     } else {
   1904                         memcpy(op, lowPrefix, restSize);
   1905                         op += restSize;
   1906                 }   }
   1907                 continue;
   1908             }
   1909 
   1910             /* copy match within block */
   1911             cpy = op + length;
   1912 
   1913             /* partialDecoding : may end anywhere within the block */
   1914             assert(op<=oend);
   1915             if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
   1916                 size_t const mlen = MIN(length, (size_t)(oend-op));
   1917                 const BYTE* const matchEnd = match + mlen;
   1918                 BYTE* const copyEnd = op + mlen;
   1919                 if (matchEnd > op) {   /* overlap copy */
   1920                     while (op < copyEnd) *op++ = *match++;
   1921                 } else {
   1922                     memcpy(op, match, mlen);
   1923                 }
   1924                 op = copyEnd;
   1925                 if (op==oend) break;
   1926                 continue;
   1927             }
   1928 
   1929             if (unlikely(offset<8)) {
   1930                 op[0] = match[0];
   1931                 op[1] = match[1];
   1932                 op[2] = match[2];
   1933                 op[3] = match[3];
   1934                 match += inc32table[offset];
   1935                 memcpy(op+4, match, 4);
   1936                 match -= dec64table[offset];
   1937             } else {
   1938                 memcpy(op, match, 8);
   1939                 match += 8;
   1940             }
   1941             op += 8;
   1942 
   1943             if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
   1944                 BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
   1945                 if (cpy > oend-LASTLITERALS) goto _output_error;    /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
   1946                 if (op < oCopyLimit) {
   1947                     LZ4_wildCopy8(op, match, oCopyLimit);
   1948                     match += oCopyLimit - op;
   1949                     op = oCopyLimit;
   1950                 }
   1951                 while (op < cpy) *op++ = *match++;
   1952             } else {
   1953                 memcpy(op, match, 8);
   1954                 if (length > 16) LZ4_wildCopy8(op+8, match+8, cpy);
   1955             }
   1956             op = cpy;   /* wildcopy correction */
   1957         }
   1958 
   1959         /* end of decoding */
   1960         if (endOnInput)
   1961            return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */
   1962         else
   1963            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
   1964 
   1965         /* Overflow error detected */
   1966     _output_error:
   1967         return (int) (-(((const char*)ip)-src))-1;
   1968     }
   1969 }
   1970 
   1971 
   1972 /*===== Instantiate the API decoding functions. =====*/
   1973 
   1974 LZ4_FORCE_O2_GCC_PPC64LE
   1975 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
   1976 {
   1977     return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
   1978                                   endOnInputSize, decode_full_block, noDict,
   1979                                   (BYTE*)dest, NULL, 0);
   1980 }
   1981 
   1982 LZ4_FORCE_O2_GCC_PPC64LE
   1983 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
   1984 {
   1985     dstCapacity = MIN(targetOutputSize, dstCapacity);
   1986     return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
   1987                                   endOnInputSize, partial_decode,
   1988                                   noDict, (BYTE*)dst, NULL, 0);
   1989 }
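
/* Illustrative usage sketch (not part of LZ4): using LZ4_decompress_safe_partial()
 * to recover only the first targetSize bytes of a block, e.g. to inspect a header
 * without decoding the whole block. Names are hypothetical. */
#if 0
static int examplePeekHeader(const char* compressed, int compressedSize,
                             char* header, int targetSize, int headerCapacity)
{
    /* returns the number of bytes decoded, which may be up to targetSize */
    return LZ4_decompress_safe_partial(compressed, header, compressedSize,
                                       targetSize, headerCapacity);
}
#endif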
   1990 
   1991 LZ4_FORCE_O2_GCC_PPC64LE
   1992 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
   1993 {
   1994     return LZ4_decompress_generic(source, dest, 0, originalSize,
   1995                                   endOnOutputSize, decode_full_block, withPrefix64k,
   1996                                   (BYTE*)dest - 64 KB, NULL, 0);
   1997 }
   1998 
   1999 /*===== Instantiate a few more decoding cases, used more than once. =====*/
   2000 
   2001 LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
   2002 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
   2003 {
   2004     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2005                                   endOnInputSize, decode_full_block, withPrefix64k,
   2006                                   (BYTE*)dest - 64 KB, NULL, 0);
   2007 }
   2008 
   2009 /* Another obsolete API function, paired with the previous one. */
   2010 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
   2011 {
   2012     /* LZ4_decompress_fast doesn't validate match offsets,
   2013      * and thus serves well with any prefixed dictionary. */
   2014     return LZ4_decompress_fast(source, dest, originalSize);
   2015 }
   2016 
   2017 LZ4_FORCE_O2_GCC_PPC64LE
   2018 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2019                                                size_t prefixSize)
   2020 {
   2021     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2022                                   endOnInputSize, decode_full_block, noDict,
   2023                                   (BYTE*)dest-prefixSize, NULL, 0);
   2024 }
   2025 
   2026 LZ4_FORCE_O2_GCC_PPC64LE
   2027 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
   2028                                      int compressedSize, int maxOutputSize,
   2029                                      const void* dictStart, size_t dictSize)
   2030 {
   2031     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2032                                   endOnInputSize, decode_full_block, usingExtDict,
   2033                                   (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2034 }
   2035 
   2036 LZ4_FORCE_O2_GCC_PPC64LE
   2037 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
   2038                                        const void* dictStart, size_t dictSize)
   2039 {
   2040     return LZ4_decompress_generic(source, dest, 0, originalSize,
   2041                                   endOnOutputSize, decode_full_block, usingExtDict,
   2042                                   (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2043 }
   2044 
   2045 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
   2046  * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
   2047  * These routines are used only once, in LZ4_decompress_*_continue().
   2048  */
   2049 LZ4_FORCE_INLINE
   2050 int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2051                                    size_t prefixSize, const void* dictStart, size_t dictSize)
   2052 {
   2053     return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2054                                   endOnInputSize, decode_full_block, usingExtDict,
   2055                                   (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
   2056 }
   2057 
   2058 LZ4_FORCE_INLINE
   2059 int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize,
   2060                                    size_t prefixSize, const void* dictStart, size_t dictSize)
   2061 {
   2062     return LZ4_decompress_generic(source, dest, 0, originalSize,
   2063                                   endOnOutputSize, decode_full_block, usingExtDict,
   2064                                   (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
   2065 }
   2066 
   2067 /*===== streaming decompression functions =====*/
   2068 
   2069 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
   2070 {
   2071     LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
   2072     LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal));    /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */
   2073     return lz4s;
   2074 }
   2075 
   2076 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
   2077 {
   2078     if (LZ4_stream == NULL) return 0;   /* support free on NULL */
   2079     FREEMEM(LZ4_stream);
   2080     return 0;
   2081 }
   2082 
   2083 /*! LZ4_setStreamDecode() :
   2084  *  Use this function to indicate where the dictionary can be found.
   2085  *  This function is not necessary if the previous data is still available where it was decoded.
   2086  *  Loading a size of 0 is allowed (same effect as no dictionary).
   2087  * @return : 1 if OK, 0 if error
   2088  */
   2089 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
   2090 {
   2091     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2092     lz4sd->prefixSize = (size_t) dictSize;
   2093     lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
   2094     lz4sd->externalDict = NULL;
   2095     lz4sd->extDictSize  = 0;
   2096     return 1;
   2097 }
   2098 
   2099 /*! LZ4_decoderRingBufferSize() :
   2100  *  when setting a ring buffer for streaming decompression (optional scenario),
   2101  *  provides the minimum size of this ring buffer
   2102  *  to be compatible with any source respecting maxBlockSize condition.
   2103  *  Note : in a ring buffer scenario,
   2104  *  blocks are presumed decompressed next to each other.
   2105  *  When not enough space remains for the next block (remainingSize < maxBlockSize),
   2106  *  decoding resumes from the beginning of the ring buffer.
   2107  * @return : minimum ring buffer size,
   2108  *           or 0 if there is an error (invalid maxBlockSize).
   2109  */
   2110 int LZ4_decoderRingBufferSize(int maxBlockSize)
   2111 {
   2112     if (maxBlockSize < 0) return 0;
   2113     if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
   2114     if (maxBlockSize < 16) maxBlockSize = 16;
   2115     return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
   2116 }
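
/* Illustrative usage sketch (not part of LZ4): sizing a decoding ring buffer
 * for a known maximum block size. The 64 KB block size is an arbitrary example
 * value; the name is hypothetical. */
#if 0
static int exampleRingBufferBytes(void)
{
    return LZ4_decoderRingBufferSize(64 * 1024);   /* 0 would indicate an invalid maxBlockSize */
}
#endif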
   2117 
   2118 /*
   2119 *_continue() :
   2120     These decoding functions allow decompression of multiple blocks in "streaming" mode.
   2121     Previously decoded blocks must still be available at the memory position where they were decoded.
   2122     If that's not possible, save the relevant part of the decoded data into a safe buffer,
   2123     and indicate where it stands using LZ4_setStreamDecode().
   2124 */
   2125 LZ4_FORCE_O2_GCC_PPC64LE
   2126 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
   2127 {
   2128     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2129     int result;
   2130 
   2131     if (lz4sd->prefixSize == 0) {
   2132         /* The first call, no dictionary yet. */
   2133         assert(lz4sd->extDictSize == 0);
   2134         result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
   2135         if (result <= 0) return result;
   2136         lz4sd->prefixSize = (size_t)result;
   2137         lz4sd->prefixEnd = (BYTE*)dest + result;
   2138     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
   2139         /* They're rolling the current segment. */
   2140         if (lz4sd->prefixSize >= 64 KB - 1)
   2141             result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
   2142         else if (lz4sd->extDictSize == 0)
   2143             result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
   2144                                                          lz4sd->prefixSize);
   2145         else
   2146             result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
   2147                                                     lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
   2148         if (result <= 0) return result;
   2149         lz4sd->prefixSize += (size_t)result;
   2150         lz4sd->prefixEnd  += result;
   2151     } else {
   2152         /* The buffer wraps around, or they're switching to another buffer. */
   2153         lz4sd->extDictSize = lz4sd->prefixSize;
   2154         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
   2155         result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
   2156                                                   lz4sd->externalDict, lz4sd->extDictSize);
   2157         if (result <= 0) return result;
   2158         lz4sd->prefixSize = (size_t)result;
   2159         lz4sd->prefixEnd  = (BYTE*)dest + result;
   2160     }
   2161 
   2162     return result;
   2163 }
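
/* Illustrative usage sketch (not part of LZ4): decoding a sequence of dependent
 * blocks into one contiguous buffer with LZ4_decompress_safe_continue(). Each
 * block may reference previously decoded data, which stays at its position.
 * Names and the (blocks, blockSizes) layout are hypothetical. */
#if 0
static int exampleDecodeAllBlocks(LZ4_streamDecode_t* sd,
                                  const char* const* blocks, const int* blockSizes, int nbBlocks,
                                  char* dst, int dstCapacity)
{
    int i, total = 0;
    LZ4_setStreamDecode(sd, dst, 0);   /* size 0 : start with no dictionary */
    for (i = 0; i < nbBlocks; i++) {
        int const decoded = LZ4_decompress_safe_continue(sd, blocks[i], dst + total,
                                                         blockSizes[i], dstCapacity - total);
        if (decoded <= 0) return -1;   /* corrupted or truncated block */
        total += decoded;              /* earlier output remains available as history */
    }
    return total;
}
#endif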
   2164 
   2165 LZ4_FORCE_O2_GCC_PPC64LE
   2166 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
   2167 {
   2168     LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2169     int result;
   2170     assert(originalSize >= 0);
   2171 
   2172     if (lz4sd->prefixSize == 0) {
   2173         assert(lz4sd->extDictSize == 0);
   2174         result = LZ4_decompress_fast(source, dest, originalSize);
   2175         if (result <= 0) return result;
   2176         lz4sd->prefixSize = (size_t)originalSize;
   2177         lz4sd->prefixEnd = (BYTE*)dest + originalSize;
   2178     } else if (lz4sd->prefixEnd == (BYTE*)dest) {
   2179         if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0)
   2180             result = LZ4_decompress_fast(source, dest, originalSize);
   2181         else
   2182             result = LZ4_decompress_fast_doubleDict(source, dest, originalSize,
   2183                                                     lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
   2184         if (result <= 0) return result;
   2185         lz4sd->prefixSize += (size_t)originalSize;
   2186         lz4sd->prefixEnd  += originalSize;
   2187     } else {
   2188         lz4sd->extDictSize = lz4sd->prefixSize;
   2189         lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
   2190         result = LZ4_decompress_fast_extDict(source, dest, originalSize,
   2191                                              lz4sd->externalDict, lz4sd->extDictSize);
   2192         if (result <= 0) return result;
   2193         lz4sd->prefixSize = (size_t)originalSize;
   2194         lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
   2195     }
   2196 
   2197     return result;
   2198 }
   2199 
   2200 
   2201 /*
   2202 Advanced decoding functions :
   2203 *_usingDict() :
   2204     These decoding functions work the same as the "_continue" ones,
   2205     except that the dictionary must be provided explicitly as a parameter.
   2206 */
   2207 
   2208 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
   2209 {
   2210     if (dictSize==0)
   2211         return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
   2212     if (dictStart+dictSize == dest) {
   2213         if (dictSize >= 64 KB - 1)
   2214             return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
   2215         return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
   2216     }
   2217     return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
   2218 }
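
/* Illustrative usage sketch (not part of LZ4): one-shot decompression against an
 * explicit external dictionary via LZ4_decompress_safe_usingDict(). Names are
 * hypothetical. */
#if 0
static int exampleDecodeWithDict(const char* compressed, int compressedSize,
                                 char* dst, int dstCapacity,
                                 const char* dict, int dictSize)
{
    return LZ4_decompress_safe_usingDict(compressed, dst, compressedSize,
                                         dstCapacity, dict, dictSize);
}
#endif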
   2219 
   2220 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
   2221 {
   2222     if (dictSize==0 || dictStart+dictSize == dest)
   2223         return LZ4_decompress_fast(source, dest, originalSize);
   2224     return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
   2225 }
   2226 
   2227 
   2228 /*=*************************************************
   2229 *  Obsolete Functions
   2230 ***************************************************/
   2231 /* obsolete compression functions */
   2232 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
   2233 {
   2234     return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
   2235 }
   2236 int LZ4_compress(const char* source, char* dest, int inputSize)
   2237 {
   2238     return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
   2239 }
   2240 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
   2241 {
   2242     return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
   2243 }
   2244 int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
   2245 {
   2246     return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
   2247 }
   2248 int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
   2249 {
   2250     return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
   2251 }
   2252 int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
   2253 {
   2254     return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
   2255 }
   2256 
   2257 /*
   2258 These decompression functions are deprecated and should no longer be used.
   2259 They are only provided here for compatibility with older user programs.
   2260 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
   2261 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
   2262 */
   2263 int LZ4_uncompress (const char* source, char* dest, int outputSize)
   2264 {
   2265     return LZ4_decompress_fast(source, dest, outputSize);
   2266 }
   2267 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
   2268 {
   2269     return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
   2270 }
   2271 
   2272 /* Obsolete Streaming functions */
   2273 
   2274 int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
   2275 
   2276 int LZ4_resetStreamState(void* state, char* inputBuffer)
   2277 {
   2278     (void)inputBuffer;
   2279     LZ4_resetStream((LZ4_stream_t*)state);
   2280     return 0;
   2281 }
   2282 
   2283 void* LZ4_create (char* inputBuffer)
   2284 {
   2285     (void)inputBuffer;
   2286     return LZ4_createStream();
   2287 }
   2288 
   2289 char* LZ4_slideInputBuffer (void* state)
   2290 {
   2291     /* avoid const char * -> char * conversion warning */
   2292     return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
   2293 }
   2294 
   2295 #endif   /* LZ4_COMMONDEFS_ONLY */
   2296 
   2297 }