medfall

A super great game engine
Log | Files | Refs

stb_image.h (250035B)


      1 /* stb_image - v2.15 - public domain image loader - http://nothings.org/stb_image.h
      2                                      no warranty implied; use at your own risk
      3 
      4    Do this:
      5       #define STB_IMAGE_IMPLEMENTATION
      6    before you include this file in *one* C or C++ file to create the implementation.
      7 
      8    // i.e. it should look like this:
      9    #include ...
     10    #include ...
     11    #include ...
     12    #define STB_IMAGE_IMPLEMENTATION
     13    #include "stb_image.h"
     14 
     15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
     16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
     17 
     18 
     19    QUICK NOTES:
     20       Primarily of interest to game developers and other people who can
     21           avoid problematic images and only need the trivial interface
     22 
     23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
     24       PNG 1/2/4/8/16-bit-per-channel
     25 
     26       TGA (not sure what subset, if a subset)
     27       BMP non-1bpp, non-RLE
     28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
     29 
     30       GIF (*comp always reports as 4-channel)
     31       HDR (radiance rgbE format)
     32       PIC (Softimage PIC)
     33       PNM (PPM and PGM binary only)
     34 
     35       Animated GIF still needs a proper API, but here's one way to do it:
     36           http://gist.github.com/urraka/685d9a6340b26b830d49
     37 
     38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
     39       - decode from arbitrary I/O callbacks
     40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
     41 
     42    Full documentation under "DOCUMENTATION" below.
     43 
     44 
     45 LICENSE
     46 
     47   See end of file for license information.
     48 
     49 RECENT REVISION HISTORY:
     50 
     51       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
     52       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
     53       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
     54       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
     55       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
     56                          RGB-format JPEG; remove white matting in PSD;
     57                          allocate large structures on the stack;
     58                          correct channel count for PNG & BMP
     59       2.10  (2016-01-22) avoid warning introduced in 2.09
     60       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
     61       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
     62       2.07  (2015-09-13) partial animated GIF support
     63                          limited 16-bit PSD support
     64                          minor bugs, code cleanup, and compiler warnings
     65 
     66    See end of file for full revision history.
     67 
     68 
     69  ============================    Contributors    =========================
     70 
     71  Image formats                          Extensions, features
     72     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
     73     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
     74     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
     75     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
     76     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
     77     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
     78     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
     79     github:urraka (animated gif)           Junggon Kim (PNM comments)
     80                                            Daniel Gibson (16-bit TGA)
     81                                            socks-the-fox (16-bit PNG)
     82                                            Jeremy Sawicki (handle all ImageNet JPGs)
     83  Optimizations & bugfixes
     84     Fabian "ryg" Giesen
     85     Arseny Kapoulkine
     86 
     87  Bug & warning fixes
     88     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
     89     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
     90     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
     91     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
     92     the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
     93     Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
     94     Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
     95     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
     96     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
     97     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
     98     Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
     99     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
    100     Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
    101 
    102 */
    103 
    104 #ifndef STBI_INCLUDE_STB_IMAGE_H
    105 #define STBI_INCLUDE_STB_IMAGE_H
    106 
    107 // DOCUMENTATION
    108 //
    109 // Limitations:
    110 //    - 16-bit-per-channel output only for PNG (experimental stbi_load_16 API)
    111 //    - no 12-bit-per-channel JPEG
    112 //    - no JPEGs with arithmetic coding
    113 //    - no 1-bit BMP
    114 //    - GIF always returns *comp=4
    115 //
    116 // Basic usage (see HDR discussion below for HDR usage):
    117 //    int x,y,n;
    118 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
    119 //    // ... process data if not NULL ...
    120 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
    121 //    // ... replace '0' with '1'..'4' to force that many components per pixel
    122 //    // ... but 'n' will always be the number that it would have been if you said 0
    123 //    stbi_image_free(data)
    124 //
    125 // Standard parameters:
    126 //    int *x                 -- outputs image width in pixels
    127 //    int *y                 -- outputs image height in pixels
    128 //    int *channels_in_file  -- outputs # of image components in image file
    129 //    int desired_channels   -- if non-zero, # of image components requested in result
    130 //
    131 // The return value from an image loader is an 'unsigned char *' which points
    132 // to the pixel data, or NULL on an allocation failure or if the image is
    133 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
    134 // with each pixel consisting of N interleaved 8-bit components; the first
    135 // pixel pointed to is top-left-most in the image. There is no padding between
    136 // image scanlines or between pixels, regardless of format. The number of
    137 // components N is 'desired_channels' if it is non-zero, or *channels_in_file otherwise.
    138 // If desired_channels is non-zero, *channels_in_file has the number of components that
    139 // _would_ have been output otherwise. E.g. if you set desired_channels to 4, you will
    140 // always get RGBA output, but you can check *channels_in_file to see if it's trivially
    141 // opaque because e.g. there were only 3 channels in the source image.
    142 //
    143 // An output image with N components has the following components interleaved
    144 // in this order in each pixel:
    145 //
    146 //     N=#comp     components
    147 //       1           grey
    148 //       2           grey, alpha
    149 //       3           red, green, blue
    150 //       4           red, green, blue, alpha
    151 //
    152 // If image loading fails for any reason, the return value will be NULL,
    153 // and *x, *y, *channels_in_file will be unchanged. The function stbi_failure_reason()
    154 // can be queried for an extremely brief, end-user unfriendly explanation
    155 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
    156 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
    157 // more user-friendly ones.
    158 //
    159 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
    160 //
    161 // ===========================================================================
    162 //
    163 // Philosophy
    164 //
    165 // stb libraries are designed with the following priorities:
    166 //
    167 //    1. easy to use
    168 //    2. easy to maintain
    169 //    3. good performance
    170 //
    171 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
    172 // and for best performance I may provide less-easy-to-use APIs that give higher
    173 // performance, in addition to the easy to use ones. Nevertheless, it's important
    174 // to keep in mind that from the standpoint of you, a client of this library,
    175 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
    176 //
    177 // Some secondary priorities arise directly from the first two, some of which
    178 // make more explicit reasons why performance can't be emphasized.
    179 //
    180 //    - Portable ("ease of use")
    181 //    - Small source code footprint ("easy to maintain")
    182 //    - No dependencies ("ease of use")
    183 //
    184 // ===========================================================================
    185 //
    186 // I/O callbacks
    187 //
    188 // I/O callbacks allow you to read from arbitrary sources, like packaged
    189 // files or some other source. Data read from callbacks are processed
    190 // through a small internal buffer (currently 128 bytes) to try to reduce
    191 // overhead.
    192 //
    193 // The three functions you must define are "read" (reads some bytes of data),
    194 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
    195 //
    196 // ===========================================================================
    197 //
    198 // SIMD support
    199 //
    200 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
    201 // supported by the compiler. For ARM Neon support, you must explicitly
    202 // request it.
    203 //
    204 // (The old do-it-yourself SIMD API is no longer supported in the current
    205 // code.)
    206 //
    207 // On x86 with MSVC, SSE2 is used when a run-time CPUID test reports it; GCC/Clang
    208 // builds use SSE2 only when compiled with it enabled (e.g. -msse2), with no run-time test. Otherwise the generic C versions are used as a fall-back. On ARM targets,
    209 // the typical path is to have separate builds for NEON and non-NEON devices
    210 // (at least this is true for iOS and Android). Therefore, the NEON support is
    211 // toggled by a build flag: define STBI_NEON to get NEON loops.
    212 //
    213 // If for some reason you do not want to use any of SIMD code, or if
    214 // you have issues compiling it, you can disable it entirely by
    215 // defining STBI_NO_SIMD.
    216 //
    217 // ===========================================================================
    218 //
    219 // HDR image support   (disable by defining STBI_NO_HDR)
    220 //
    221 // stb_image now supports loading HDR images in general, and currently
    222 // the Radiance .HDR file format, although the support is provided
    223 // generically. You can still load any file through the existing interface;
    224 // if you attempt to load an HDR file, it will be automatically remapped to
    225 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
    226 // both of these constants can be reconfigured through this interface:
    227 //
    228 //     stbi_hdr_to_ldr_gamma(2.2f);
    229 //     stbi_hdr_to_ldr_scale(1.0f);
    230 //
    231 // (note, do not use _inverse_ constants; stbi_image will invert them
    232 // appropriately).
    233 //
    234 // Additionally, there is a new, parallel interface for loading files as
    235 // (linear) floats to preserve the full dynamic range:
    236 //
    237 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
    238 //
    239 // If you load LDR images through this interface, those images will
    240 // be promoted to floating point values, run through the inverse of
    241 // constants corresponding to the above:
    242 //
    243 //     stbi_ldr_to_hdr_scale(1.0f);
    244 //     stbi_ldr_to_hdr_gamma(2.2f);
    245 //
    246 // Finally, given a filename (or an open file or memory block--see header
    247 // file for details) containing image data, you can query for the "most
    248 // appropriate" interface to use (that is, whether the image is HDR or
    249 // not), using:
    250 //
    251 //     stbi_is_hdr(char *filename);
    252 //
    253 // ===========================================================================
    254 //
    255 // iPhone PNG support:
    256 //
    257 // By default we convert iphone-formatted PNGs back to RGB, even though
    258 // they are internally encoded differently. You can disable this conversion
    259 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
    260 // you will always just get the native iphone "format" through (which
    261 // is BGR stored in RGB).
    262 //
    263 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
    264 // pixel to remove any premultiplied alpha *only* if the image file explicitly
    265 // says there's premultiplied data (currently only happens in iPhone images,
    266 // and only if iPhone convert-to-rgb processing is on).
    267 //
    268 // ===========================================================================
    269 //
    270 // ADDITIONAL CONFIGURATION
    271 //
    272 //  - You can suppress implementation of any of the decoders to reduce
    273 //    your code footprint by #defining one or more of the following
    274 //    symbols before creating the implementation.
    275 //
    276 //        STBI_NO_JPEG
    277 //        STBI_NO_PNG
    278 //        STBI_NO_BMP
    279 //        STBI_NO_PSD
    280 //        STBI_NO_TGA
    281 //        STBI_NO_GIF
    282 //        STBI_NO_HDR
    283 //        STBI_NO_PIC
    284 //        STBI_NO_PNM   (.ppm and .pgm)
    285 //
    286 //  - You can request *only* certain decoders and suppress all other ones
    287 //    (this will be more forward-compatible, as addition of new decoders
    288 //    doesn't require you to disable them explicitly):
    289 //
    290 //        STBI_ONLY_JPEG
    291 //        STBI_ONLY_PNG
    292 //        STBI_ONLY_BMP
    293 //        STBI_ONLY_PSD
    294 //        STBI_ONLY_TGA
    295 //        STBI_ONLY_GIF
    296 //        STBI_ONLY_HDR
    297 //        STBI_ONLY_PIC
    298 //        STBI_ONLY_PNM   (.ppm and .pgm)
    299 //
    300 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
    301 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
    302 //
    303 
    304 
    305 #ifndef STBI_NO_STDIO
    306 #include <stdio.h>
    307 #endif // STBI_NO_STDIO
    308 
    309 #define STBI_VERSION 1
    310 
    311 enum // named component counts (see the N=#comp table in the documentation above)
    312 {
    313    STBI_default = 0, // only used for desired_channels: 0 means no specific channel count is requested
    314 
    315    STBI_grey       = 1, // grey
    316    STBI_grey_alpha = 2, // grey, alpha
    317    STBI_rgb        = 3, // red, green, blue
    318    STBI_rgb_alpha  = 4  // red, green, blue, alpha
    319 };
    320 
    321 typedef unsigned char stbi_uc;
    322 typedef unsigned short stbi_us;
    323 
    324 #ifdef __cplusplus
    325 extern "C" {
    326 #endif
    327 
    328 #ifdef STB_IMAGE_STATIC
    329 #define STBIDEF static
    330 #else
    331 #define STBIDEF extern
    332 #endif
    333 
    334 //////////////////////////////////////////////////////////////////////////////
    335 //
    336 // PRIMARY API - works on images of any type
    337 //
    338 
    339 //
    340 // load image by filename, open file, or memory buffer
    341 //
    342 
    343 typedef struct // table of caller-supplied I/O functions; every callback takes the caller's opaque 'user' pointer as its first argument
    344 {
    345    int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
    346    void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
    347    int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
    348 } stbi_io_callbacks;
    349 
    350 ////////////////////////////////////
    351 //
    352 // 8-bits-per-channel interface
    353 //
    354 
    355 STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
    356 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
    357 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
    358 
    359 #ifndef STBI_NO_STDIO
    360 STBIDEF stbi_uc *stbi_load_from_file   (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    361 // for stbi_load_from_file, file pointer is left pointing immediately after image
    362 #endif
    363 
    364 ////////////////////////////////////
    365 //
    366 // 16-bits-per-channel interface
    367 //
    368 
    369 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
    370 #ifndef STBI_NO_STDIO
    371 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    372 #endif
    373 // @TODO the other variants
    374 
    375 ////////////////////////////////////
    376 //
    377 // float-per-channel interface
    378 //
    379 #ifndef STBI_NO_LINEAR
    380    STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
    381    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
    382    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
    383 
    384    #ifndef STBI_NO_STDIO
    385    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
    386    #endif
    387 #endif
    388 
    389 #ifndef STBI_NO_HDR
    390    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
    391    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
    392 #endif // STBI_NO_HDR
    393 
    394 #ifndef STBI_NO_LINEAR
    395    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
    396    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
    397 #endif // STBI_NO_LINEAR
    398 
    399 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
    400 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
    401 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
    402 #ifndef STBI_NO_STDIO
    403 STBIDEF int      stbi_is_hdr          (char const *filename);
    404 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
    405 #endif // STBI_NO_STDIO
    406 
    407 
    408 // get a VERY brief reason for failure
    409 // NOT THREADSAFE
    410 STBIDEF const char *stbi_failure_reason  (void);
    411 
    412 // free the loaded image -- this is just free()
    413 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
    414 
    415 // get image dimensions & components without fully decoding
    416 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
    417 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
    418 
    419 #ifndef STBI_NO_STDIO
    420 STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
    421 STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
    422 
    423 #endif
    424 
    425 
    426 
    427 // for image formats that explicitly notate that they have premultiplied alpha,
    428 // we just return the colors as stored in the file. set this flag to force
    429 // unpremultiplication. results are undefined if the unpremultiply overflows.
    430 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
    431 
    432 // indicate whether we should process iphone images back to canonical format,
    433 // or just pass them through "as-is"
    434 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
    435 
    436 // flip the image vertically, so the first pixel in the output array is the bottom left
    437 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
    438 
    439 // ZLIB client - used by PNG, available for other purposes
    440 
    441 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
    442 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
    443 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
    444 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
    445 
    446 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
    447 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
    448 
    449 
    450 #ifdef __cplusplus
    451 }
    452 #endif
    453 
    454 //
    455 //
    456 ////   end header file   /////////////////////////////////////////////////////
    457 #endif // STBI_INCLUDE_STB_IMAGE_H
    458 
    459 #ifdef STB_IMAGE_IMPLEMENTATION
    460 
    461 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
    462   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
    463   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
    464   || defined(STBI_ONLY_ZLIB)
    465    #ifndef STBI_ONLY_JPEG
    466    #define STBI_NO_JPEG
    467    #endif
    468    #ifndef STBI_ONLY_PNG
    469    #define STBI_NO_PNG
    470    #endif
    471    #ifndef STBI_ONLY_BMP
    472    #define STBI_NO_BMP
    473    #endif
    474    #ifndef STBI_ONLY_PSD
    475    #define STBI_NO_PSD
    476    #endif
    477    #ifndef STBI_ONLY_TGA
    478    #define STBI_NO_TGA
    479    #endif
    480    #ifndef STBI_ONLY_GIF
    481    #define STBI_NO_GIF
    482    #endif
    483    #ifndef STBI_ONLY_HDR
    484    #define STBI_NO_HDR
    485    #endif
    486    #ifndef STBI_ONLY_PIC
    487    #define STBI_NO_PIC
    488    #endif
    489    #ifndef STBI_ONLY_PNM
    490    #define STBI_NO_PNM
    491    #endif
    492 #endif
    493 
    494 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
    495 #define STBI_NO_ZLIB
    496 #endif
    497 
    498 
    499 #include <stdarg.h>
    500 #include <stddef.h> // ptrdiff_t on osx
    501 #include <stdlib.h>
    502 #include <string.h>
    503 #include <limits.h>
    504 
    505 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
    506 #include <math.h>  // ldexp
    507 #endif
    508 
    509 #ifndef STBI_NO_STDIO
    510 #include <stdio.h>
    511 #endif
    512 
    513 #ifndef STBI_ASSERT
    514 #include <assert.h>
    515 #define STBI_ASSERT(x) assert(x)
    516 #endif
    517 
    518 
    519 #ifndef _MSC_VER
    520    #ifdef __cplusplus
    521    #define stbi_inline inline
    522    #else
    523    #define stbi_inline
    524    #endif
    525 #else
    526    #define stbi_inline __forceinline
    527 #endif
    528 
    529 
    530 #ifdef _MSC_VER
    531 typedef unsigned short stbi__uint16;
    532 typedef   signed short stbi__int16;
    533 typedef unsigned int   stbi__uint32;
    534 typedef   signed int   stbi__int32;
    535 #else
    536 #include <stdint.h>
    537 typedef uint16_t stbi__uint16;
    538 typedef int16_t  stbi__int16;
    539 typedef uint32_t stbi__uint32;
    540 typedef int32_t  stbi__int32;
    541 #endif
    542 
    543 // should produce compiler error if size is wrong
    544 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
    545 
    546 #ifdef _MSC_VER
    547 #define STBI_NOTUSED(v)  (void)(v)
    548 #else
    549 #define STBI_NOTUSED(v)  (void)sizeof(v)
    550 #endif
    551 
    552 #ifdef _MSC_VER
    553 #define STBI_HAS_LROTL
    554 #endif
    555 
    556 #ifdef STBI_HAS_LROTL
    557    #define stbi_lrot(x,y)  _lrotl(x,y)
    558 #else
    559    #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
    560 #endif
    561 
    562 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
    563 // ok
    564 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
    565 // ok
    566 #else
    567 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
    568 #endif
    569 
    570 #ifndef STBI_MALLOC
    571 #define STBI_MALLOC(sz)           malloc(sz)
    572 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
    573 #define STBI_FREE(p)              free(p)
    574 #endif
    575 
    576 #ifndef STBI_REALLOC_SIZED
    577 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
    578 #endif
    579 
    580 // x86/x64 detection
    581 #if defined(__x86_64__) || defined(_M_X64)
    582 #define STBI__X64_TARGET
    583 #elif defined(__i386) || defined(_M_IX86)
    584 #define STBI__X86_TARGET
    585 #endif
    586 
    587 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
    588 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
    589 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
    590 // but previous attempts to provide the SSE2 functions with runtime
    591 // detection caused numerous issues. The way architecture extensions are
    592 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
    593 // New behavior: if compiled with -msse2, we use SSE2 without any
    594 // detection; if not, we don't use it at all.
    595 #define STBI_NO_SIMD
    596 #endif
    597 
    598 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
    599 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
    600 //
    601 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
    602 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
    603 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
    604 // simultaneously enabling "-mstackrealign".
    605 //
    606 // See https://github.com/nothings/stb/issues/81 for more information.
    607 //
    608 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
    609 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
    610 #define STBI_NO_SIMD
    611 #endif
    612 
    613 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
    614 #define STBI_SSE2
    615 #include <emmintrin.h>
    616 
    617 #ifdef _MSC_VER
    618 
    619 #if _MSC_VER >= 1400  // not VC6
    620 #include <intrin.h> // __cpuid
    621 static int stbi__cpuid3(void) // MSVC >= 1400 variant: issues the CPUID query through the <intrin.h> intrinsic
    622 {
    623    int info[4]; // receives the four values written by __cpuid
    624    __cpuid(info,1); // query function 1
    625    return info[3]; // last of the four values; the inline-asm variant of this function returns EDX for the same query
    626 }
    627 #else
    628 static int stbi__cpuid3(void) // older-MSVC variant (the #else of the _MSC_VER >= 1400 test): same query written as inline assembly
    629 {
    630    int res; // receives EDX after the cpuid instruction
    631    __asm {
    632       mov  eax,1
    633       cpuid
    634       mov  res,edx
    635    }
    636    return res; // EDX as produced by cpuid with EAX=1
    637 }
    638 #endif
    639 
    640 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
    641 
    642 static int stbi__sse2_available() // MSVC variant: run-time SSE2 check. NOTE(review): `()` is not a prototype in C before C23; `(void)` would be
    643 {
    644    int info3 = stbi__cpuid3(); // value returned by the cpuid helper defined just above
    645    return ((info3 >> 26) & 1) != 0; // 1 when bit 26 of that value is set, 0 otherwise
    646 }
    647 #else // assume GCC-style if not VC++
    648 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
    649 
    650 static int stbi__sse2_available() // non-MSVC variant: no run-time test, always reports SSE2 as available. NOTE(review): `()` is not a prototype in C before C23
    651 {
    652    // If we're even attempting to compile this on GCC/Clang, that means
    653    // -msse2 is on, which means the compiler is allowed to use SSE2
    654    // instructions at will, and so are we.
    655    return 1; // unconditional: this code is only compiled when STBI_SSE2 is defined
    656 }
    657 #endif
    658 #endif
    659 
    660 // ARM NEON
    661 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
    662 #undef STBI_NEON
    663 #endif
    664 
    665 #ifdef STBI_NEON
    666 #include <arm_neon.h>
    667 // assume GCC or Clang on ARM targets
    668 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
    669 #endif
    670 
    671 #ifndef STBI_SIMD_ALIGN
    672 #define STBI_SIMD_ALIGN(type, name) type name
    673 #endif
    674 
    675 ///////////////////////////////////////////////
    676 //
    677 //  stbi__context struct and start_xxx functions
    678 
    679 // stbi__context structure is our basic context used by all images, so it
    680 // contains all the IO context, plus some basic image information
    681 typedef struct
    682 {
    683    stbi__uint32 img_x, img_y; // presumably image width and height; written by code outside this view — TODO confirm
    684    int img_n, img_out_n; // presumably channel counts in the file / in the output — TODO confirm
    685 
    686    stbi_io_callbacks io; // by-value copy of the callback table (stbi__start_callbacks); io.read is NULL for memory sources (stbi__start_mem)
    687    void *io_user_data; // the 'user' pointer given to stbi__start_callbacks
    688 
    689    int read_from_callbacks; // 0 after stbi__start_mem, 1 after stbi__start_callbacks
    690    int buflen; // set to sizeof(buffer_start) by stbi__start_callbacks
    691    stbi_uc buffer_start[128]; // small internal buffer used for callback-based input
    692 
    693    stbi_uc *img_buffer, *img_buffer_end; // current input window; img_buffer_end is one past the last byte (buffer+len in stbi__start_mem)
    694    stbi_uc *img_buffer_original, *img_buffer_original_end; // window recorded when the context was started; stbi__rewind restores it
    695 } stbi__context;
    696 
    697 
    698 static void stbi__refill_buffer(stbi__context *s);
    699 
    700 // initialize a memory-decode context: all input is the caller's buffer of 'len' bytes, no callbacks are installed
    701 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
    702 {
    703    s->io.read = NULL; // no read callback for an in-memory source
    704    s->read_from_callbacks = 0;
    705    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; // cast drops const; nothing in this function writes through the pointer
    706    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; // one past the last input byte
    707 }
    708 
    709 // initialize a callback-based context: stores the callback table and 'user' pointer, then primes the internal buffer
    710 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
    711 {
    712    s->io = *c; // the table is copied by value into the context
    713    s->io_user_data = user;
    714    s->buflen = sizeof(s->buffer_start);
    715    s->read_from_callbacks = 1;
    716    s->img_buffer_original = s->buffer_start; // the "original" window starts at the internal buffer
    717    stbi__refill_buffer(s); // defined outside this view; presumably loads the first chunk and sets img_buffer/img_buffer_end — TODO confirm
    718    s->img_buffer_original_end = s->img_buffer_end; // remember where that first fill ended, for stbi__rewind
    719 }
    720 
    721 #ifndef STBI_NO_STDIO
    722 
    723 static int stbi__stdio_read(void *user, char *data, int size) // 'read' callback for stdio: 'user' is treated as a FILE*
    724 {
    725    return (int) fread(data,1,size,(FILE*) user); // item size is 1, so fread's result is the number of bytes actually read
    726 }
    727 
    728 static void stbi__stdio_skip(void *user, int n) // 'skip' callback for stdio: moves the file position by n bytes
    729 {
    730    fseek((FILE*) user, n, SEEK_CUR); // relative seek, so a negative n moves backwards; the fseek result is not checked
    731 }
    732 
    733 static int stbi__stdio_eof(void *user) // 'eof' callback for stdio: 'user' is treated as a FILE*; returns nonzero when no more data can be read
    734 {
    735    return feof((FILE*) user) || ferror((FILE*) user); // a stream in error state never sets the EOF indicator, so report it as end-of-data too
    736 }
    737 
    738 static stbi_io_callbacks stbi__stdio_callbacks = // callback table that routes the three I/O operations to the stdio wrappers above
    739 {
    740    stbi__stdio_read, // read member
    741    stbi__stdio_skip, // skip member
    742    stbi__stdio_eof, // eof member
    743 };
    744 
    745 static void stbi__start_file(stbi__context *s, FILE *f)
    746 {
    747    stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
    748 }
    749 
    750 //static void stop_file(stbi__context *s) { }
    751 
    752 #endif // !STBI_NO_STDIO
    753 
    754 static void stbi__rewind(stbi__context *s)
    755 {
    756    // conceptually rewind SHOULD rewind to the beginning of the stream,
    757    // but we just rewind to the beginning of the initial buffer, because
    758    // we only use it after doing 'test', which only ever looks at at most 92 bytes
    759    s->img_buffer = s->img_buffer_original;
    760    s->img_buffer_end = s->img_buffer_original_end;
    761 }
    762 
    763 enum
    764 {
    765    STBI_ORDER_RGB,
    766    STBI_ORDER_BGR
    767 };
    768 
    769 typedef struct
    770 {
    771    int bits_per_channel;
    772    int num_channels;
    773    int channel_order;
    774 } stbi__result_info;
    775 
// Forward declarations of the per-format entry points. Each format that is
// not compiled out via its STBI_NO_<FORMAT> macro declares three functions:
//    *_test  - used by stbi__load_main to pick a loader
//    *_load  - decode and return the pixel buffer
//    *_info  - takes only x/y/comp outputs (no req_comp, no result info)
// The PSD loader additionally takes the requested bits-per-channel (bpc),
// and the HDR loader returns float data.
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif
    829 
    830 // this is not threadsafe
    831 static const char *stbi__g_failure_reason;
    832 
    833 STBIDEF const char *stbi_failure_reason(void)
    834 {
    835    return stbi__g_failure_reason;
    836 }
    837 
// Record 'str' as the current failure reason (readable afterwards through
// stbi_failure_reason) and return 0, so a failing function can simply
// "return stbi__err(...)". The pointer is stored as-is, not copied.
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}
    843 
    844 static void *stbi__malloc(size_t size)
    845 {
    846     return STBI_MALLOC(size);
    847 }
    848 
    849 // stb_image uses ints pervasively, including for offset calculations.
    850 // therefore the largest decoded image size we can support with the
    851 // current code, even on 64-bit targets, is INT_MAX. this is not a
    852 // significant limitation for the intended use case.
    853 //
    854 // we do, however, need to make sure our size calculations don't
    855 // overflow. hence a few helper functions for size calculations that
    856 // multiply integers together, making sure that they're non-negative
    857 // and no overflow occurs.
    858 
    859 // return 1 if the sum is valid, 0 on overflow.
    860 // negative terms are considered invalid.
    861 static int stbi__addsizes_valid(int a, int b)
    862 {
    863    if (b < 0) return 0;
    864    // now 0 <= b <= INT_MAX, hence also
    865    // 0 <= INT_MAX - b <= INTMAX.
    866    // And "a + b <= INT_MAX" (which might overflow) is the
    867    // same as a <= INT_MAX - b (no overflow)
    868    return a <= INT_MAX - b;
    869 }
    870 
    871 // returns 1 if the product is valid, 0 on overflow.
    872 // negative factors are considered invalid.
    873 static int stbi__mul2sizes_valid(int a, int b)
    874 {
    875    if (a < 0 || b < 0) return 0;
    876    if (b == 0) return 1; // mul-by-0 is always safe
    877    // portable way to check for no overflows in a*b
    878    return a <= INT_MAX/b;
    879 }
    880 
    881 // returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
    882 static int stbi__mad2sizes_valid(int a, int b, int add)
    883 {
    884    return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
    885 }
    886 
    887 // returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
    888 static int stbi__mad3sizes_valid(int a, int b, int c, int add)
    889 {
    890    return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
    891       stbi__addsizes_valid(a*b*c, add);
    892 }
    893 
    894 // returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
    895 static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
    896 {
    897    return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
    898       stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
    899 }
    900 
    901 // mallocs with size overflow checking
    902 static void *stbi__malloc_mad2(int a, int b, int add)
    903 {
    904    if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
    905    return stbi__malloc(a*b + add);
    906 }
    907 
    908 static void *stbi__malloc_mad3(int a, int b, int c, int add)
    909 {
    910    if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
    911    return stbi__malloc(a*b*c + add);
    912 }
    913 
    914 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
    915 {
    916    if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
    917    return stbi__malloc(a*b*c*d + add);
    918 }
    919 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro taking (x, y), two
// alternative messages for the same failure. Which one is recorded depends
// on the build:
//    STBI_NO_FAILURE_STRINGS - nothing is recorded, the macro is just 0
//    STBI_FAILURE_USERMSG    - the second message (y) is recorded
//    otherwise               - the first message (x) is recorded

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both branches of the ?: are NULL: the conditional exists only so that
// stbi__err is evaluated while the expression still has pointer type
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
    934 
// Release a pixel buffer previously returned by one of the stbi_load*
// functions. This forwards to STBI_FREE, the counterpart of the STBI_MALLOC
// used by stbi__malloc.
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}
    939 
    940 #ifndef STBI_NO_LINEAR
    941 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
    942 #endif
    943 
    944 #ifndef STBI_NO_HDR
    945 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
    946 #endif
    947 
    948 static int stbi__vertically_flip_on_load = 0;
    949 
    950 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
    951 {
    952     stbi__vertically_flip_on_load = flag_true_if_should_flip;
    953 }
    954 
    955 static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
    956 {
    957    memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
    958    ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
    959    ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
    960    ri->num_channels = 0;
    961 
    962    #ifndef STBI_NO_JPEG
    963    if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
    964    #endif
    965    #ifndef STBI_NO_PNG
    966    if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
    967    #endif
    968    #ifndef STBI_NO_BMP
    969    if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
    970    #endif
    971    #ifndef STBI_NO_GIF
    972    if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
    973    #endif
    974    #ifndef STBI_NO_PSD
    975    if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
    976    #endif
    977    #ifndef STBI_NO_PIC
    978    if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
    979    #endif
    980    #ifndef STBI_NO_PNM
    981    if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
    982    #endif
    983 
    984    #ifndef STBI_NO_HDR
    985    if (stbi__hdr_test(s)) {
    986       float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
    987       return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
    988    }
    989    #endif
    990 
    991    #ifndef STBI_NO_TGA
    992    // test tga last because it's a crappy test!
    993    if (stbi__tga_test(s))
    994       return stbi__tga_load(s,x,y,comp,req_comp, ri);
    995    #endif
    996 
    997    return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
    998 }
    999 
   1000 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
   1001 {
   1002    int i;
   1003    int img_len = w * h * channels;
   1004    stbi_uc *reduced;
   1005 
   1006    reduced = (stbi_uc *) stbi__malloc(img_len);
   1007    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
   1008 
   1009    for (i = 0; i < img_len; ++i)
   1010       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
   1011 
   1012    STBI_FREE(orig);
   1013    return reduced;
   1014 }
   1015 
   1016 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
   1017 {
   1018    int i;
   1019    int img_len = w * h * channels;
   1020    stbi__uint16 *enlarged;
   1021 
   1022    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
   1023    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   1024 
   1025    for (i = 0; i < img_len; ++i)
   1026       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
   1027 
   1028    STBI_FREE(orig);
   1029    return enlarged;
   1030 }
   1031 
   1032 static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
   1033 {
   1034    stbi__result_info ri;
   1035    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
   1036 
   1037    if (result == NULL)
   1038       return NULL;
   1039 
   1040    if (ri.bits_per_channel != 8) {
   1041       STBI_ASSERT(ri.bits_per_channel == 16);
   1042       result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
   1043       ri.bits_per_channel = 8;
   1044    }
   1045 
   1046    // @TODO: move stbi__convert_format to here
   1047 
   1048    if (stbi__vertically_flip_on_load) {
   1049       int w = *x, h = *y;
   1050       int channels = req_comp ? req_comp : *comp;
   1051       int row,col,z;
   1052       stbi_uc *image = (stbi_uc *) result;
   1053 
   1054       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
   1055       for (row = 0; row < (h>>1); row++) {
   1056          for (col = 0; col < w; col++) {
   1057             for (z = 0; z < channels; z++) {
   1058                stbi_uc temp = image[(row * w + col) * channels + z];
   1059                image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
   1060                image[((h - row - 1) * w + col) * channels + z] = temp;
   1061             }
   1062          }
   1063       }
   1064    }
   1065 
   1066    return (unsigned char *) result;
   1067 }
   1068 
   1069 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
   1070 {
   1071    stbi__result_info ri;
   1072    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
   1073 
   1074    if (result == NULL)
   1075       return NULL;
   1076 
   1077    if (ri.bits_per_channel != 16) {
   1078       STBI_ASSERT(ri.bits_per_channel == 8);
   1079       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
   1080       ri.bits_per_channel = 16;
   1081    }
   1082 
   1083    // @TODO: move stbi__convert_format16 to here
   1084    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
   1085 
   1086    if (stbi__vertically_flip_on_load) {
   1087       int w = *x, h = *y;
   1088       int channels = req_comp ? req_comp : *comp;
   1089       int row,col,z;
   1090       stbi__uint16 *image = (stbi__uint16 *) result;
   1091 
   1092       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
   1093       for (row = 0; row < (h>>1); row++) {
   1094          for (col = 0; col < w; col++) {
   1095             for (z = 0; z < channels; z++) {
   1096                stbi__uint16 temp = image[(row * w + col) * channels + z];
   1097                image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
   1098                image[((h - row - 1) * w + col) * channels + z] = temp;
   1099             }
   1100          }
   1101       }
   1102    }
   1103 
   1104    return (stbi__uint16 *) result;
   1105 }
   1106 
   1107 #ifndef STBI_NO_HDR
   1108 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
   1109 {
   1110    if (stbi__vertically_flip_on_load && result != NULL) {
   1111       int w = *x, h = *y;
   1112       int depth = req_comp ? req_comp : *comp;
   1113       int row,col,z;
   1114       float temp;
   1115 
   1116       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
   1117       for (row = 0; row < (h>>1); row++) {
   1118          for (col = 0; col < w; col++) {
   1119             for (z = 0; z < depth; z++) {
   1120                temp = result[(row * w + col) * depth + z];
   1121                result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
   1122                result[((h - row - 1) * w + col) * depth + z] = temp;
   1123             }
   1124          }
   1125       }
   1126    }
   1127 }
   1128 #endif
   1129 
   1130 #ifndef STBI_NO_STDIO
   1131 
   1132 static FILE *stbi__fopen(char const *filename, char const *mode)
   1133 {
   1134    FILE *f;
   1135 #if defined(_MSC_VER) && _MSC_VER >= 1400
   1136    if (0 != fopen_s(&f, filename, mode))
   1137       f=0;
   1138 #else
   1139    f = fopen(filename, mode);
   1140 #endif
   1141    return f;
   1142 }
   1143 
   1144 
   1145 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
   1146 {
   1147    FILE *f = stbi__fopen(filename, "rb");
   1148    unsigned char *result;
   1149    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
   1150    result = stbi_load_from_file(f,x,y,comp,req_comp);
   1151    fclose(f);
   1152    return result;
   1153 }
   1154 
   1155 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
   1156 {
   1157    unsigned char *result;
   1158    stbi__context s;
   1159    stbi__start_file(&s,f);
   1160    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   1161    if (result) {
   1162       // need to 'unget' all the characters in the IO buffer
   1163       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   1164    }
   1165    return result;
   1166 }
   1167 
   1168 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
   1169 {
   1170    stbi__uint16 *result;
   1171    stbi__context s;
   1172    stbi__start_file(&s,f);
   1173    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
   1174    if (result) {
   1175       // need to 'unget' all the characters in the IO buffer
   1176       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   1177    }
   1178    return result;
   1179 }
   1180 
   1181 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
   1182 {
   1183    FILE *f = stbi__fopen(filename, "rb");
   1184    stbi__uint16 *result;
   1185    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
   1186    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
   1187    fclose(f);
   1188    return result;
   1189 }
   1190 
   1191 
   1192 #endif //!STBI_NO_STDIO
   1193 
   1194 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
   1195 {
   1196    stbi__context s;
   1197    stbi__start_mem(&s,buffer,len);
   1198    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   1199 }
   1200 
   1201 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
   1202 {
   1203    stbi__context s;
   1204    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   1205    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   1206 }
   1207 
   1208 #ifndef STBI_NO_LINEAR
   1209 static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
   1210 {
   1211    unsigned char *data;
   1212    #ifndef STBI_NO_HDR
   1213    if (stbi__hdr_test(s)) {
   1214       stbi__result_info ri;
   1215       float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
   1216       if (hdr_data)
   1217          stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
   1218       return hdr_data;
   1219    }
   1220    #endif
   1221    data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   1222    if (data)
   1223       return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
   1224    return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   1225 }
   1226 
   1227 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
   1228 {
   1229    stbi__context s;
   1230    stbi__start_mem(&s,buffer,len);
   1231    return stbi__loadf_main(&s,x,y,comp,req_comp);
   1232 }
   1233 
   1234 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
   1235 {
   1236    stbi__context s;
   1237    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   1238    return stbi__loadf_main(&s,x,y,comp,req_comp);
   1239 }
   1240 
   1241 #ifndef STBI_NO_STDIO
   1242 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
   1243 {
   1244    float *result;
   1245    FILE *f = stbi__fopen(filename, "rb");
   1246    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
   1247    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
   1248    fclose(f);
   1249    return result;
   1250 }
   1251 
   1252 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
   1253 {
   1254    stbi__context s;
   1255    stbi__start_file(&s,f);
   1256    return stbi__loadf_main(&s,x,y,comp,req_comp);
   1257 }
   1258 #endif // !STBI_NO_STDIO
   1259 
   1260 #endif // !STBI_NO_LINEAR
   1261 
// these is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
   1265 
   1266 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
   1267 {
   1268    #ifndef STBI_NO_HDR
   1269    stbi__context s;
   1270    stbi__start_mem(&s,buffer,len);
   1271    return stbi__hdr_test(&s);
   1272    #else
   1273    STBI_NOTUSED(buffer);
   1274    STBI_NOTUSED(len);
   1275    return 0;
   1276    #endif
   1277 }
   1278 
   1279 #ifndef STBI_NO_STDIO
   1280 STBIDEF int      stbi_is_hdr          (char const *filename)
   1281 {
   1282    FILE *f = stbi__fopen(filename, "rb");
   1283    int result=0;
   1284    if (f) {
   1285       result = stbi_is_hdr_from_file(f);
   1286       fclose(f);
   1287    }
   1288    return result;
   1289 }
   1290 
   1291 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
   1292 {
   1293    #ifndef STBI_NO_HDR
   1294    stbi__context s;
   1295    stbi__start_file(&s,f);
   1296    return stbi__hdr_test(&s);
   1297    #else
   1298    STBI_NOTUSED(f);
   1299    return 0;
   1300    #endif
   1301 }
   1302 #endif // !STBI_NO_STDIO
   1303 
   1304 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
   1305 {
   1306    #ifndef STBI_NO_HDR
   1307    stbi__context s;
   1308    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   1309    return stbi__hdr_test(&s);
   1310    #else
   1311    STBI_NOTUSED(clbk);
   1312    STBI_NOTUSED(user);
   1313    return 0;
   1314    #endif
   1315 }
   1316 
   1317 #ifndef STBI_NO_LINEAR
   1318 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
   1319 
   1320 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
   1321 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
   1322 #endif
   1323 
   1324 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
   1325 
   1326 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
   1327 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
   1328 
   1329 
   1330 //////////////////////////////////////////////////////////////////////////////
   1331 //
   1332 // Common code used by all image loaders
   1333 //
   1334 
   1335 enum
   1336 {
   1337    STBI__SCAN_load=0,
   1338    STBI__SCAN_type,
   1339    STBI__SCAN_header
   1340 };
   1341 
   1342 static void stbi__refill_buffer(stbi__context *s)
   1343 {
   1344    int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   1345    if (n == 0) {
   1346       // at end of file, treat same as if from memory, but need to handle case
   1347       // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
   1348       s->read_from_callbacks = 0;
   1349       s->img_buffer = s->buffer_start;
   1350       s->img_buffer_end = s->buffer_start+1;
   1351       *s->img_buffer = 0;
   1352    } else {
   1353       s->img_buffer = s->buffer_start;
   1354       s->img_buffer_end = s->buffer_start + n;
   1355    }
   1356 }
   1357 
   1358 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
   1359 {
   1360    if (s->img_buffer < s->img_buffer_end)
   1361       return *s->img_buffer++;
   1362    if (s->read_from_callbacks) {
   1363       stbi__refill_buffer(s);
   1364       return *s->img_buffer++;
   1365    }
   1366    return 0;
   1367 }
   1368 
   1369 stbi_inline static int stbi__at_eof(stbi__context *s)
   1370 {
   1371    if (s->io.read) {
   1372       if (!(s->io.eof)(s->io_user_data)) return 0;
   1373       // if feof() is true, check if buffer = end
   1374       // special case: we've only got the special 0 character at the end
   1375       if (s->read_from_callbacks == 0) return 1;
   1376    }
   1377 
   1378    return s->img_buffer >= s->img_buffer_end;
   1379 }
   1380 
   1381 static void stbi__skip(stbi__context *s, int n)
   1382 {
   1383    if (n < 0) {
   1384       s->img_buffer = s->img_buffer_end;
   1385       return;
   1386    }
   1387    if (s->io.read) {
   1388       int blen = (int) (s->img_buffer_end - s->img_buffer);
   1389       if (blen < n) {
   1390          s->img_buffer = s->img_buffer_end;
   1391          (s->io.skip)(s->io_user_data, n - blen);
   1392          return;
   1393       }
   1394    }
   1395    s->img_buffer += n;
   1396 }
   1397 
   1398 static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
   1399 {
   1400    if (s->io.read) {
   1401       int blen = (int) (s->img_buffer_end - s->img_buffer);
   1402       if (blen < n) {
   1403          int res, count;
   1404 
   1405          memcpy(buffer, s->img_buffer, blen);
   1406 
   1407          count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
   1408          res = (count == (n-blen));
   1409          s->img_buffer = s->img_buffer_end;
   1410          return res;
   1411       }
   1412    }
   1413 
   1414    if (s->img_buffer+n <= s->img_buffer_end) {
   1415       memcpy(buffer, s->img_buffer, n);
   1416       s->img_buffer += n;
   1417       return 1;
   1418    } else
   1419       return 0;
   1420 }
   1421 
   1422 static int stbi__get16be(stbi__context *s)
   1423 {
   1424    int z = stbi__get8(s);
   1425    return (z << 8) + stbi__get8(s);
   1426 }
   1427 
   1428 static stbi__uint32 stbi__get32be(stbi__context *s)
   1429 {
   1430    stbi__uint32 z = stbi__get16be(s);
   1431    return (z << 16) + stbi__get16be(s);
   1432 }
   1433 
   1434 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
   1435 // nothing
   1436 #else
   1437 static int stbi__get16le(stbi__context *s)
   1438 {
   1439    int z = stbi__get8(s);
   1440    return z + (stbi__get8(s) << 8);
   1441 }
   1442 #endif
   1443 
   1444 #ifndef STBI_NO_BMP
   1445 static stbi__uint32 stbi__get32le(stbi__context *s)
   1446 {
   1447    stbi__uint32 z = stbi__get16le(s);
   1448    return z + (stbi__get16le(s) << 16);
   1449 }
   1450 #endif
   1451 
   1452 #define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
   1453 
   1454 
   1455 //////////////////////////////////////////////////////////////////////////////
   1456 //
   1457 //  generic converter from built-in img_n to req_comp
   1458 //    individual types do this automatically as much as possible (e.g. jpeg
   1459 //    does all cases internally since it needs to colorspace convert anyway,
   1460 //    and it never has alpha, so very few cases ). png can automatically
   1461 //    interleave an alpha=255 channel, but falls back to this for other cases
   1462 //
   1463 //  assume data buffer is malloced, so malloc a new one and free that one
   1464 //  only failure mode is malloc failing
   1465 
   1466 static stbi_uc stbi__compute_y(int r, int g, int b)
   1467 {
   1468    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
   1469 }
   1470 
// Convert an 8-bit image of x*y pixels from img_n to req_comp channels.
//   data     - source pixels; returned unchanged when req_comp == img_n,
//              otherwise freed (on success and on allocation failure)
//   img_n    - channels per pixel in 'data'
//   req_comp - channels per pixel wanted, 1..4 (asserted)
// Returns the converted buffer, or NULL with the failure reason set to
// "outofmem" when the overflow-checked allocation fails.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   // one scanline per iteration; src/dest walk the row pixel by pixel
   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      // STBI__COMBO packs (source channels, target channels) into one switch key;
      // STBI__CASE opens the case label plus the per-pixel loop for that pair
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
   1514 
   1515 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
   1516 {
   1517    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
   1518 }
   1519 
   1520 static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
   1521 {
   1522    int i,j;
   1523    stbi__uint16 *good;
   1524 
   1525    if (req_comp == img_n) return data;
   1526    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
   1527 
   1528    good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
   1529    if (good == NULL) {
   1530       STBI_FREE(data);
   1531       return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   1532    }
   1533 
   1534    for (j=0; j < (int) y; ++j) {
   1535       stbi__uint16 *src  = data + j * x * img_n   ;
   1536       stbi__uint16 *dest = good + j * x * req_comp;
   1537 
   1538       #define STBI__COMBO(a,b)  ((a)*8+(b))
   1539       #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
   1540       // convert source image with img_n components to one with req_comp components;
   1541       // avoid switch per pixel, so use switch per scanline and massive macros
   1542       switch (STBI__COMBO(img_n, req_comp)) {
   1543          STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
   1544          STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
   1545          STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
   1546          STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
   1547          STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
   1548          STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
   1549          STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
   1550          STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
   1551          STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
   1552          STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
   1553          STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
   1554          STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
   1555          default: STBI_ASSERT(0);
   1556       }
   1557       #undef STBI__CASE
   1558    }
   1559 
   1560    STBI_FREE(data);
   1561    return good;
   1562 }
   1563 
   1564 #ifndef STBI_NO_LINEAR
   1565 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
   1566 {
   1567    int i,k,n;
   1568    float *output;
   1569    if (!data) return NULL;
   1570    output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   1571    if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
   1572    // compute number of non-alpha components
   1573    if (comp & 1) n = comp; else n = comp-1;
   1574    for (i=0; i < x*y; ++i) {
   1575       for (k=0; k < n; ++k) {
   1576          output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
   1577       }
   1578       if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
   1579    }
   1580    STBI_FREE(data);
   1581    return output;
   1582 }
   1583 #endif
   1584 
   1585 #ifndef STBI_NO_HDR
   1586 #define stbi__float2int(x)   ((int) (x))
   1587 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
   1588 {
   1589    int i,k,n;
   1590    stbi_uc *output;
   1591    if (!data) return NULL;
   1592    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
   1593    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
   1594    // compute number of non-alpha components
   1595    if (comp & 1) n = comp; else n = comp-1;
   1596    for (i=0; i < x*y; ++i) {
   1597       for (k=0; k < n; ++k) {
   1598          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
   1599          if (z < 0) z = 0;
   1600          if (z > 255) z = 255;
   1601          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
   1602       }
   1603       if (k < comp) {
   1604          float z = data[i*comp+k] * 255 + 0.5f;
   1605          if (z < 0) z = 0;
   1606          if (z > 255) z = 255;
   1607          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
   1608       }
   1609    }
   1610    STBI_FREE(data);
   1611    return output;
   1612 }
   1613 #endif
   1614 
   1615 //////////////////////////////////////////////////////////////////////////////
   1616 //
   1617 //  "baseline" JPEG/JFIF decoder
   1618 //
   1619 //    simple implementation
   1620 //      - doesn't support delayed output of y-dimension
   1621 //      - simple interface (only one output format: 8-bit interleaved RGB)
   1622 //      - doesn't try to recover corrupt jpegs
   1623 //      - doesn't allow partial loading, loading multiple at once
   1624 //      - still fast on x86 (copying globals into locals doesn't help x86)
   1625 //      - allocates lots of intermediate memory (full size of all components)
   1626 //        - non-interleaved case requires this anyway
   1627 //        - allows good upsampling (see next)
   1628 //    high-quality
   1629 //      - upsampled channels are bilinearly interpolated, even across blocks
   1630 //      - quality integer IDCT derived from IJG's 'slow'
   1631 //    performance
   1632 //      - fast huffman; reasonable integer IDCT
   1633 //      - some SIMD kernels for common paths on targets with SSE2/NEON
   1634 //      - uses a lot of intermediate memory, could cache poorly
   1635 
   1636 #ifndef STBI_NO_JPEG
   1637 
   1638 // huffman decoding acceleration
   1639 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
   1640 
// One JPEG huffman table in the form used by the decoder. Symbols are
// numbered 0..k-1 in the order stbi__build_huffman assigns their codes;
// that number is the "symbol index" below.
typedef struct
{
   // indexed by the next FAST_BITS bits of the stream: the symbol index if
   // the code is at most FAST_BITS long, or 255 for "not accelerated"
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];      // huffman code per symbol index
   stbi_uc  values[256];        // decoded value per symbol index (filled in outside stbi__build_huffman)
   stbi_uc  size[257];          // code length in bits per symbol index; a 0 entry terminates the list
   unsigned int maxcode[18];    // per code length: largest code + 1, preshifted to 16 bits; [17] is an all-ones sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
   1651 
// Complete state of one JPEG decode: input context, decoding tables,
// per-component buffers, the entropy-coded bit reader and scan parameters.
typedef struct
{
   stbi__context *s;                        // input the bit reader pulls bytes from
   stbi__huffman huff_dc[4];                // DC huffman tables
   stbi__huffman huff_ac[4];                // AC huffman tables
   stbi__uint16 dequant[4][64];             // dequantization multipliers, applied per coefficient while decoding
   stbi__int16 fast_ac[4][1 << FAST_BITS];  // combined run/length/value tables, see stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;      // running DC predictor: each decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;  // first zigzag index handled by the current progressive scan
   int            spec_end;    // last zigzag index handled by the current progressive scan
   int            succ_high;   // 0 on the first pass over a band, nonzero on refinement passes
   int            succ_low;    // bit position the current pass contributes (used as a shift count)
   int            eob_run;     // blocks still covered by the current end-of-band run
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
   1705 
   1706 static int stbi__build_huffman(stbi__huffman *h, int *count)
   1707 {
   1708    int i,j,k=0,code;
   1709    // build size list for each symbol (from JPEG spec)
   1710    for (i=0; i < 16; ++i)
   1711       for (j=0; j < count[i]; ++j)
   1712          h->size[k++] = (stbi_uc) (i+1);
   1713    h->size[k] = 0;
   1714 
   1715    // compute actual symbols (from jpeg spec)
   1716    code = 0;
   1717    k = 0;
   1718    for(j=1; j <= 16; ++j) {
   1719       // compute delta to add to code to compute symbol id
   1720       h->delta[j] = k - code;
   1721       if (h->size[k] == j) {
   1722          while (h->size[k] == j)
   1723             h->code[k++] = (stbi__uint16) (code++);
   1724          if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
   1725       }
   1726       // compute largest code + 1 for this size, preshifted as needed later
   1727       h->maxcode[j] = code << (16-j);
   1728       code <<= 1;
   1729    }
   1730    h->maxcode[j] = 0xffffffff;
   1731 
   1732    // build non-spec acceleration table; 255 is flag for not-accelerated
   1733    memset(h->fast, 255, 1 << FAST_BITS);
   1734    for (i=0; i < k; ++i) {
   1735       int s = h->size[i];
   1736       if (s <= FAST_BITS) {
   1737          int c = h->code[i] << (FAST_BITS-s);
   1738          int m = 1 << (FAST_BITS-s);
   1739          for (j=0; j < m; ++j) {
   1740             h->fast[c+j] = (stbi_uc) i;
   1741          }
   1742       }
   1743    }
   1744    return 1;
   1745 }
   1746 
   1747 // build a table that decodes both magnitude and value of small ACs in
   1748 // one go.
   1749 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
   1750 {
   1751    int i;
   1752    for (i=0; i < (1 << FAST_BITS); ++i) {
   1753       stbi_uc fast = h->fast[i];
   1754       fast_ac[i] = 0;
   1755       if (fast < 255) {
   1756          int rs = h->values[fast];
   1757          int run = (rs >> 4) & 15;
   1758          int magbits = rs & 15;
   1759          int len = h->size[fast];
   1760 
   1761          if (magbits && len + magbits <= FAST_BITS) {
   1762             // magnitude code followed by receive_extend code
   1763             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
   1764             int m = 1 << (magbits - 1);
   1765             if (k < m) k += (~0U << magbits) + 1;
   1766             // if the result is small enough, we can fit it in fast_ac table
   1767             if (k >= -128 && k <= 127)
   1768                fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
   1769          }
   1770       }
   1771    }
   1772 }
   1773 
   1774 static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
   1775 {
   1776    do {
   1777       int b = j->nomore ? 0 : stbi__get8(j->s);
   1778       if (b == 0xff) {
   1779          int c = stbi__get8(j->s);
   1780          while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
   1781          if (c != 0) {
   1782             j->marker = (unsigned char) c;
   1783             j->nomore = 1;
   1784             return;
   1785          }
   1786       }
   1787       j->code_buffer |= b << (24 - j->code_bits);
   1788       j->code_bits += 8;
   1789    } while (j->code_bits <= 24);
   1790 }
   1791 
   1792 // (1 << n) - 1
   1793 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
   1794 
   1795 // decode a jpeg huffman value from the bitstream
   1796 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
   1797 {
   1798    unsigned int temp;
   1799    int c,k;
   1800 
   1801    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   1802 
   1803    // look at the top FAST_BITS and determine what symbol ID it is,
   1804    // if the code is <= FAST_BITS
   1805    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
   1806    k = h->fast[c];
   1807    if (k < 255) {
   1808       int s = h->size[k];
   1809       if (s > j->code_bits)
   1810          return -1;
   1811       j->code_buffer <<= s;
   1812       j->code_bits -= s;
   1813       return h->values[k];
   1814    }
   1815 
   1816    // naive test is to shift the code_buffer down so k bits are
   1817    // valid, then test against maxcode. To speed this up, we've
   1818    // preshifted maxcode left so that it has (16-k) 0s at the
   1819    // end; in other words, regardless of the number of bits, it
   1820    // wants to be compared against something shifted to have 16;
   1821    // that way we don't need to shift inside the loop.
   1822    temp = j->code_buffer >> 16;
   1823    for (k=FAST_BITS+1 ; ; ++k)
   1824       if (temp < h->maxcode[k])
   1825          break;
   1826    if (k == 17) {
   1827       // error! code not found
   1828       j->code_bits -= 16;
   1829       return -1;
   1830    }
   1831 
   1832    if (k > j->code_bits)
   1833       return -1;
   1834 
   1835    // convert the huffman code to the symbol id
   1836    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
   1837    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
   1838 
   1839    // convert the id to a symbol
   1840    j->code_bits -= k;
   1841    j->code_buffer <<= k;
   1842    return h->values[c];
   1843 }
   1844 
   1845 // bias[n] = (-1<<n) + 1
   1846 static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
   1847 
   1848 // combined JPEG 'receive' and JPEG 'extend', since baseline
   1849 // always extends everything it receives.
   1850 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
   1851 {
   1852    unsigned int k;
   1853    int sgn;
   1854    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
   1855 
   1856    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
   1857    k = stbi_lrot(j->code_buffer, n);
   1858    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
   1859    j->code_buffer = k & ~stbi__bmask[n];
   1860    k &= stbi__bmask[n];
   1861    j->code_bits -= n;
   1862    return k + (stbi__jbias[n] & ~sgn);
   1863 }
   1864 
   1865 // get some unsigned bits
   1866 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
   1867 {
   1868    unsigned int k;
   1869    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
   1870    k = stbi_lrot(j->code_buffer, n);
   1871    j->code_buffer = k & ~stbi__bmask[n];
   1872    k &= stbi__bmask[n];
   1873    j->code_bits -= n;
   1874    return k;
   1875 }
   1876 
   1877 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
   1878 {
   1879    unsigned int k;
   1880    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
   1881    k = j->code_buffer;
   1882    j->code_buffer <<= 1;
   1883    --j->code_bits;
   1884    return k & 0x80000000;
   1885 }
   1886 
   1887 // given a value that's at position X in the zigzag stream,
   1888 // where does it appear in the 8x8 matrix coded as row-major?
   1889 static stbi_uc stbi__jpeg_dezigzag[64+15] =
   1890 {
   1891     0,  1,  8, 16,  9,  2,  3, 10,
   1892    17, 24, 32, 25, 18, 11,  4,  5,
   1893    12, 19, 26, 33, 40, 48, 41, 34,
   1894    27, 20, 13,  6,  7, 14, 21, 28,
   1895    35, 42, 49, 56, 57, 50, 43, 36,
   1896    29, 22, 15, 23, 30, 37, 44, 51,
   1897    58, 59, 52, 45, 38, 31, 39, 46,
   1898    53, 60, 61, 54, 47, 55, 62, 63,
   1899    // let corrupt input sample past end
   1900    63, 63, 63, 63, 63, 63, 63, 63,
   1901    63, 63, 63, 63, 63, 63, 63
   1902 };
   1903 
   1904 // decode one 64-entry block--
   1905 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
   1906 {
   1907    int diff,dc,k;
   1908    int t;
   1909 
   1910    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   1911    t = stbi__jpeg_huff_decode(j, hdc);
   1912    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
   1913 
   1914    // 0 all the ac values now so we can do it 32-bits at a time
   1915    memset(data,0,64*sizeof(data[0]));
   1916 
   1917    diff = t ? stbi__extend_receive(j, t) : 0;
   1918    dc = j->img_comp[b].dc_pred + diff;
   1919    j->img_comp[b].dc_pred = dc;
   1920    data[0] = (short) (dc * dequant[0]);
   1921 
   1922    // decode AC components, see JPEG spec
   1923    k = 1;
   1924    do {
   1925       unsigned int zig;
   1926       int c,r,s;
   1927       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   1928       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
   1929       r = fac[c];
   1930       if (r) { // fast-AC path
   1931          k += (r >> 4) & 15; // run
   1932          s = r & 15; // combined length
   1933          j->code_buffer <<= s;
   1934          j->code_bits -= s;
   1935          // decode into unzigzag'd location
   1936          zig = stbi__jpeg_dezigzag[k++];
   1937          data[zig] = (short) ((r >> 8) * dequant[zig]);
   1938       } else {
   1939          int rs = stbi__jpeg_huff_decode(j, hac);
   1940          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
   1941          s = rs & 15;
   1942          r = rs >> 4;
   1943          if (s == 0) {
   1944             if (rs != 0xf0) break; // end block
   1945             k += 16;
   1946          } else {
   1947             k += r;
   1948             // decode into unzigzag'd location
   1949             zig = stbi__jpeg_dezigzag[k++];
   1950             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
   1951          }
   1952       }
   1953    } while (k < 64);
   1954    return 1;
   1955 }
   1956 
   1957 static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
   1958 {
   1959    int diff,dc;
   1960    int t;
   1961    if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
   1962 
   1963    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   1964 
   1965    if (j->succ_high == 0) {
   1966       // first scan for DC coefficient, must be first
   1967       memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
   1968       t = stbi__jpeg_huff_decode(j, hdc);
   1969       diff = t ? stbi__extend_receive(j, t) : 0;
   1970 
   1971       dc = j->img_comp[b].dc_pred + diff;
   1972       j->img_comp[b].dc_pred = dc;
   1973       data[0] = (short) (dc << j->succ_low);
   1974    } else {
   1975       // refinement scan for DC coefficient
   1976       if (stbi__jpeg_get_bit(j))
   1977          data[0] += (short) (1 << j->succ_low);
   1978    }
   1979    return 1;
   1980 }
   1981 
   1982 // @OPTIMIZE: store non-zigzagged during the decode passes,
   1983 // and only de-zigzag when dequantizing
   1984 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
   1985 {
   1986    int k;
   1987    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
   1988 
   1989    if (j->succ_high == 0) {
   1990       int shift = j->succ_low;
   1991 
   1992       if (j->eob_run) {
   1993          --j->eob_run;
   1994          return 1;
   1995       }
   1996 
   1997       k = j->spec_start;
   1998       do {
   1999          unsigned int zig;
   2000          int c,r,s;
   2001          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   2002          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
   2003          r = fac[c];
   2004          if (r) { // fast-AC path
   2005             k += (r >> 4) & 15; // run
   2006             s = r & 15; // combined length
   2007             j->code_buffer <<= s;
   2008             j->code_bits -= s;
   2009             zig = stbi__jpeg_dezigzag[k++];
   2010             data[zig] = (short) ((r >> 8) << shift);
   2011          } else {
   2012             int rs = stbi__jpeg_huff_decode(j, hac);
   2013             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
   2014             s = rs & 15;
   2015             r = rs >> 4;
   2016             if (s == 0) {
   2017                if (r < 15) {
   2018                   j->eob_run = (1 << r);
   2019                   if (r)
   2020                      j->eob_run += stbi__jpeg_get_bits(j, r);
   2021                   --j->eob_run;
   2022                   break;
   2023                }
   2024                k += 16;
   2025             } else {
   2026                k += r;
   2027                zig = stbi__jpeg_dezigzag[k++];
   2028                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
   2029             }
   2030          }
   2031       } while (k <= j->spec_end);
   2032    } else {
   2033       // refinement scan for these AC coefficients
   2034 
   2035       short bit = (short) (1 << j->succ_low);
   2036 
   2037       if (j->eob_run) {
   2038          --j->eob_run;
   2039          for (k = j->spec_start; k <= j->spec_end; ++k) {
   2040             short *p = &data[stbi__jpeg_dezigzag[k]];
   2041             if (*p != 0)
   2042                if (stbi__jpeg_get_bit(j))
   2043                   if ((*p & bit)==0) {
   2044                      if (*p > 0)
   2045                         *p += bit;
   2046                      else
   2047                         *p -= bit;
   2048                   }
   2049          }
   2050       } else {
   2051          k = j->spec_start;
   2052          do {
   2053             int r,s;
   2054             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
   2055             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
   2056             s = rs & 15;
   2057             r = rs >> 4;
   2058             if (s == 0) {
   2059                if (r < 15) {
   2060                   j->eob_run = (1 << r) - 1;
   2061                   if (r)
   2062                      j->eob_run += stbi__jpeg_get_bits(j, r);
   2063                   r = 64; // force end of block
   2064                } else {
   2065                   // r=15 s=0 should write 16 0s, so we just do
   2066                   // a run of 15 0s and then write s (which is 0),
   2067                   // so we don't have to do anything special here
   2068                }
   2069             } else {
   2070                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
   2071                // sign bit
   2072                if (stbi__jpeg_get_bit(j))
   2073                   s = bit;
   2074                else
   2075                   s = -bit;
   2076             }
   2077 
   2078             // advance by r
   2079             while (k <= j->spec_end) {
   2080                short *p = &data[stbi__jpeg_dezigzag[k++]];
   2081                if (*p != 0) {
   2082                   if (stbi__jpeg_get_bit(j))
   2083                      if ((*p & bit)==0) {
   2084                         if (*p > 0)
   2085                            *p += bit;
   2086                         else
   2087                            *p -= bit;
   2088                      }
   2089                } else {
   2090                   if (r == 0) {
   2091                      *p = (short) s;
   2092                      break;
   2093                   }
   2094                   --r;
   2095                }
   2096             }
   2097          } while (k <= j->spec_end);
   2098       }
   2099    }
   2100    return 1;
   2101 }
   2102 
   2103 // take a -128..127 value and stbi__clamp it and convert to 0..255
   2104 stbi_inline static stbi_uc stbi__clamp(int x)
   2105 {
   2106    // trick to use a single test to catch both cases
   2107    if ((unsigned int) x > 255) {
   2108       if (x < 0) return 0;
   2109       if (x > 255) return 255;
   2110    }
   2111    return (stbi_uc) x;
   2112 }
   2113 
   2114 #define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
   2115 #define stbi__fsh(x)  ((x) << 12)
   2116 
// derived from jidctint -- DCT_ISLOW
//
// One 8-point 1-D inverse DCT on inputs s0..s7. The macro DECLARES the
// locals t0..t3, p1..p5 and x0..x3 in the enclosing scope, so it can be
// expanded only once per scope. On exit the even-part results are in
// x0..x3 and the odd-part results in t0..t3, all scaled by 1<<12 through
// stbi__f2f/stbi__fsh; the caller forms the eight outputs as
// x0+-t3, x1+-t2, x2+-t1, x3+-t0 and applies its own rounding and shift.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
   2154 
   2155 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
   2156 {
   2157    int i,val[64],*v=val;
   2158    stbi_uc *o;
   2159    short *d = data;
   2160 
   2161    // columns
   2162    for (i=0; i < 8; ++i,++d, ++v) {
   2163       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
   2164       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
   2165            && d[40]==0 && d[48]==0 && d[56]==0) {
   2166          //    no shortcut                 0     seconds
   2167          //    (1|2|3|4|5|6|7)==0          0     seconds
   2168          //    all separate               -0.047 seconds
   2169          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
   2170          int dcterm = d[0] << 2;
   2171          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
   2172       } else {
   2173          STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
   2174          // constants scaled things up by 1<<12; let's bring them back
   2175          // down, but keep 2 extra bits of precision
   2176          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
   2177          v[ 0] = (x0+t3) >> 10;
   2178          v[56] = (x0-t3) >> 10;
   2179          v[ 8] = (x1+t2) >> 10;
   2180          v[48] = (x1-t2) >> 10;
   2181          v[16] = (x2+t1) >> 10;
   2182          v[40] = (x2-t1) >> 10;
   2183          v[24] = (x3+t0) >> 10;
   2184          v[32] = (x3-t0) >> 10;
   2185       }
   2186    }
   2187 
   2188    for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
   2189       // no fast case since the first 1D IDCT spread components out
   2190       STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
   2191       // constants scaled things up by 1<<12, plus we had 1<<2 from first
   2192       // loop, plus horizontal and vertical each scale by sqrt(8) so together
   2193       // we've got an extra 1<<3, so 1<<17 total we need to remove.
   2194       // so we want to round that, which means adding 0.5 * 1<<17,
   2195       // aka 65536. Also, we'll end up with -128 to 127 that we want
   2196       // to encode as 0..255 by adding 128, so we'll add that before the shift
   2197       x0 += 65536 + (128<<17);
   2198       x1 += 65536 + (128<<17);
   2199       x2 += 65536 + (128<<17);
   2200       x3 += 65536 + (128<<17);
   2201       // tried computing the shifts into temps, or'ing the temps to see
   2202       // if any were out of range, but that was slower
   2203       o[0] = stbi__clamp((x0+t3) >> 17);
   2204       o[7] = stbi__clamp((x0-t3) >> 17);
   2205       o[1] = stbi__clamp((x1+t2) >> 17);
   2206       o[6] = stbi__clamp((x1-t2) >> 17);
   2207       o[2] = stbi__clamp((x2+t1) >> 17);
   2208       o[5] = stbi__clamp((x2-t1) >> 17);
   2209       o[3] = stbi__clamp((x3+t0) >> 17);
   2210       o[4] = stbi__clamp((x3-t0) >> 17);
   2211    }
   2212 }
   2213 
   2214 #ifdef STBI_SSE2
   2215 // sse2 integer IDCT. not the fastest possible implementation but it
   2216 // produces bit-identical results to the generic C version so it's
   2217 // fully "transparent".
   2218 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
   2219 {
   2220    // This is constructed to match our regular (generic) integer IDCT exactly.
   2221    __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   2222    __m128i tmp;
   2223 
   2224    // dot product constant: even elems=x, odd elems=y
   2225    #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
   2226 
   2227    // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   2228    // out(1) = c1[even]*x + c1[odd]*y
   2229    #define dct_rot(out0,out1, x,y,c0,c1) \
   2230       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
   2231       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
   2232       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
   2233       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
   2234       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
   2235       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
   2236 
   2237    // out = in << 12  (in 16-bit, out 32-bit)
   2238    #define dct_widen(out, in) \
   2239       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
   2240       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
   2241 
   2242    // wide add
   2243    #define dct_wadd(out, a, b) \
   2244       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
   2245       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
   2246 
   2247    // wide sub
   2248    #define dct_wsub(out, a, b) \
   2249       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
   2250       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
   2251 
   2252    // butterfly a/b, add bias, then shift by "s" and pack
   2253    #define dct_bfly32o(out0, out1, a,b,bias,s) \
   2254       { \
   2255          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
   2256          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
   2257          dct_wadd(sum, abiased, b); \
   2258          dct_wsub(dif, abiased, b); \
   2259          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
   2260          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
   2261       }
   2262 
   2263    // 8-bit interleave step (for transposes)
   2264    #define dct_interleave8(a, b) \
   2265       tmp = a; \
   2266       a = _mm_unpacklo_epi8(a, b); \
   2267       b = _mm_unpackhi_epi8(tmp, b)
   2268 
   2269    // 16-bit interleave step (for transposes)
   2270    #define dct_interleave16(a, b) \
   2271       tmp = a; \
   2272       a = _mm_unpacklo_epi16(a, b); \
   2273       b = _mm_unpackhi_epi16(tmp, b)
   2274 
   2275    #define dct_pass(bias,shift) \
   2276       { \
   2277          /* even part */ \
   2278          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
   2279          __m128i sum04 = _mm_add_epi16(row0, row4); \
   2280          __m128i dif04 = _mm_sub_epi16(row0, row4); \
   2281          dct_widen(t0e, sum04); \
   2282          dct_widen(t1e, dif04); \
   2283          dct_wadd(x0, t0e, t3e); \
   2284          dct_wsub(x3, t0e, t3e); \
   2285          dct_wadd(x1, t1e, t2e); \
   2286          dct_wsub(x2, t1e, t2e); \
   2287          /* odd part */ \
   2288          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
   2289          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
   2290          __m128i sum17 = _mm_add_epi16(row1, row7); \
   2291          __m128i sum35 = _mm_add_epi16(row3, row5); \
   2292          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
   2293          dct_wadd(x4, y0o, y4o); \
   2294          dct_wadd(x5, y1o, y5o); \
   2295          dct_wadd(x6, y2o, y5o); \
   2296          dct_wadd(x7, y3o, y4o); \
   2297          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
   2298          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
   2299          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
   2300          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
   2301       }
   2302 
   2303    __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   2304    __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   2305    __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   2306    __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   2307    __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   2308    __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   2309    __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   2310    __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
   2311 
   2312    // rounding biases in column/row passes, see stbi__idct_block for explanation.
   2313    __m128i bias_0 = _mm_set1_epi32(512);
   2314    __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
   2315 
   2316    // load
   2317    row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   2318    row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   2319    row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   2320    row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   2321    row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   2322    row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   2323    row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   2324    row7 = _mm_load_si128((const __m128i *) (data + 7*8));
   2325 
   2326    // column pass
   2327    dct_pass(bias_0, 10);
   2328 
   2329    {
   2330       // 16bit 8x8 transpose pass 1
   2331       dct_interleave16(row0, row4);
   2332       dct_interleave16(row1, row5);
   2333       dct_interleave16(row2, row6);
   2334       dct_interleave16(row3, row7);
   2335 
   2336       // transpose pass 2
   2337       dct_interleave16(row0, row2);
   2338       dct_interleave16(row1, row3);
   2339       dct_interleave16(row4, row6);
   2340       dct_interleave16(row5, row7);
   2341 
   2342       // transpose pass 3
   2343       dct_interleave16(row0, row1);
   2344       dct_interleave16(row2, row3);
   2345       dct_interleave16(row4, row5);
   2346       dct_interleave16(row6, row7);
   2347    }
   2348 
   2349    // row pass
   2350    dct_pass(bias_1, 17);
   2351 
   2352    {
   2353       // pack
   2354       __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
   2355       __m128i p1 = _mm_packus_epi16(row2, row3);
   2356       __m128i p2 = _mm_packus_epi16(row4, row5);
   2357       __m128i p3 = _mm_packus_epi16(row6, row7);
   2358 
   2359       // 8bit 8x8 transpose pass 1
   2360       dct_interleave8(p0, p2); // a0e0a1e1...
   2361       dct_interleave8(p1, p3); // c0g0c1g1...
   2362 
   2363       // transpose pass 2
   2364       dct_interleave8(p0, p1); // a0c0e0g0...
   2365       dct_interleave8(p2, p3); // b0d0f0h0...
   2366 
   2367       // transpose pass 3
   2368       dct_interleave8(p0, p2); // a0b0c0d0...
   2369       dct_interleave8(p1, p3); // a4b4c4d4...
   2370 
   2371       // store
   2372       _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
   2373       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
   2374       _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
   2375       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
   2376       _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
   2377       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
   2378       _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
   2379       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   2380    }
   2381 
   2382 #undef dct_const
   2383 #undef dct_rot
   2384 #undef dct_widen
   2385 #undef dct_wadd
   2386 #undef dct_wsub
   2387 #undef dct_bfly32o
   2388 #undef dct_interleave8
   2389 #undef dct_interleave16
   2390 #undef dct_pass
   2391 }
   2392 
   2393 #endif // STBI_SSE2
   2394 
   2395 #ifdef STBI_NEON
   2396 
   2397 // NEON integer IDCT. should produce bit-identical
   2398 // results to the generic C version.
        //
        // out:        destination; eight rows of 8 bytes are stored, one row per
        //             vst1_u8, advancing by out_stride between rows.
        // out_stride: distance (in stbi_uc elements) between consecutive output rows.
        // data:       64 coefficients, loaded as eight consecutive rows of 8 shorts.
        //
        // Structure: load rows, add a DC bias, run one 1-D pass, transpose the
        // 8x8 block of 16-bit values, run the second 1-D pass, narrow to bytes,
        // transpose again and store.
   2399 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
   2400 {
   2401    int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
   2402 
           // 16-bit multipliers, each broadcast to all four lanes of a 64-bit vector.
           // NOTE(review): stbi__f2f is defined outside this view; presumably it
           // converts the float to the fixed-point scale used by the IDCT - confirm.
   2403    int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   2404    int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   2405    int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   2406    int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   2407    int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   2408    int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   2409    int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   2410    int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   2411    int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   2412    int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   2413    int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   2414    int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
   2415 
        // All helper macros below DECLARE their outputs: "out" becomes the pair
        // out_l / out_h, holding the 32-bit results for the low and high four
        // lanes of the 8-lane 16-bit input.

        // out = inq * coeff, widened to 32 bits
   2416 #define dct_long_mul(out, inq, coeff) \
   2417    int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   2418    int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
   2419 
        // out = acc + inq * coeff (acc is an existing _l/_h pair)
   2420 #define dct_long_mac(out, acc, inq, coeff) \
   2421    int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   2422    int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
   2423 
        // out = inq widened to 32 bits and shifted left by 12
   2424 #define dct_widen(out, inq) \
   2425    int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   2426    int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
   2427 
   2428 // wide add
   2429 #define dct_wadd(out, a, b) \
   2430    int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   2431    int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
   2432 
   2433 // wide sub
   2434 #define dct_wsub(out, a, b) \
   2435    int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   2436    int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
   2437 
   2438 // butterfly a/b, then shift using "shiftop" by "s" and pack
        // (out0 = a+b, out1 = a-b, each narrowed back to eight 16-bit lanes;
        // the braces keep the sum/dif temporaries local to each use)
   2439 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   2440    { \
   2441       dct_wadd(sum, a, b); \
   2442       dct_wsub(dif, a, b); \
   2443       out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
   2444       out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   2445    }
   2446 
        // One 1-D pass over row0..row7, computed lane-wise and written back into
        // row0..row7. shiftop/shift choose the narrowing shift applied to the
        // 32-bit results. The whole body sits in its own brace scope, so the
        // temporaries it declares do not clash when the macro is used twice.
   2447 #define dct_pass(shiftop, shift) \
   2448    { \
   2449       /* even part */ \
   2450       int16x8_t sum26 = vaddq_s16(row2, row6); \
   2451       dct_long_mul(p1e, sum26, rot0_0); \
   2452       dct_long_mac(t2e, p1e, row6, rot0_1); \
   2453       dct_long_mac(t3e, p1e, row2, rot0_2); \
   2454       int16x8_t sum04 = vaddq_s16(row0, row4); \
   2455       int16x8_t dif04 = vsubq_s16(row0, row4); \
   2456       dct_widen(t0e, sum04); \
   2457       dct_widen(t1e, dif04); \
   2458       dct_wadd(x0, t0e, t3e); \
   2459       dct_wsub(x3, t0e, t3e); \
   2460       dct_wadd(x1, t1e, t2e); \
   2461       dct_wsub(x2, t1e, t2e); \
   2462       /* odd part */ \
   2463       int16x8_t sum15 = vaddq_s16(row1, row5); \
   2464       int16x8_t sum17 = vaddq_s16(row1, row7); \
   2465       int16x8_t sum35 = vaddq_s16(row3, row5); \
   2466       int16x8_t sum37 = vaddq_s16(row3, row7); \
   2467       int16x8_t sumodd = vaddq_s16(sum17, sum35); \
   2468       dct_long_mul(p5o, sumodd, rot1_0); \
   2469       dct_long_mac(p1o, p5o, sum17, rot1_1); \
   2470       dct_long_mac(p2o, p5o, sum35, rot1_2); \
   2471       dct_long_mul(p3o, sum37, rot2_0); \
   2472       dct_long_mul(p4o, sum15, rot2_1); \
   2473       dct_wadd(sump13o, p1o, p3o); \
   2474       dct_wadd(sump24o, p2o, p4o); \
   2475       dct_wadd(sump23o, p2o, p3o); \
   2476       dct_wadd(sump14o, p1o, p4o); \
   2477       dct_long_mac(x4, sump13o, row7, rot3_0); \
   2478       dct_long_mac(x5, sump24o, row5, rot3_1); \
   2479       dct_long_mac(x6, sump23o, row3, rot3_2); \
   2480       dct_long_mac(x7, sump14o, row1, rot3_3); \
   2481       dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
   2482       dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
   2483       dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
   2484       dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   2485    }
   2486 
   2487    // load
   2488    row0 = vld1q_s16(data + 0*8);
   2489    row1 = vld1q_s16(data + 1*8);
   2490    row2 = vld1q_s16(data + 2*8);
   2491    row3 = vld1q_s16(data + 3*8);
   2492    row4 = vld1q_s16(data + 4*8);
   2493    row5 = vld1q_s16(data + 5*8);
   2494    row6 = vld1q_s16(data + 6*8);
   2495    row7 = vld1q_s16(data + 7*8);
   2496 
   2497    // add DC bias
           // (1024 is added to lane 0 of row0 only, i.e. to data[0]; all other lanes get 0)
   2498    row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
   2499 
   2500    // column pass
           // (narrowed with a rounding shift right by 10)
   2501    dct_pass(vrshrn_n_s32, 10);
   2502 
   2503    // 16bit 8x8 transpose
   2504    {
   2505 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
   2506 // whether compilers actually get this is another story, sadly.
   2507 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
   2508 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
   2509 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
   2510 
   2511       // pass 1
   2512       dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
   2513       dct_trn16(row2, row3);
   2514       dct_trn16(row4, row5);
   2515       dct_trn16(row6, row7);
   2516 
   2517       // pass 2
   2518       dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
   2519       dct_trn32(row1, row3);
   2520       dct_trn32(row4, row6);
   2521       dct_trn32(row5, row7);
   2522 
   2523       // pass 3
   2524       dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
   2525       dct_trn64(row1, row5);
   2526       dct_trn64(row2, row6);
   2527       dct_trn64(row3, row7);
   2528 
   2529 #undef dct_trn16
   2530 #undef dct_trn32
   2531 #undef dct_trn64
   2532    }
   2533 
   2534    // row pass
   2535    // vrshrn_n_s32 only supports shifts up to 16, we need
   2536    // 17. so do a non-rounding shift of 16 first then follow
   2537    // up with a rounding shift by 1.
   2538    dct_pass(vshrn_n_s32, 16);
   2539 
   2540    {
   2541       // pack and round
              // (this is the deferred rounding shift by 1; vqrshrun also saturates
              // each 16-bit lane into an unsigned byte)
   2542       uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
   2543       uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
   2544       uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
   2545       uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
   2546       uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
   2547       uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
   2548       uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
   2549       uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
   2550 
   2551       // again, these can translate into one instruction, but often don't.
   2552 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
   2553 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
   2554 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
   2555 
   2556       // sadly can't use interleaved stores here since we only write
   2557       // 8 bytes to each scan line!
   2558 
   2559       // 8x8 8-bit transpose pass 1
   2560       dct_trn8_8(p0, p1);
   2561       dct_trn8_8(p2, p3);
   2562       dct_trn8_8(p4, p5);
   2563       dct_trn8_8(p6, p7);
   2564 
   2565       // pass 2
   2566       dct_trn8_16(p0, p2);
   2567       dct_trn8_16(p1, p3);
   2568       dct_trn8_16(p4, p6);
   2569       dct_trn8_16(p5, p7);
   2570 
   2571       // pass 3
   2572       dct_trn8_32(p0, p4);
   2573       dct_trn8_32(p1, p5);
   2574       dct_trn8_32(p2, p6);
   2575       dct_trn8_32(p3, p7);
   2576 
   2577       // store
              // (one 8-byte row per store; out is not advanced after the last row)
   2578       vst1_u8(out, p0); out += out_stride;
   2579       vst1_u8(out, p1); out += out_stride;
   2580       vst1_u8(out, p2); out += out_stride;
   2581       vst1_u8(out, p3); out += out_stride;
   2582       vst1_u8(out, p4); out += out_stride;
   2583       vst1_u8(out, p5); out += out_stride;
   2584       vst1_u8(out, p6); out += out_stride;
   2585       vst1_u8(out, p7);
   2586 
   2587 #undef dct_trn8_8
   2588 #undef dct_trn8_16
   2589 #undef dct_trn8_32
   2590    }
   2591 
        // the helper macros are local to this function; remove them again
   2592 #undef dct_long_mul
   2593 #undef dct_long_mac
   2594 #undef dct_widen
   2595 #undef dct_wadd
   2596 #undef dct_wsub
   2597 #undef dct_bfly32o
   2598 #undef dct_pass
   2599 }
   2600 
   2601 #endif // STBI_NEON
   2602 
   2603 #define STBI__MARKER_none  0xff
   2604 // if there's a pending marker from the entropy stream, return that
   2605 // otherwise, fetch from the stream and get a marker. if there's no
   2606 // marker, return 0xff, which is never a valid marker value
   2607 static stbi_uc stbi__get_marker(stbi__jpeg *j)
   2608 {
   2609    stbi_uc x;
   2610    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
   2611    x = stbi__get8(j->s);
   2612    if (x != 0xff) return STBI__MARKER_none;
   2613    while (x == 0xff)
   2614       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
   2615    return x;
   2616 }
   2617 
   2618 // in each scan, we'll have scan_n components, and the order
   2619 // of the components is specified by order[]
        // STBI__RESTART(x): nonzero when marker code x lies in 0xd0..0xd7,
        // the range this decoder treats as restart markers. x is evaluated twice.
   2620 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
   2621 
   2622 // after a restart interval, stbi__jpeg_reset the entropy decoder and
   2623 // the dc prediction
   2624 static void stbi__jpeg_reset(stbi__jpeg *j)
   2625 {
   2626    j->code_bits = 0;
   2627    j->code_buffer = 0;
   2628    j->nomore = 0;
   2629    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
   2630    j->marker = STBI__MARKER_none;
   2631    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
   2632    j->eob_run = 0;
   2633    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
   2634    // since we don't even allow 1<<30 pixels
   2635 }
   2636 
   2637 static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
   2638 {
   2639    stbi__jpeg_reset(z);
   2640    if (!z->progressive) {
   2641       if (z->scan_n == 1) {
   2642          int i,j;
   2643          STBI_SIMD_ALIGN(short, data[64]);
   2644          int n = z->order[0];
   2645          // non-interleaved data, we just need to process one block at a time,
   2646          // in trivial scanline order
   2647          // number of blocks to do just depends on how many actual "pixels" this
   2648          // component has, independent of interleaved MCU blocking and such
   2649          int w = (z->img_comp[n].x+7) >> 3;
   2650          int h = (z->img_comp[n].y+7) >> 3;
   2651          for (j=0; j < h; ++j) {
   2652             for (i=0; i < w; ++i) {
   2653                int ha = z->img_comp[n].ha;
   2654                if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
   2655                z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
   2656                // every data block is an MCU, so countdown the restart interval
   2657                if (--z->todo <= 0) {
   2658                   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   2659                   // if it's NOT a restart, then just bail, so we get corrupt data
   2660                   // rather than no data
   2661                   if (!STBI__RESTART(z->marker)) return 1;
   2662                   stbi__jpeg_reset(z);
   2663                }
   2664             }
   2665          }
   2666          return 1;
   2667       } else { // interleaved
   2668          int i,j,k,x,y;
   2669          STBI_SIMD_ALIGN(short, data[64]);
   2670          for (j=0; j < z->img_mcu_y; ++j) {
   2671             for (i=0; i < z->img_mcu_x; ++i) {
   2672                // scan an interleaved mcu... process scan_n components in order
   2673                for (k=0; k < z->scan_n; ++k) {
   2674                   int n = z->order[k];
   2675                   // scan out an mcu's worth of this component; that's just determined
   2676                   // by the basic H and V specified for the component
   2677                   for (y=0; y < z->img_comp[n].v; ++y) {
   2678                      for (x=0; x < z->img_comp[n].h; ++x) {
   2679                         int x2 = (i*z->img_comp[n].h + x)*8;
   2680                         int y2 = (j*z->img_comp[n].v + y)*8;
   2681                         int ha = z->img_comp[n].ha;
   2682                         if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
   2683                         z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
   2684                      }
   2685                   }
   2686                }
   2687                // after all interleaved components, that's an interleaved MCU,
   2688                // so now count down the restart interval
   2689                if (--z->todo <= 0) {
   2690                   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   2691                   if (!STBI__RESTART(z->marker)) return 1;
   2692                   stbi__jpeg_reset(z);
   2693                }
   2694             }
   2695          }
   2696          return 1;
   2697       }
   2698    } else {
   2699       if (z->scan_n == 1) {
   2700          int i,j;
   2701          int n = z->order[0];
   2702          // non-interleaved data, we just need to process one block at a time,
   2703          // in trivial scanline order
   2704          // number of blocks to do just depends on how many actual "pixels" this
   2705          // component has, independent of interleaved MCU blocking and such
   2706          int w = (z->img_comp[n].x+7) >> 3;
   2707          int h = (z->img_comp[n].y+7) >> 3;
   2708          for (j=0; j < h; ++j) {
   2709             for (i=0; i < w; ++i) {
   2710                short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
   2711                if (z->spec_start == 0) {
   2712                   if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
   2713                      return 0;
   2714                } else {
   2715                   int ha = z->img_comp[n].ha;
   2716                   if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
   2717                      return 0;
   2718                }
   2719                // every data block is an MCU, so countdown the restart interval
   2720                if (--z->todo <= 0) {
   2721                   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   2722                   if (!STBI__RESTART(z->marker)) return 1;
   2723                   stbi__jpeg_reset(z);
   2724                }
   2725             }
   2726          }
   2727          return 1;
   2728       } else { // interleaved
   2729          int i,j,k,x,y;
   2730          for (j=0; j < z->img_mcu_y; ++j) {
   2731             for (i=0; i < z->img_mcu_x; ++i) {
   2732                // scan an interleaved mcu... process scan_n components in order
   2733                for (k=0; k < z->scan_n; ++k) {
   2734                   int n = z->order[k];
   2735                   // scan out an mcu's worth of this component; that's just determined
   2736                   // by the basic H and V specified for the component
   2737                   for (y=0; y < z->img_comp[n].v; ++y) {
   2738                      for (x=0; x < z->img_comp[n].h; ++x) {
   2739                         int x2 = (i*z->img_comp[n].h + x);
   2740                         int y2 = (j*z->img_comp[n].v + y);
   2741                         short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
   2742                         if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
   2743                            return 0;
   2744                      }
   2745                   }
   2746                }
   2747                // after all interleaved components, that's an interleaved MCU,
   2748                // so now count down the restart interval
   2749                if (--z->todo <= 0) {
   2750                   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   2751                   if (!STBI__RESTART(z->marker)) return 1;
   2752                   stbi__jpeg_reset(z);
   2753                }
   2754             }
   2755          }
   2756          return 1;
   2757       }
   2758    }
   2759 }
   2760 
   2761 static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
   2762 {
   2763    int i;
   2764    for (i=0; i < 64; ++i)
   2765       data[i] *= dequant[i];
   2766 }
   2767 
   2768 static void stbi__jpeg_finish(stbi__jpeg *z)
   2769 {
   2770    if (z->progressive) {
   2771       // dequantize and idct the data
   2772       int i,j,n;
   2773       for (n=0; n < z->s->img_n; ++n) {
   2774          int w = (z->img_comp[n].x+7) >> 3;
   2775          int h = (z->img_comp[n].y+7) >> 3;
   2776          for (j=0; j < h; ++j) {
   2777             for (i=0; i < w; ++i) {
   2778                short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
   2779                stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
   2780                z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
   2781             }
   2782          }
   2783       }
   2784    }
   2785 }
   2786 
   2787 static int stbi__process_marker(stbi__jpeg *z, int m)
   2788 {
   2789    int L;
   2790    switch (m) {
   2791       case STBI__MARKER_none: // no marker found
   2792          return stbi__err("expected marker","Corrupt JPEG");
   2793 
   2794       case 0xDD: // DRI - specify restart interval
   2795          if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
   2796          z->restart_interval = stbi__get16be(z->s);
   2797          return 1;
   2798 
   2799       case 0xDB: // DQT - define quantization table
   2800          L = stbi__get16be(z->s)-2;
   2801          while (L > 0) {
   2802             int q = stbi__get8(z->s);
   2803             int p = q >> 4, sixteen = (p != 0);
   2804             int t = q & 15,i;
   2805             if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
   2806             if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
   2807 
   2808             for (i=0; i < 64; ++i)
   2809                z->dequant[t][stbi__jpeg_dezigzag[i]] = sixteen ? stbi__get16be(z->s) : stbi__get8(z->s);
   2810             L -= (sixteen ? 129 : 65);
   2811          }
   2812          return L==0;
   2813 
   2814       case 0xC4: // DHT - define huffman table
   2815          L = stbi__get16be(z->s)-2;
   2816          while (L > 0) {
   2817             stbi_uc *v;
   2818             int sizes[16],i,n=0;
   2819             int q = stbi__get8(z->s);
   2820             int tc = q >> 4;
   2821             int th = q & 15;
   2822             if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
   2823             for (i=0; i < 16; ++i) {
   2824                sizes[i] = stbi__get8(z->s);
   2825                n += sizes[i];
   2826             }
   2827             L -= 17;
   2828             if (tc == 0) {
   2829                if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
   2830                v = z->huff_dc[th].values;
   2831             } else {
   2832                if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
   2833                v = z->huff_ac[th].values;
   2834             }
   2835             for (i=0; i < n; ++i)
   2836                v[i] = stbi__get8(z->s);
   2837             if (tc != 0)
   2838                stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
   2839             L -= n;
   2840          }
   2841          return L==0;
   2842    }
   2843 
   2844    // check for comment block or APP blocks
   2845    if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
   2846       L = stbi__get16be(z->s);
   2847       if (L < 2) {
   2848          if (m == 0xFE)
   2849             return stbi__err("bad COM len","Corrupt JPEG");
   2850          else
   2851             return stbi__err("bad APP len","Corrupt JPEG");
   2852       }
   2853       L -= 2;
   2854 
   2855       if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
   2856          static const unsigned char tag[5] = {'J','F','I','F','\0'};
   2857          int ok = 1;
   2858          int i;
   2859          for (i=0; i < 5; ++i)
   2860             if (stbi__get8(z->s) != tag[i])
   2861                ok = 0;
   2862          L -= 5;
   2863          if (ok)
   2864             z->jfif = 1;
   2865       } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
   2866          static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
   2867          int ok = 1;
   2868          int i;
   2869          for (i=0; i < 6; ++i)
   2870             if (stbi__get8(z->s) != tag[i])
   2871                ok = 0;
   2872          L -= 6;
   2873          if (ok) {
   2874             stbi__get8(z->s); // version
   2875             stbi__get16be(z->s); // flags0
   2876             stbi__get16be(z->s); // flags1
   2877             z->app14_color_transform = stbi__get8(z->s); // color transform
   2878             L -= 6;
   2879          }
   2880       }
   2881 
   2882       stbi__skip(z->s, L);
   2883       return 1;
   2884    }
   2885 
   2886    return stbi__err("unknown marker","Corrupt JPEG");
   2887 }
   2888 
   2889 // after we see SOS
   2890 static int stbi__process_scan_header(stbi__jpeg *z)
   2891 {
   2892    int i;
   2893    int Ls = stbi__get16be(z->s);
   2894    z->scan_n = stbi__get8(z->s);
   2895    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   2896    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   2897    for (i=0; i < z->scan_n; ++i) {
   2898       int id = stbi__get8(z->s), which;
   2899       int q = stbi__get8(z->s);
   2900       for (which = 0; which < z->s->img_n; ++which)
   2901          if (z->img_comp[which].id == id)
   2902             break;
   2903       if (which == z->s->img_n) return 0; // no match
   2904       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
   2905       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
   2906       z->order[i] = which;
   2907    }
   2908 
   2909    {
   2910       int aa;
   2911       z->spec_start = stbi__get8(z->s);
   2912       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
   2913       aa = stbi__get8(z->s);
   2914       z->succ_high = (aa >> 4);
   2915       z->succ_low  = (aa & 15);
   2916       if (z->progressive) {
   2917          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
   2918             return stbi__err("bad SOS", "Corrupt JPEG");
   2919       } else {
   2920          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
   2921          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
   2922          z->spec_end = 63;
   2923       }
   2924    }
   2925 
   2926    return 1;
   2927 }
   2928 
   2929 static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
   2930 {
   2931    int i;
   2932    for (i=0; i < ncomp; ++i) {
   2933       if (z->img_comp[i].raw_data) {
   2934          STBI_FREE(z->img_comp[i].raw_data);
   2935          z->img_comp[i].raw_data = NULL;
   2936          z->img_comp[i].data = NULL;
   2937       }
   2938       if (z->img_comp[i].raw_coeff) {
   2939          STBI_FREE(z->img_comp[i].raw_coeff);
   2940          z->img_comp[i].raw_coeff = 0;
   2941          z->img_comp[i].coeff = 0;
   2942       }
   2943       if (z->img_comp[i].linebuf) {
   2944          STBI_FREE(z->img_comp[i].linebuf);
   2945          z->img_comp[i].linebuf = NULL;
   2946       }
   2947    }
   2948    return why;
   2949 }
   2950 
   2951 static int stbi__process_frame_header(stbi__jpeg *z, int scan)
   2952 {
   2953    stbi__context *s = z->s;
   2954    int Lf,p,i,q, h_max=1,v_max=1,c;
   2955    Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   2956    p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   2957    s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   2958    s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   2959    c = stbi__get8(s);
   2960    if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   2961    s->img_n = c;
   2962    for (i=0; i < c; ++i) {
   2963       z->img_comp[i].data = NULL;
   2964       z->img_comp[i].linebuf = NULL;
   2965    }
   2966 
   2967    if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
   2968 
   2969    z->rgb = 0;
   2970    for (i=0; i < s->img_n; ++i) {
   2971       static unsigned char rgb[3] = { 'R', 'G', 'B' };
   2972       z->img_comp[i].id = stbi__get8(s);
   2973       if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
   2974          ++z->rgb;
   2975       q = stbi__get8(s);
   2976       z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
   2977       z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
   2978       z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   2979    }
   2980 
   2981    if (scan != STBI__SCAN_load) return 1;
   2982 
   2983    if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
   2984 
   2985    for (i=0; i < s->img_n; ++i) {
   2986       if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
   2987       if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   2988    }
   2989 
   2990    // compute interleaved mcu info
   2991    z->img_h_max = h_max;
   2992    z->img_v_max = v_max;
   2993    z->img_mcu_w = h_max * 8;
   2994    z->img_mcu_h = v_max * 8;
   2995    // these sizes can't be more than 17 bits
   2996    z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   2997    z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
   2998 
   2999    for (i=0; i < s->img_n; ++i) {
   3000       // number of effective pixels (e.g. for non-interleaved MCU)
   3001       z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
   3002       z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
   3003       // to simplify generation, we'll allocate enough memory to decode
   3004       // the bogus oversized data from using interleaved MCUs and their
   3005       // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
   3006       // discard the extra data until colorspace conversion
   3007       //
   3008       // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
   3009       // so these muls can't overflow with 32-bit ints (which we require)
   3010       z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
   3011       z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
   3012       z->img_comp[i].coeff = 0;
   3013       z->img_comp[i].raw_coeff = 0;
   3014       z->img_comp[i].linebuf = NULL;
   3015       z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
   3016       if (z->img_comp[i].raw_data == NULL)
   3017          return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
   3018       // align blocks for idct using mmx/sse
   3019       z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
   3020       if (z->progressive) {
   3021          // w2, h2 are multiples of 8 (see above)
   3022          z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
   3023          z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
   3024          z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
   3025          if (z->img_comp[i].raw_coeff == NULL)
   3026             return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
   3027          z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
   3028       }
   3029    }
   3030 
   3031    return 1;
   3032 }
   3033 
// Marker classification helpers. The argument is a marker code as obtained
// from stbi__get_marker(); each macro is nonzero when the code matches.
// Note that stbi__SOF evaluates its argument up to three times, so pass a
// plain variable, not an expression with side effects.
//
// use comparisons since in some cases we handle more than one case (e.g. SOF)
#define stbi__DNL(x)         ((x) == 0xdc)
#define stbi__SOI(x)         ((x) == 0xd8)
#define stbi__EOI(x)         ((x) == 0xd9)
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
#define stbi__SOS(x)         ((x) == 0xda)

// the one SOF code accepted above that selects the progressive decode path
#define stbi__SOF_progressive(x)   ((x) == 0xc2)
   3042 
   3043 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
   3044 {
   3045    int m;
   3046    z->jfif = 0;
   3047    z->app14_color_transform = -1; // valid values are 0,1,2
   3048    z->marker = STBI__MARKER_none; // initialize cached marker to empty
   3049    m = stbi__get_marker(z);
   3050    if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
   3051    if (scan == STBI__SCAN_type) return 1;
   3052    m = stbi__get_marker(z);
   3053    while (!stbi__SOF(m)) {
   3054       if (!stbi__process_marker(z,m)) return 0;
   3055       m = stbi__get_marker(z);
   3056       while (m == STBI__MARKER_none) {
   3057          // some files have extra padding after their blocks, so ok, we'll scan
   3058          if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
   3059          m = stbi__get_marker(z);
   3060       }
   3061    }
   3062    z->progressive = stbi__SOF_progressive(m);
   3063    if (!stbi__process_frame_header(z, scan)) return 0;
   3064    return 1;
   3065 }
   3066 
   3067 // decode image to YCbCr format
   3068 static int stbi__decode_jpeg_image(stbi__jpeg *j)
   3069 {
   3070    int m;
   3071    for (m = 0; m < 4; m++) {
   3072       j->img_comp[m].raw_data = NULL;
   3073       j->img_comp[m].raw_coeff = NULL;
   3074    }
   3075    j->restart_interval = 0;
   3076    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
   3077    m = stbi__get_marker(j);
   3078    while (!stbi__EOI(m)) {
   3079       if (stbi__SOS(m)) {
   3080          if (!stbi__process_scan_header(j)) return 0;
   3081          if (!stbi__parse_entropy_coded_data(j)) return 0;
   3082          if (j->marker == STBI__MARKER_none ) {
   3083             // handle 0s at the end of image data from IP Kamera 9060
   3084             while (!stbi__at_eof(j->s)) {
   3085                int x = stbi__get8(j->s);
   3086                if (x == 255) {
   3087                   j->marker = stbi__get8(j->s);
   3088                   break;
   3089                }
   3090             }
   3091             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
   3092          }
   3093       } else if (stbi__DNL(m)) {
   3094          int Ld = stbi__get16be(j->s);
   3095          stbi__uint32 NL = stbi__get16be(j->s);
   3096          if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
   3097          if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
   3098       } else {
   3099          if (!stbi__process_marker(j, m)) return 0;
   3100       }
   3101       m = stbi__get_marker(j);
   3102    }
   3103    if (j->progressive)
   3104       stbi__jpeg_finish(j);
   3105    return 1;
   3106 }
   3107 
   3108 // static jfif-centered resampling (across block boundaries)
   3109 
// Common signature of the row upsamplers below.
//   out - scratch row the resampler may write its result into
//   in0 - the nearer of the two source rows; in1 - the farther one
//   w   - number of source samples; hs - horizontal expansion factor
// The return value is the row the caller should read from. It is not always
// `out`: resample_row_1 hands back its input row untouched.
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// shift right by 2 and narrow to stbi_uc; callers add 2 beforehand to round
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
   3114 
   3115 static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
   3116 {
   3117    STBI_NOTUSED(out);
   3118    STBI_NOTUSED(in_far);
   3119    STBI_NOTUSED(w);
   3120    STBI_NOTUSED(hs);
   3121    return in_near;
   3122 }
   3123 
   3124 static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
   3125 {
   3126    // need to generate two samples vertically for every one in input
   3127    int i;
   3128    STBI_NOTUSED(hs);
   3129    for (i=0; i < w; ++i)
   3130       out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
   3131    return out;
   3132 }
   3133 
   3134 static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
   3135 {
   3136    // need to generate two samples horizontally for every one in input
   3137    int i;
   3138    stbi_uc *input = in_near;
   3139 
   3140    if (w == 1) {
   3141       // if only one sample, can't do any interpolation
   3142       out[0] = out[1] = input[0];
   3143       return out;
   3144    }
   3145 
   3146    out[0] = input[0];
   3147    out[1] = stbi__div4(input[0]*3 + input[1] + 2);
   3148    for (i=1; i < w-1; ++i) {
   3149       int n = 3*input[i]+2;
   3150       out[i*2+0] = stbi__div4(n+input[i-1]);
   3151       out[i*2+1] = stbi__div4(n+input[i+1]);
   3152    }
   3153    out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
   3154    out[i*2+1] = input[w-1];
   3155 
   3156    STBI_NOTUSED(in_far);
   3157    STBI_NOTUSED(hs);
   3158 
   3159    return out;
   3160 }
   3161 
// shift right by 4 and narrow to stbi_uc; callers add 8 beforehand to round
#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
   3163 
   3164 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
   3165 {
   3166    // need to generate 2x2 samples for every one in input
   3167    int i,t0,t1;
   3168    if (w == 1) {
   3169       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
   3170       return out;
   3171    }
   3172 
   3173    t1 = 3*in_near[0] + in_far[0];
   3174    out[0] = stbi__div4(t1+2);
   3175    for (i=1; i < w; ++i) {
   3176       t0 = t1;
   3177       t1 = 3*in_near[i]+in_far[i];
   3178       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
   3179       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   3180    }
   3181    out[w*2-1] = stbi__div4(t1+2);
   3182 
   3183    STBI_NOTUSED(hs);
   3184 
   3185    return out;
   3186 }
   3187 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__resample_row_hv_2: the same 3:1 vertical then 3:1
// horizontal filter, computed eight source pixels per iteration with SSE2 or
// NEON. A scalar loop handles whatever is left, which always includes the
// last pixel of the row. Writes 2*w samples to out and returns out; hs is
// unused.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   // (the bound also keeps the in_near[i+8]/in_far[i+8] reads below inside
   // the row: i+8 never exceeds w-1)
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // First scalar pixel: only its even output is written here, because the
   // odd output of the pixel before it was already produced by the SIMD loop.
   // When no SIMD iteration ran (i == 0), t0 == t1 and the expression reduces
   // to stbi__div4(t1+2), i.e. the left-edge value.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
   3304 
   3305 static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
   3306 {
   3307    // resample with nearest-neighbor
   3308    int i,j;
   3309    STBI_NOTUSED(in_far);
   3310    for (i=0; i < w; ++i)
   3311       for (j=0; j < hs; ++j)
   3312          out[i*hs+j] = in_near[i];
   3313    return out;
   3314 }
   3315 
   3316 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
   3317 // to make sure the code produces the same results in both SIMD and scalar
   3318 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
   3319 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
   3320 {
   3321    int i;
   3322    for (i=0; i < count; ++i) {
   3323       int y_fixed = (y[i] << 20) + (1<<19); // rounding
   3324       int r,g,b;
   3325       int cr = pcr[i] - 128;
   3326       int cb = pcb[i] - 128;
   3327       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
   3328       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
   3329       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
   3330       r >>= 20;
   3331       g >>= 20;
   3332       b >>= 20;
   3333       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
   3334       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
   3335       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
   3336       out[0] = (stbi_uc)r;
   3337       out[1] = (stbi_uc)g;
   3338       out[2] = (stbi_uc)b;
   3339       out[3] = 255;
   3340       out += step;
   3341    }
   3342 }
   3343 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__YCbCr_to_RGB_row with the same parameters. Only
// step == 4 is vectorized, eight pixels per iteration; any other step, and
// the pixels left over after the last full group of eight, go through the
// scalar loop at the bottom, which repeats the arithmetic of
// stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar path: everything when step != 4, otherwise the leftover pixels
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif
   3478 
   3479 // set up the kernels
   3480 static void stbi__setup_jpeg(stbi__jpeg *j)
   3481 {
   3482    j->idct_block_kernel = stbi__idct_block;
   3483    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   3484    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
   3485 
   3486 #ifdef STBI_SSE2
   3487    if (stbi__sse2_available()) {
   3488       j->idct_block_kernel = stbi__idct_simd;
   3489       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   3490       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   3491    }
   3492 #endif
   3493 
   3494 #ifdef STBI_NEON
   3495    j->idct_block_kernel = stbi__idct_simd;
   3496    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   3497    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   3498 #endif
   3499 }
   3500 
   3501 // clean up the temporary component buffers
   3502 static void stbi__cleanup_jpeg(stbi__jpeg *j)
   3503 {
   3504    stbi__free_jpeg_components(j, j->s->img_n, 0);
   3505 }
   3506 
   3507 typedef struct
   3508 {
   3509    resample_row_func resample;
   3510    stbi_uc *line0,*line1;
   3511    int hs,vs;   // expansion factor in each axis
   3512    int w_lores; // horizontal pixels pre-expansion
   3513    int ystep;   // how far through vertical expansion we are
   3514    int ypos;    // which pre-expansion row we're on
   3515 } stbi__resample;
   3516 
   3517 // fast 0..255 * 0..255 => 0..255 rounded multiplication
   3518 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
   3519 {
   3520    unsigned int t = x*y + 128;
   3521    return (stbi_uc) ((t + (t >>8)) >> 8);
   3522 }
   3523 
   3524 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
   3525 {
   3526    int n, decode_n, is_rgb;
   3527    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
   3528 
   3529    // validate req_comp
   3530    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
   3531 
   3532    // load a jpeg image from whichever source, but leave in YCbCr format
   3533    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
   3534 
   3535    // determine actual number of components to generate
   3536    n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
   3537 
   3538    is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
   3539 
   3540    if (z->s->img_n == 3 && n < 3 && !is_rgb)
   3541       decode_n = 1;
   3542    else
   3543       decode_n = z->s->img_n;
   3544 
   3545    // resample and color-convert
   3546    {
   3547       int k;
   3548       unsigned int i,j;
   3549       stbi_uc *output;
   3550       stbi_uc *coutput[4];
   3551 
   3552       stbi__resample res_comp[4];
   3553 
   3554       for (k=0; k < decode_n; ++k) {
   3555          stbi__resample *r = &res_comp[k];
   3556 
   3557          // allocate line buffer big enough for upsampling off the edges
   3558          // with upsample factor of 4
   3559          z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
   3560          if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
   3561 
   3562          r->hs      = z->img_h_max / z->img_comp[k].h;
   3563          r->vs      = z->img_v_max / z->img_comp[k].v;
   3564          r->ystep   = r->vs >> 1;
   3565          r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
   3566          r->ypos    = 0;
   3567          r->line0   = r->line1 = z->img_comp[k].data;
   3568 
   3569          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
   3570          else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
   3571          else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
   3572          else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
   3573          else                               r->resample = stbi__resample_row_generic;
   3574       }
   3575 
   3576       // can't error after this so, this is safe
   3577       output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
   3578       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
   3579 
   3580       // now go ahead and resample
   3581       for (j=0; j < z->s->img_y; ++j) {
   3582          stbi_uc *out = output + n * z->s->img_x * j;
   3583          for (k=0; k < decode_n; ++k) {
   3584             stbi__resample *r = &res_comp[k];
   3585             int y_bot = r->ystep >= (r->vs >> 1);
   3586             coutput[k] = r->resample(z->img_comp[k].linebuf,
   3587                                      y_bot ? r->line1 : r->line0,
   3588                                      y_bot ? r->line0 : r->line1,
   3589                                      r->w_lores, r->hs);
   3590             if (++r->ystep >= r->vs) {
   3591                r->ystep = 0;
   3592                r->line0 = r->line1;
   3593                if (++r->ypos < z->img_comp[k].y)
   3594                   r->line1 += z->img_comp[k].w2;
   3595             }
   3596          }
   3597          if (n >= 3) {
   3598             stbi_uc *y = coutput[0];
   3599             if (z->s->img_n == 3) {
   3600                if (is_rgb) {
   3601                   for (i=0; i < z->s->img_x; ++i) {
   3602                      out[0] = y[i];
   3603                      out[1] = coutput[1][i];
   3604                      out[2] = coutput[2][i];
   3605                      out[3] = 255;
   3606                      out += n;
   3607                   }
   3608                } else {
   3609                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
   3610                }
   3611             } else if (z->s->img_n == 4) {
   3612                if (z->app14_color_transform == 0) { // CMYK
   3613                   for (i=0; i < z->s->img_x; ++i) {
   3614                      stbi_uc k = coutput[3][i];
   3615                      out[0] = stbi__blinn_8x8(coutput[0][i], k);
   3616                      out[1] = stbi__blinn_8x8(coutput[1][i], k);
   3617                      out[2] = stbi__blinn_8x8(coutput[2][i], k);
   3618                      out[3] = 255;
   3619                      out += n;
   3620                   }
   3621                } else if (z->app14_color_transform == 2) { // YCCK
   3622                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
   3623                   for (i=0; i < z->s->img_x; ++i) {
   3624                      stbi_uc k = coutput[3][i];
   3625                      out[0] = stbi__blinn_8x8(255 - out[0], k);
   3626                      out[1] = stbi__blinn_8x8(255 - out[1], k);
   3627                      out[2] = stbi__blinn_8x8(255 - out[2], k);
   3628                      out += n;
   3629                   }
   3630                } else { // YCbCr + alpha?  Ignore the fourth channel for now
   3631                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
   3632                }
   3633             } else
   3634                for (i=0; i < z->s->img_x; ++i) {
   3635                   out[0] = out[1] = out[2] = y[i];
   3636                   out[3] = 255; // not used if n==3
   3637                   out += n;
   3638                }
   3639          } else {
   3640             if (is_rgb) {
   3641                if (n == 1)
   3642                   for (i=0; i < z->s->img_x; ++i)
   3643                      *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
   3644                else {
   3645                   for (i=0; i < z->s->img_x; ++i, out += 2) {
   3646                      out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
   3647                      out[1] = 255;
   3648                   }
   3649                }
   3650             } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
   3651                for (i=0; i < z->s->img_x; ++i) {
   3652                   stbi_uc k = coutput[3][i];
   3653                   stbi_uc r = stbi__blinn_8x8(coutput[0][i], k);
   3654                   stbi_uc g = stbi__blinn_8x8(coutput[1][i], k);
   3655                   stbi_uc b = stbi__blinn_8x8(coutput[2][i], k);
   3656                   out[0] = stbi__compute_y(r, g, b);
   3657                   out[1] = 255;
   3658                   out += n;
   3659                }
   3660             } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
   3661                for (i=0; i < z->s->img_x; ++i) {
   3662                   out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
   3663                   out[1] = 255;
   3664                   out += n;
   3665                }
   3666             } else {
   3667                stbi_uc *y = coutput[0];
   3668                if (n == 1)
   3669                   for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
   3670                else
   3671                   for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
   3672             }
   3673          }
   3674       }
   3675       stbi__cleanup_jpeg(z);
   3676       *out_x = z->s->img_x;
   3677       *out_y = z->s->img_y;
   3678       if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
   3679       return output;
   3680    }
   3681 }
   3682 
   3683 static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   3684 {
   3685    unsigned char* result;
   3686    stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   3687    STBI_NOTUSED(ri);
   3688    j->s = s;
   3689    stbi__setup_jpeg(j);
   3690    result = load_jpeg_image(j, x,y,comp,req_comp);
   3691    STBI_FREE(j);
   3692    return result;
   3693 }
   3694 
   3695 static int stbi__jpeg_test(stbi__context *s)
   3696 {
   3697    int r;
   3698    stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   3699    j->s = s;
   3700    stbi__setup_jpeg(j);
   3701    r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   3702    stbi__rewind(s);
   3703    STBI_FREE(j);
   3704    return r;
   3705 }
   3706 
   3707 static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
   3708 {
   3709    if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
   3710       stbi__rewind( j->s );
   3711       return 0;
   3712    }
   3713    if (x) *x = j->s->img_x;
   3714    if (y) *y = j->s->img_y;
   3715    if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
   3716    return 1;
   3717 }
   3718 
   3719 static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
   3720 {
   3721    int result;
   3722    stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   3723    j->s = s;
   3724    result = stbi__jpeg_info_raw(j, x, y, comp);
   3725    STBI_FREE(j);
   3726    return result;
   3727 }
   3728 #endif
   3729 
   3730 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
   3731 //    simple implementation
   3732 //      - all input must be provided in an upfront buffer
   3733 //      - all output is written to a single output buffer (can malloc/realloc)
   3734 //    performance
   3735 //      - fast huffman
   3736 
   3737 #ifndef STBI_NO_ZLIB
   3738 
   3739 // fast-way is faster to check than jpeg huffman, but slow way is slower
   3740 #define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
   3741 #define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
   3742 
   3743 // zlib-style huffman encoding
   3744 // (jpegs packs from left, zlib from right, so can't share code)
   3745 typedef struct
   3746 {
   3747    stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   3748    stbi__uint16 firstcode[16];
   3749    int maxcode[17];
   3750    stbi__uint16 firstsymbol[16];
   3751    stbi_uc  size[288];
   3752    stbi__uint16 value[288];
   3753 } stbi__zhuffman;
   3754 
   3755 stbi_inline static int stbi__bitreverse16(int n)
   3756 {
   3757   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
   3758   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
   3759   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
   3760   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
   3761   return n;
   3762 }
   3763 
   3764 stbi_inline static int stbi__bit_reverse(int v, int bits)
   3765 {
   3766    STBI_ASSERT(bits <= 16);
   3767    // to bit reverse n bits, reverse 16 and shift
   3768    // e.g. 11 bits, bit reverse and shift away 5
   3769    return stbi__bitreverse16(v) >> (16-bits);
   3770 }
   3771 
   3772 static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
   3773 {
   3774    int i,k=0;
   3775    int code, next_code[16], sizes[17];
   3776 
   3777    // DEFLATE spec for generating codes
   3778    memset(sizes, 0, sizeof(sizes));
   3779    memset(z->fast, 0, sizeof(z->fast));
   3780    for (i=0; i < num; ++i)
   3781       ++sizes[sizelist[i]];
   3782    sizes[0] = 0;
   3783    for (i=1; i < 16; ++i)
   3784       if (sizes[i] > (1 << i))
   3785          return stbi__err("bad sizes", "Corrupt PNG");
   3786    code = 0;
   3787    for (i=1; i < 16; ++i) {
   3788       next_code[i] = code;
   3789       z->firstcode[i] = (stbi__uint16) code;
   3790       z->firstsymbol[i] = (stbi__uint16) k;
   3791       code = (code + sizes[i]);
   3792       if (sizes[i])
   3793          if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
   3794       z->maxcode[i] = code << (16-i); // preshift for inner loop
   3795       code <<= 1;
   3796       k += sizes[i];
   3797    }
   3798    z->maxcode[16] = 0x10000; // sentinel
   3799    for (i=0; i < num; ++i) {
   3800       int s = sizelist[i];
   3801       if (s) {
   3802          int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
   3803          stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
   3804          z->size [c] = (stbi_uc     ) s;
   3805          z->value[c] = (stbi__uint16) i;
   3806          if (s <= STBI__ZFAST_BITS) {
   3807             int j = stbi__bit_reverse(next_code[s],s);
   3808             while (j < (1 << STBI__ZFAST_BITS)) {
   3809                z->fast[j] = fastv;
   3810                j += (1 << s);
   3811             }
   3812          }
   3813          ++next_code[s];
   3814       }
   3815    }
   3816    return 1;
   3817 }
   3818 
   3819 // zlib-from-memory implementation for PNG reading
   3820 //    because PNG allows splitting the zlib stream arbitrarily,
   3821 //    and it's annoying structurally to have PNG call ZLIB call PNG,
   3822 //    we require PNG read all the IDATs and combine them into a single
   3823 //    memory buffer
   3824 
   3825 typedef struct
   3826 {
   3827    stbi_uc *zbuffer, *zbuffer_end;
   3828    int num_bits;
   3829    stbi__uint32 code_buffer;
   3830 
   3831    char *zout;
   3832    char *zout_start;
   3833    char *zout_end;
   3834    int   z_expandable;
   3835 
   3836    stbi__zhuffman z_length, z_distance;
   3837 } stbi__zbuf;
   3838 
   3839 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
   3840 {
   3841    if (z->zbuffer >= z->zbuffer_end) return 0;
   3842    return *z->zbuffer++;
   3843 }
   3844 
   3845 static void stbi__fill_bits(stbi__zbuf *z)
   3846 {
   3847    do {
   3848       STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
   3849       z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
   3850       z->num_bits += 8;
   3851    } while (z->num_bits <= 24);
   3852 }
   3853 
   3854 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
   3855 {
   3856    unsigned int k;
   3857    if (z->num_bits < n) stbi__fill_bits(z);
   3858    k = z->code_buffer & ((1 << n) - 1);
   3859    z->code_buffer >>= n;
   3860    z->num_bits -= n;
   3861    return k;
   3862 }
   3863 
   3864 static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
   3865 {
   3866    int b,s,k;
   3867    // not resolved by fast table, so compute it the slow way
   3868    // use jpeg approach, which requires MSbits at top
   3869    k = stbi__bit_reverse(a->code_buffer, 16);
   3870    for (s=STBI__ZFAST_BITS+1; ; ++s)
   3871       if (k < z->maxcode[s])
   3872          break;
   3873    if (s == 16) return -1; // invalid code!
   3874    // code size is s, so:
   3875    b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   3876    STBI_ASSERT(z->size[b] == s);
   3877    a->code_buffer >>= s;
   3878    a->num_bits -= s;
   3879    return z->value[b];
   3880 }
   3881 
   3882 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
   3883 {
   3884    int b,s;
   3885    if (a->num_bits < 16) stbi__fill_bits(a);
   3886    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
   3887    if (b) {
   3888       s = b >> 9;
   3889       a->code_buffer >>= s;
   3890       a->num_bits -= s;
   3891       return b & 511;
   3892    }
   3893    return stbi__zhuffman_decode_slowpath(a, z);
   3894 }
   3895 
   3896 static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
   3897 {
   3898    char *q;
   3899    int cur, limit, old_limit;
   3900    z->zout = zout;
   3901    if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   3902    cur   = (int) (z->zout     - z->zout_start);
   3903    limit = old_limit = (int) (z->zout_end - z->zout_start);
   3904    while (cur + n > limit)
   3905       limit *= 2;
   3906    q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   3907    STBI_NOTUSED(old_limit);
   3908    if (q == NULL) return stbi__err("outofmem", "Out of memory");
   3909    z->zout_start = q;
   3910    z->zout       = q + cur;
   3911    z->zout_end   = q + limit;
   3912    return 1;
   3913 }
   3914 
   3915 static int stbi__zlength_base[31] = {
   3916    3,4,5,6,7,8,9,10,11,13,
   3917    15,17,19,23,27,31,35,43,51,59,
   3918    67,83,99,115,131,163,195,227,258,0,0 };
   3919 
   3920 static int stbi__zlength_extra[31]=
   3921 { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
   3922 
   3923 static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
   3924 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
   3925 
   3926 static int stbi__zdist_extra[32] =
   3927 { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
   3928 
   3929 static int stbi__parse_huffman_block(stbi__zbuf *a)
   3930 {
   3931    char *zout = a->zout;
   3932    for(;;) {
   3933       int z = stbi__zhuffman_decode(a, &a->z_length);
   3934       if (z < 256) {
   3935          if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
   3936          if (zout >= a->zout_end) {
   3937             if (!stbi__zexpand(a, zout, 1)) return 0;
   3938             zout = a->zout;
   3939          }
   3940          *zout++ = (char) z;
   3941       } else {
   3942          stbi_uc *p;
   3943          int len,dist;
   3944          if (z == 256) {
   3945             a->zout = zout;
   3946             return 1;
   3947          }
   3948          z -= 257;
   3949          len = stbi__zlength_base[z];
   3950          if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
   3951          z = stbi__zhuffman_decode(a, &a->z_distance);
   3952          if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
   3953          dist = stbi__zdist_base[z];
   3954          if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
   3955          if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
   3956          if (zout + len > a->zout_end) {
   3957             if (!stbi__zexpand(a, zout, len)) return 0;
   3958             zout = a->zout;
   3959          }
   3960          p = (stbi_uc *) (zout - dist);
   3961          if (dist == 1) { // run of one byte; common in images.
   3962             stbi_uc v = *p;
   3963             if (len) { do *zout++ = v; while (--len); }
   3964          } else {
   3965             if (len) { do *zout++ = *p++; while (--len); }
   3966          }
   3967       }
   3968    }
   3969 }
   3970 
   3971 static int stbi__compute_huffman_codes(stbi__zbuf *a)
   3972 {
   3973    static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   3974    stbi__zhuffman z_codelength;
   3975    stbi_uc lencodes[286+32+137];//padding for maximum single op
   3976    stbi_uc codelength_sizes[19];
   3977    int i,n;
   3978 
   3979    int hlit  = stbi__zreceive(a,5) + 257;
   3980    int hdist = stbi__zreceive(a,5) + 1;
   3981    int hclen = stbi__zreceive(a,4) + 4;
   3982    int ntot  = hlit + hdist;
   3983 
   3984    memset(codelength_sizes, 0, sizeof(codelength_sizes));
   3985    for (i=0; i < hclen; ++i) {
   3986       int s = stbi__zreceive(a,3);
   3987       codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   3988    }
   3989    if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
   3990 
   3991    n = 0;
   3992    while (n < ntot) {
   3993       int c = stbi__zhuffman_decode(a, &z_codelength);
   3994       if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
   3995       if (c < 16)
   3996          lencodes[n++] = (stbi_uc) c;
   3997       else {
   3998          stbi_uc fill = 0;
   3999          if (c == 16) {
   4000             c = stbi__zreceive(a,2)+3;
   4001             if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
   4002             fill = lencodes[n-1];
   4003          } else if (c == 17)
   4004             c = stbi__zreceive(a,3)+3;
   4005          else {
   4006             STBI_ASSERT(c == 18);
   4007             c = stbi__zreceive(a,7)+11;
   4008          }
   4009          if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
   4010          memset(lencodes+n, fill, c);
   4011          n += c;
   4012       }
   4013    }
   4014    if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   4015    if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   4016    if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   4017    return 1;
   4018 }
   4019 
   4020 static int stbi__parse_uncompressed_block(stbi__zbuf *a)
   4021 {
   4022    stbi_uc header[4];
   4023    int len,nlen,k;
   4024    if (a->num_bits & 7)
   4025       stbi__zreceive(a, a->num_bits & 7); // discard
   4026    // drain the bit-packed data into header
   4027    k = 0;
   4028    while (a->num_bits > 0) {
   4029       header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
   4030       a->code_buffer >>= 8;
   4031       a->num_bits -= 8;
   4032    }
   4033    STBI_ASSERT(a->num_bits == 0);
   4034    // now fill header the normal way
   4035    while (k < 4)
   4036       header[k++] = stbi__zget8(a);
   4037    len  = header[1] * 256 + header[0];
   4038    nlen = header[3] * 256 + header[2];
   4039    if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   4040    if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   4041    if (a->zout + len > a->zout_end)
   4042       if (!stbi__zexpand(a, a->zout, len)) return 0;
   4043    memcpy(a->zout, a->zbuffer, len);
   4044    a->zbuffer += len;
   4045    a->zout += len;
   4046    return 1;
   4047 }
   4048 
   4049 static int stbi__parse_zlib_header(stbi__zbuf *a)
   4050 {
   4051    int cmf   = stbi__zget8(a);
   4052    int cm    = cmf & 15;
   4053    /* int cinfo = cmf >> 4; */
   4054    int flg   = stbi__zget8(a);
   4055    if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   4056    if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   4057    if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   4058    // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   4059    return 1;
   4060 }
   4061 
   4062 static const stbi_uc stbi__zdefault_length[288] =
   4063 {
   4064    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   4065    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   4066    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   4067    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   4068    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   4069    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   4070    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   4071    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   4072    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
   4073 };
   4074 static const stbi_uc stbi__zdefault_distance[32] =
   4075 {
   4076    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
   4077 };
   4078 /*
   4079 Init algorithm:
   4080 {
   4081    int i;   // use <= to match clearly with spec
   4082    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
   4083    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
   4084    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
   4085    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
   4086 
   4087    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
   4088 }
   4089 */
   4090 
   4091 static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
   4092 {
   4093    int final, type;
   4094    if (parse_header)
   4095       if (!stbi__parse_zlib_header(a)) return 0;
   4096    a->num_bits = 0;
   4097    a->code_buffer = 0;
   4098    do {
   4099       final = stbi__zreceive(a,1);
   4100       type = stbi__zreceive(a,2);
   4101       if (type == 0) {
   4102          if (!stbi__parse_uncompressed_block(a)) return 0;
   4103       } else if (type == 3) {
   4104          return 0;
   4105       } else {
   4106          if (type == 1) {
   4107             // use fixed code lengths
   4108             if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
   4109             if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
   4110          } else {
   4111             if (!stbi__compute_huffman_codes(a)) return 0;
   4112          }
   4113          if (!stbi__parse_huffman_block(a)) return 0;
   4114       }
   4115    } while (!final);
   4116    return 1;
   4117 }
   4118 
   4119 static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
   4120 {
   4121    a->zout_start = obuf;
   4122    a->zout       = obuf;
   4123    a->zout_end   = obuf + olen;
   4124    a->z_expandable = exp;
   4125 
   4126    return stbi__parse_zlib(a, parse_header);
   4127 }
   4128 
   4129 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
   4130 {
   4131    stbi__zbuf a;
   4132    char *p = (char *) stbi__malloc(initial_size);
   4133    if (p == NULL) return NULL;
   4134    a.zbuffer = (stbi_uc *) buffer;
   4135    a.zbuffer_end = (stbi_uc *) buffer + len;
   4136    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
   4137       if (outlen) *outlen = (int) (a.zout - a.zout_start);
   4138       return a.zout_start;
   4139    } else {
   4140       STBI_FREE(a.zout_start);
   4141       return NULL;
   4142    }
   4143 }
   4144 
   4145 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
   4146 {
   4147    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
   4148 }
   4149 
   4150 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
   4151 {
   4152    stbi__zbuf a;
   4153    char *p = (char *) stbi__malloc(initial_size);
   4154    if (p == NULL) return NULL;
   4155    a.zbuffer = (stbi_uc *) buffer;
   4156    a.zbuffer_end = (stbi_uc *) buffer + len;
   4157    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
   4158       if (outlen) *outlen = (int) (a.zout - a.zout_start);
   4159       return a.zout_start;
   4160    } else {
   4161       STBI_FREE(a.zout_start);
   4162       return NULL;
   4163    }
   4164 }
   4165 
   4166 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
   4167 {
   4168    stbi__zbuf a;
   4169    a.zbuffer = (stbi_uc *) ibuffer;
   4170    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   4171    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
   4172       return (int) (a.zout - a.zout_start);
   4173    else
   4174       return -1;
   4175 }
   4176 
   4177 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
   4178 {
   4179    stbi__zbuf a;
   4180    char *p = (char *) stbi__malloc(16384);
   4181    if (p == NULL) return NULL;
   4182    a.zbuffer = (stbi_uc *) buffer;
   4183    a.zbuffer_end = (stbi_uc *) buffer+len;
   4184    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
   4185       if (outlen) *outlen = (int) (a.zout - a.zout_start);
   4186       return a.zout_start;
   4187    } else {
   4188       STBI_FREE(a.zout_start);
   4189       return NULL;
   4190    }
   4191 }
   4192 
   4193 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
   4194 {
   4195    stbi__zbuf a;
   4196    a.zbuffer = (stbi_uc *) ibuffer;
   4197    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   4198    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
   4199       return (int) (a.zout - a.zout_start);
   4200    else
   4201       return -1;
   4202 }
   4203 #endif
   4204 
   4205 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
   4206 //    simple implementation
   4207 //      - only 8-bit samples
   4208 //      - no CRC checking
   4209 //      - allocates lots of intermediate memory
   4210 //        - avoids problem of streaming data between subsystems
   4211 //        - avoids explicit window management
   4212 //    performance
   4213 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
   4214 
   4215 #ifndef STBI_NO_PNG
   4216 typedef struct
   4217 {
   4218    stbi__uint32 length;
   4219    stbi__uint32 type;
   4220 } stbi__pngchunk;
   4221 
   4222 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
   4223 {
   4224    stbi__pngchunk c;
   4225    c.length = stbi__get32be(s);
   4226    c.type   = stbi__get32be(s);
   4227    return c;
   4228 }
   4229 
   4230 static int stbi__check_png_header(stbi__context *s)
   4231 {
   4232    static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   4233    int i;
   4234    for (i=0; i < 8; ++i)
   4235       if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
   4236    return 1;
   4237 }
   4238 
   4239 typedef struct
   4240 {
   4241    stbi__context *s;
   4242    stbi_uc *idata, *expanded, *out;
   4243    int depth;
   4244 } stbi__png;
   4245 
   4246 
   4247 enum {
   4248    STBI__F_none=0,
   4249    STBI__F_sub=1,
   4250    STBI__F_up=2,
   4251    STBI__F_avg=3,
   4252    STBI__F_paeth=4,
   4253    // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   4254    STBI__F_avg_first,
   4255    STBI__F_paeth_first
   4256 };
   4257 
   4258 static stbi_uc first_row_filter[5] =
   4259 {
   4260    STBI__F_none,
   4261    STBI__F_sub,
   4262    STBI__F_none,
   4263    STBI__F_avg_first,
   4264    STBI__F_paeth_first
   4265 };
   4266 
   4267 static int stbi__paeth(int a, int b, int c)
   4268 {
   4269    int p = a + b - c;
   4270    int pa = abs(p-a);
   4271    int pb = abs(p-b);
   4272    int pc = abs(p-c);
   4273    if (pa <= pb && pa <= pc) return a;
   4274    if (pb <= pc) return b;
   4275    return c;
   4276 }
   4277 
   4278 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
   4279 
   4280 // create the png data from post-deflated data
   4281 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
   4282 {
   4283    int bytes = (depth == 16? 2 : 1);
   4284    stbi__context *s = a->s;
   4285    stbi__uint32 i,j,stride = x*out_n*bytes;
   4286    stbi__uint32 img_len, img_width_bytes;
   4287    int k;
   4288    int img_n = s->img_n; // copy it into a local for later
   4289 
   4290    int output_bytes = out_n*bytes;
   4291    int filter_bytes = img_n*bytes;
   4292    int width = x;
   4293 
   4294    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   4295    a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   4296    if (!a->out) return stbi__err("outofmem", "Out of memory");
   4297 
   4298    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   4299    img_len = (img_width_bytes + 1) * y;
   4300    if (s->img_x == x && s->img_y == y) {
   4301       if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
   4302    } else { // interlaced:
   4303       if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
   4304    }
   4305 
   4306    for (j=0; j < y; ++j) {
   4307       stbi_uc *cur = a->out + stride*j;
   4308       stbi_uc *prior;
   4309       int filter = *raw++;
   4310 
   4311       if (filter > 4)
   4312          return stbi__err("invalid filter","Corrupt PNG");
   4313 
   4314       if (depth < 8) {
   4315          STBI_ASSERT(img_width_bytes <= x);
   4316          cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
   4317          filter_bytes = 1;
   4318          width = img_width_bytes;
   4319       }
   4320       prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
   4321 
   4322       // if first row, use special filter that doesn't sample previous row
   4323       if (j == 0) filter = first_row_filter[filter];
   4324 
   4325       // handle first byte explicitly
   4326       for (k=0; k < filter_bytes; ++k) {
   4327          switch (filter) {
   4328             case STBI__F_none       : cur[k] = raw[k]; break;
   4329             case STBI__F_sub        : cur[k] = raw[k]; break;
   4330             case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
   4331             case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
   4332             case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
   4333             case STBI__F_avg_first  : cur[k] = raw[k]; break;
   4334             case STBI__F_paeth_first: cur[k] = raw[k]; break;
   4335          }
   4336       }
   4337 
   4338       if (depth == 8) {
   4339          if (img_n != out_n)
   4340             cur[img_n] = 255; // first pixel
   4341          raw += img_n;
   4342          cur += out_n;
   4343          prior += out_n;
   4344       } else if (depth == 16) {
   4345          if (img_n != out_n) {
   4346             cur[filter_bytes]   = 255; // first pixel top byte
   4347             cur[filter_bytes+1] = 255; // first pixel bottom byte
   4348          }
   4349          raw += filter_bytes;
   4350          cur += output_bytes;
   4351          prior += output_bytes;
   4352       } else {
   4353          raw += 1;
   4354          cur += 1;
   4355          prior += 1;
   4356       }
   4357 
   4358       // this is a little gross, so that we don't switch per-pixel or per-component
   4359       if (depth < 8 || img_n == out_n) {
   4360          int nk = (width - 1)*filter_bytes;
   4361          #define STBI__CASE(f) \
   4362              case f:     \
   4363                 for (k=0; k < nk; ++k)
   4364          switch (filter) {
   4365             // "none" filter turns into a memcpy here; make that explicit.
   4366             case STBI__F_none:         memcpy(cur, raw, nk); break;
   4367             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
   4368             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
   4369             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
   4370             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
   4371             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
   4372             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
   4373          }
   4374          #undef STBI__CASE
   4375          raw += nk;
   4376       } else {
   4377          STBI_ASSERT(img_n+1 == out_n);
   4378          #define STBI__CASE(f) \
   4379              case f:     \
   4380                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
   4381                    for (k=0; k < filter_bytes; ++k)
   4382          switch (filter) {
   4383             STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
   4384             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
   4385             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
   4386             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
   4387             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
   4388             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
   4389             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
   4390          }
   4391          #undef STBI__CASE
   4392 
   4393          // the loop above sets the high byte of the pixels' alpha, but for
   4394          // 16 bit png files we also need the low byte set. we'll do that here.
   4395          if (depth == 16) {
   4396             cur = a->out + stride*j; // start at the beginning of the row again
   4397             for (i=0; i < x; ++i,cur+=output_bytes) {
   4398                cur[filter_bytes+1] = 255;
   4399             }
   4400          }
   4401       }
   4402    }
   4403 
   4404    // we make a separate pass to expand bits to pixels; for performance,
   4405    // this could run two scanlines behind the above code, so it won't
   4406    // intefere with filtering but will still be in the cache.
   4407    if (depth < 8) {
   4408       for (j=0; j < y; ++j) {
   4409          stbi_uc *cur = a->out + stride*j;
   4410          stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
   4411          // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
   4412          // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
   4413          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
   4414 
   4415          // note that the final byte might overshoot and write more data than desired.
   4416          // we can allocate enough data that this never writes out of memory, but it
   4417          // could also overwrite the next scanline. can it overwrite non-empty data
   4418          // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
   4419          // so we need to explicitly clamp the final ones
   4420 
   4421          if (depth == 4) {
   4422             for (k=x*img_n; k >= 2; k-=2, ++in) {
   4423                *cur++ = scale * ((*in >> 4)       );
   4424                *cur++ = scale * ((*in     ) & 0x0f);
   4425             }
   4426             if (k > 0) *cur++ = scale * ((*in >> 4)       );
   4427          } else if (depth == 2) {
   4428             for (k=x*img_n; k >= 4; k-=4, ++in) {
   4429                *cur++ = scale * ((*in >> 6)       );
   4430                *cur++ = scale * ((*in >> 4) & 0x03);
   4431                *cur++ = scale * ((*in >> 2) & 0x03);
   4432                *cur++ = scale * ((*in     ) & 0x03);
   4433             }
   4434             if (k > 0) *cur++ = scale * ((*in >> 6)       );
   4435             if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
   4436             if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
   4437          } else if (depth == 1) {
   4438             for (k=x*img_n; k >= 8; k-=8, ++in) {
   4439                *cur++ = scale * ((*in >> 7)       );
   4440                *cur++ = scale * ((*in >> 6) & 0x01);
   4441                *cur++ = scale * ((*in >> 5) & 0x01);
   4442                *cur++ = scale * ((*in >> 4) & 0x01);
   4443                *cur++ = scale * ((*in >> 3) & 0x01);
   4444                *cur++ = scale * ((*in >> 2) & 0x01);
   4445                *cur++ = scale * ((*in >> 1) & 0x01);
   4446                *cur++ = scale * ((*in     ) & 0x01);
   4447             }
   4448             if (k > 0) *cur++ = scale * ((*in >> 7)       );
   4449             if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
   4450             if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
   4451             if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
   4452             if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
   4453             if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
   4454             if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
   4455          }
   4456          if (img_n != out_n) {
   4457             int q;
   4458             // insert alpha = 255
   4459             cur = a->out + stride*j;
   4460             if (img_n == 1) {
   4461                for (q=x-1; q >= 0; --q) {
   4462                   cur[q*2+1] = 255;
   4463                   cur[q*2+0] = cur[q];
   4464                }
   4465             } else {
   4466                STBI_ASSERT(img_n == 3);
   4467                for (q=x-1; q >= 0; --q) {
   4468                   cur[q*4+3] = 255;
   4469                   cur[q*4+2] = cur[q*3+2];
   4470                   cur[q*4+1] = cur[q*3+1];
   4471                   cur[q*4+0] = cur[q*3+0];
   4472                }
   4473             }
   4474          }
   4475       }
   4476    } else if (depth == 16) {
   4477       // force the image data from big-endian to platform-native.
   4478       // this is done in a separate pass due to the decoding relying
   4479       // on the data being untouched, but could probably be done
   4480       // per-line during decode if care is taken.
   4481       stbi_uc *cur = a->out;
   4482       stbi__uint16 *cur16 = (stbi__uint16*)cur;
   4483 
   4484       for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
   4485          *cur16 = (cur[0] << 8) | cur[1];
   4486       }
   4487    }
   4488 
   4489    return 1;
   4490 }
   4491 
   4492 static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
   4493 {
   4494    int bytes = (depth == 16 ? 2 : 1);
   4495    int out_bytes = out_n * bytes;
   4496    stbi_uc *final;
   4497    int p;
   4498    if (!interlaced)
   4499       return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
   4500 
   4501    // de-interlacing
   4502    final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   4503    for (p=0; p < 7; ++p) {
   4504       int xorig[] = { 0,4,0,2,0,1,0 };
   4505       int yorig[] = { 0,0,4,0,2,0,1 };
   4506       int xspc[]  = { 8,8,4,4,2,2,1 };
   4507       int yspc[]  = { 8,8,8,4,4,2,2 };
   4508       int i,j,x,y;
   4509       // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
   4510       x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
   4511       y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
   4512       if (x && y) {
   4513          stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
   4514          if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
   4515             STBI_FREE(final);
   4516             return 0;
   4517          }
   4518          for (j=0; j < y; ++j) {
   4519             for (i=0; i < x; ++i) {
   4520                int out_y = j*yspc[p]+yorig[p];
   4521                int out_x = i*xspc[p]+xorig[p];
   4522                memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
   4523                       a->out + (j*x+i)*out_bytes, out_bytes);
   4524             }
   4525          }
   4526          STBI_FREE(a->out);
   4527          image_data += img_len;
   4528          image_data_len -= img_len;
   4529       }
   4530    }
   4531    a->out = final;
   4532 
   4533    return 1;
   4534 }
   4535 
   4536 static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
   4537 {
   4538    stbi__context *s = z->s;
   4539    stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   4540    stbi_uc *p = z->out;
   4541 
   4542    // compute color-based transparency, assuming we've
   4543    // already got 255 as the alpha value in the output
   4544    STBI_ASSERT(out_n == 2 || out_n == 4);
   4545 
   4546    if (out_n == 2) {
   4547       for (i=0; i < pixel_count; ++i) {
   4548          p[1] = (p[0] == tc[0] ? 0 : 255);
   4549          p += 2;
   4550       }
   4551    } else {
   4552       for (i=0; i < pixel_count; ++i) {
   4553          if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
   4554             p[3] = 0;
   4555          p += 4;
   4556       }
   4557    }
   4558    return 1;
   4559 }
   4560 
   4561 static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
   4562 {
   4563    stbi__context *s = z->s;
   4564    stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   4565    stbi__uint16 *p = (stbi__uint16*) z->out;
   4566 
   4567    // compute color-based transparency, assuming we've
   4568    // already got 65535 as the alpha value in the output
   4569    STBI_ASSERT(out_n == 2 || out_n == 4);
   4570 
   4571    if (out_n == 2) {
   4572       for (i = 0; i < pixel_count; ++i) {
   4573          p[1] = (p[0] == tc[0] ? 0 : 65535);
   4574          p += 2;
   4575       }
   4576    } else {
   4577       for (i = 0; i < pixel_count; ++i) {
   4578          if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
   4579             p[3] = 0;
   4580          p += 4;
   4581       }
   4582    }
   4583    return 1;
   4584 }
   4585 
   4586 static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
   4587 {
   4588    stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   4589    stbi_uc *p, *temp_out, *orig = a->out;
   4590 
   4591    p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   4592    if (p == NULL) return stbi__err("outofmem", "Out of memory");
   4593 
   4594    // between here and free(out) below, exitting would leak
   4595    temp_out = p;
   4596 
   4597    if (pal_img_n == 3) {
   4598       for (i=0; i < pixel_count; ++i) {
   4599          int n = orig[i]*4;
   4600          p[0] = palette[n  ];
   4601          p[1] = palette[n+1];
   4602          p[2] = palette[n+2];
   4603          p += 3;
   4604       }
   4605    } else {
   4606       for (i=0; i < pixel_count; ++i) {
   4607          int n = orig[i]*4;
   4608          p[0] = palette[n  ];
   4609          p[1] = palette[n+1];
   4610          p[2] = palette[n+2];
   4611          p[3] = palette[n+3];
   4612          p += 4;
   4613       }
   4614    }
   4615    STBI_FREE(a->out);
   4616    a->out = temp_out;
   4617 
   4618    STBI_NOTUSED(len);
   4619 
   4620    return 1;
   4621 }
   4622 
   4623 static int stbi__unpremultiply_on_load = 0;
   4624 static int stbi__de_iphone_flag = 0;
   4625 
   4626 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
   4627 {
   4628    stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
   4629 }
   4630 
   4631 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
   4632 {
   4633    stbi__de_iphone_flag = flag_true_if_should_convert;
   4634 }
   4635 
   4636 static void stbi__de_iphone(stbi__png *z)
   4637 {
   4638    stbi__context *s = z->s;
   4639    stbi__uint32 i, pixel_count = s->img_x * s->img_y;
   4640    stbi_uc *p = z->out;
   4641 
   4642    if (s->img_out_n == 3) {  // convert bgr to rgb
   4643       for (i=0; i < pixel_count; ++i) {
   4644          stbi_uc t = p[0];
   4645          p[0] = p[2];
   4646          p[2] = t;
   4647          p += 3;
   4648       }
   4649    } else {
   4650       STBI_ASSERT(s->img_out_n == 4);
   4651       if (stbi__unpremultiply_on_load) {
   4652          // convert bgr to rgb and unpremultiply
   4653          for (i=0; i < pixel_count; ++i) {
   4654             stbi_uc a = p[3];
   4655             stbi_uc t = p[0];
   4656             if (a) {
   4657                p[0] = p[2] * 255 / a;
   4658                p[1] = p[1] * 255 / a;
   4659                p[2] =  t   * 255 / a;
   4660             } else {
   4661                p[0] = p[2];
   4662                p[2] = t;
   4663             }
   4664             p += 4;
   4665          }
   4666       } else {
   4667          // convert bgr to rgb
   4668          for (i=0; i < pixel_count; ++i) {
   4669             stbi_uc t = p[0];
   4670             p[0] = p[2];
   4671             p[2] = t;
   4672             p += 4;
   4673          }
   4674       }
   4675    }
   4676 }
   4677 
   4678 #define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
        // STBI__PNG_TYPE packs the four characters of a chunk name into one
        // integer tag, first character in bits 24..31, for use as case labels.
   4679 
        // Walk the PNG chunk stream of z->s.
        //
        //   scan == STBI__SCAN_type   : only verify the file signature.
        //   scan == STBI__SCAN_header : stop as soon as size and component count
        //                               are known (s->img_x, s->img_y, s->img_n).
        //   scan == STBI__SCAN_load   : decode the whole image into z->out.
        //   req_comp                  : requested component count; only consulted
        //                               at IEND when choosing s->img_out_n.
        //
        // Returns 1 on success and 0 on failure. Most failures return the value of
        // stbi__err(...) (NOTE(review): defined outside this view; presumably it
        // records the message and evaluates to 0). On failure z->idata, z->expanded
        // and z->out may still hold allocations; stbi__do_png frees them.
   4680 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
   4681 {
           // palette holds up to 256 entries of 4 bytes (R,G,B,A).
           // pal_img_n is 0 for non-paletted images, 3 for a palette without
           // tRNS and 4 once a tRNS chunk has supplied per-entry alpha.
   4682    stbi_uc palette[1024], pal_img_n=0;
           // has_trans / tc / tc16: color-key transparency for non-paletted images
   4683    stbi_uc has_trans=0, tc[3];
   4684    stbi__uint16 tc16[3];
           // ioff = bytes of IDAT payload gathered so far; idata_limit = capacity of z->idata
   4685    stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   4686    int first=1,k,interlace=0, color=0, is_iphone=0;
   4687    stbi__context *s = z->s;
   4688 
   4689    z->expanded = NULL;
   4690    z->idata = NULL;
   4691    z->out = NULL;
   4692 
   4693    if (!stbi__check_png_header(s)) return 0;
   4694 
   4695    if (scan == STBI__SCAN_type) return 1;
   4696 
           // one iteration per chunk; the loop is only left through a return
   4697    for (;;) {
   4698       stbi__pngchunk c = stbi__get_chunk_header(s);
   4699       switch (c.type) {
                 // CgBI marks the file as iPhone-format; the payload itself is skipped
   4700          case STBI__PNG_TYPE('C','g','B','I'):
   4701             is_iphone = 1;
   4702             stbi__skip(s, c.length);
   4703             break;
                 // IHDR: dimensions, bit depth, color type, compression/filter/interlace methods
   4704          case STBI__PNG_TYPE('I','H','D','R'): {
   4705             int comp,filter;
   4706             if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
   4707             first = 0;
   4708             if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
   4709             s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
   4710             s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
   4711             z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
   4712             color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
   4713             if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
                    // color type 3 is paletted; any other odd color type is rejected
   4714             if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
   4715             comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
   4716             filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
   4717             interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
   4718             if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
   4719             if (!pal_img_n) {
                       // bit 1 of the color type selects RGB over gray, bit 2 adds alpha
   4720                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
                       // reject images with more than 2^30 samples (x * y * img_n)
   4721                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
   4722                if (scan == STBI__SCAN_header) return 1;
   4723             } else {
   4724                // if paletted, then pal_n is our final components, and
   4725                // img_n is # components to decompress/filter.
   4726                s->img_n = 1;
   4727                if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
   4728                // if SCAN_header, have to scan to see if we have a tRNS
   4729             }
   4730             break;
   4731          }
   4732 
                 // PLTE: up to 256 RGB triples, stored as RGBA with alpha preset to 255
   4733          case STBI__PNG_TYPE('P','L','T','E'):  {
   4734             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
   4735             if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
   4736             pal_len = c.length / 3;
   4737             if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
   4738             for (i=0; i < pal_len; ++i) {
   4739                palette[i*4+0] = stbi__get8(s);
   4740                palette[i*4+1] = stbi__get8(s);
   4741                palette[i*4+2] = stbi__get8(s);
   4742                palette[i*4+3] = 255;
   4743             }
   4744             break;
   4745          }
   4746 
                 // tRNS: per-entry alpha for paletted images, or a single
                 // transparent color key for gray / RGB images
   4747          case STBI__PNG_TYPE('t','R','N','S'): {
   4748             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
   4749             if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
   4750             if (pal_img_n) {
                       // a header-only scan can stop here: palette + tRNS means 4 components
   4751                if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
   4752                if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
   4753                if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
   4754                pal_img_n = 4;
   4755                for (i=0; i < c.length; ++i)
   4756                   palette[i*4+3] = stbi__get8(s);
   4757             } else {
                       // img_n is 1 or 3 without alpha; an even count means alpha is already present
   4758                if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
                       // one 16-bit value per color component
   4759                if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
   4760                has_trans = 1;
   4761                if (z->depth == 16) {
   4762                   for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
   4763                } else {
                          // low byte of each key value, scaled by the per-depth factor from stbi__depth_scale_table
   4764                   for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
   4765                }
   4766             }
   4767             break;
   4768          }
   4769 
                 // IDAT: append the payload to z->idata, growing the buffer by doubling
   4770          case STBI__PNG_TYPE('I','D','A','T'): {
   4771             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
   4772             if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
                    // header-only scan of a paletted image without tRNS ends here
   4773             if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
                    // fail if the running total does not fit a signed int (note: no error string is set here)
   4774             if ((int)(ioff + c.length) < (int)ioff) return 0;
   4775             if (ioff + c.length > idata_limit) {
   4776                stbi__uint32 idata_limit_old = idata_limit;
   4777                stbi_uc *p;
   4778                if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
   4779                while (ioff + c.length > idata_limit)
   4780                   idata_limit *= 2;
   4781                STBI_NOTUSED(idata_limit_old);
                       // on failure z->idata still points at the old buffer
   4782                p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
   4783                z->idata = p;
   4784             }
   4785             if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
   4786             ioff += c.length;
   4787             break;
   4788          }
   4789 
                 // IEND: all data is in; inflate, unfilter and post-process the image
   4790          case STBI__PNG_TYPE('I','E','N','D'): {
   4791             stbi__uint32 raw_len, bpl;
   4792             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
   4793             if (scan != STBI__SCAN_load) return 1;
   4794             if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
   4795             // initial guess for decoded data size to avoid unnecessary reallocs
   4796             bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
   4797             raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
                    // raw_len is passed both as the size guess and as the out-parameter for the
                    // actual length; the last argument is 0 for iPhone-format files
                    // (NOTE(review): presumably "expect a zlib header" - confirm against the zlib decoder)
   4798             z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
   4799             if (z->expanded == NULL) return 0; // zlib should set error
   4800             STBI_FREE(z->idata); z->idata = NULL;
                    // add an alpha channel while decoding if the caller asked for exactly one
                    // extra component (non-paletted, and not 3) or if a color key is present
   4801             if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
   4802                s->img_out_n = s->img_n+1;
   4803             else
   4804                s->img_out_n = s->img_n;
   4805             if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
   4806             if (has_trans) {
   4807                if (z->depth == 16) {
   4808                   if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
   4809                } else {
   4810                   if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
   4811                }
   4812             }
   4813             if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
   4814                stbi__de_iphone(z);
   4815             if (pal_img_n) {
   4816                // pal_img_n == 3 or 4
   4817                s->img_n = pal_img_n; // record the actual colors we had
   4818                s->img_out_n = pal_img_n;
   4819                if (req_comp >= 3) s->img_out_n = req_comp;
   4820                if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
   4821                   return 0;
   4822             }
   4823             STBI_FREE(z->expanded); z->expanded = NULL;
   4824             return 1;
   4825          }
   4826 
   4827          default:
   4828             // if critical, fail
   4829             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
                    // bit 29 of the tag is bit 5 of the first name character; when it is
                    // clear the chunk is treated as critical and, being unknown, is an error
   4830             if ((c.type & (1 << 29)) == 0) {
   4831                #ifndef STBI_NO_FAILURE_STRINGS
   4832                // not threadsafe
   4833                static char invalid_chunk[] = "XXXX PNG chunk not known";
                       // overwrite the XXXX placeholder with the four characters of the chunk name
   4834                invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
   4835                invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
   4836                invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
   4837                invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
   4838                #endif
   4839                return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
   4840             }
                    // otherwise the unknown chunk is skipped
   4841             stbi__skip(s, c.length);
   4842             break;
   4843       }
   4844       // end of PNG chunk, read and skip CRC
   4845       stbi__get32be(s);
   4846    }
   4847 }
   4848 
   4849 static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
   4850 {
   4851    void *result=NULL;
   4852    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
   4853    if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
   4854       if (p->depth < 8)
   4855          ri->bits_per_channel = 8;
   4856       else
   4857          ri->bits_per_channel = p->depth;
   4858       result = p->out;
   4859       p->out = NULL;
   4860       if (req_comp && req_comp != p->s->img_out_n) {
   4861          if (ri->bits_per_channel == 8)
   4862             result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
   4863          else
   4864             result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
   4865          p->s->img_out_n = req_comp;
   4866          if (result == NULL) return result;
   4867       }
   4868       *x = p->s->img_x;
   4869       *y = p->s->img_y;
   4870       if (n) *n = p->s->img_n;
   4871    }
   4872    STBI_FREE(p->out);      p->out      = NULL;
   4873    STBI_FREE(p->expanded); p->expanded = NULL;
   4874    STBI_FREE(p->idata);    p->idata    = NULL;
   4875 
   4876    return result;
   4877 }
   4878 
   4879 static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   4880 {
   4881    stbi__png p;
   4882    p.s = s;
   4883    return stbi__do_png(&p, x,y,comp,req_comp, ri);
   4884 }
   4885 
   4886 static int stbi__png_test(stbi__context *s)
   4887 {
   4888    int r;
   4889    r = stbi__check_png_header(s);
   4890    stbi__rewind(s);
   4891    return r;
   4892 }
   4893 
   4894 static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
   4895 {
   4896    if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
   4897       stbi__rewind( p->s );
   4898       return 0;
   4899    }
   4900    if (x) *x = p->s->img_x;
   4901    if (y) *y = p->s->img_y;
   4902    if (comp) *comp = p->s->img_n;
   4903    return 1;
   4904 }
   4905 
   4906 static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
   4907 {
   4908    stbi__png p;
   4909    p.s = s;
   4910    return stbi__png_info_raw(&p, x, y, comp);
   4911 }
   4912 #endif
   4913 
   4914 // Microsoft/Windows BMP image
   4915 
   4916 #ifndef STBI_NO_BMP
   4917 static int stbi__bmp_test_raw(stbi__context *s)
   4918 {
   4919    int r;
   4920    int sz;
   4921    if (stbi__get8(s) != 'B') return 0;
   4922    if (stbi__get8(s) != 'M') return 0;
   4923    stbi__get32le(s); // discard filesize
   4924    stbi__get16le(s); // discard reserved
   4925    stbi__get16le(s); // discard reserved
   4926    stbi__get32le(s); // discard data offset
   4927    sz = stbi__get32le(s);
   4928    r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
   4929    return r;
   4930 }
   4931 
   4932 static int stbi__bmp_test(stbi__context *s)
   4933 {
   4934    int r = stbi__bmp_test_raw(s);
   4935    stbi__rewind(s);
   4936    return r;
   4937 }
   4938 
   4939 
   // Index (0..31) of the most significant set bit of z, or -1 when z is 0.
   // Found by binary search: test the upper half of the remaining range and
   // shift it down whenever it is non-empty.
   static int stbi__high_bit(unsigned int z)
   {
      int pos = 0;
      int shift;

      if (z == 0) return -1;

      for (shift = 16; shift > 0; shift >>= 1) {
         if ((z >> shift) != 0) {
            pos += shift;
            z >>= shift;
         }
      }
      return pos;
   }
   4952 
   // Number of set bits in a, computed by summing adjacent bit fields of
   // doubling width (1 -> 2 -> 4 -> 8 bits) and then folding the byte sums
   // together; the total ends up in the low byte.
   static int stbi__bitcount(unsigned int a)
   {
      unsigned int pairs, quads, octets, halves, total;

      pairs  = (a & 0x55555555) + ((a >> 1) & 0x55555555);         // each 2-bit field: 0..2
      quads  = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333); // each 4-bit field: 0..4
      octets = (quads + (quads >> 4)) & 0x0f0f0f0f;                // each byte: 0..8
      halves = octets + (octets >> 8);                             // low byte of each half: 0..16
      total  = halves + (halves >> 16);                            // low byte: 0..32
      return total & 0xff;
   }
   4962 
   // Expand a masked channel value to the 0..255 range.
   //
   //   v     : pixel value already ANDed with the channel mask
   //   shift : distance to move the field so its top bit lands on bit 7
   //           (positive = shift right, negative = shift left)
   //   bits  : number of significant bits in the field
   //
   // After aligning the field, its bit pattern is replicated downward every
   // 'bits' positions so that a field narrower than 8 bits still spans the
   // full range (e.g. a 5-bit value of 31 becomes 255).
   //
   // The work is done on the unsigned bit pattern of v: a mask that includes
   // bit 31 makes v negative as an int, and shifting that right would drag
   // sign bits into the result and corrupt the replication sum.
   static int stbi__shiftsigned(int v, int shift, int bits)
   {
      unsigned int field = (unsigned int) v;
      unsigned int result;
      int z;

      if (shift < 0) field <<= -shift;
      else field >>= shift;
      result = field;

      // a non-positive width could never advance z below; nothing to replicate
      if (bits <= 0) return (int) result;

      z = bits;
      while (z < 8) {
         result += field >> z;
         z += bits;
      }
      return (int) result;
   }
   4979 
   // Values collected from a BMP header by stbi__bmp_parse_header.
   typedef struct
   {
      int bpp;            // bits-per-pixel field read from the header
      int offset;         // data offset read from the file header
      int hsz;            // info-header size (12, 40, 56, 108 or 124)
      unsigned int mr;    // red channel bit mask
      unsigned int mg;    // green channel bit mask
      unsigned int mb;    // blue channel bit mask
      unsigned int ma;    // alpha channel bit mask (0 when there is none)
      unsigned int all_a; // OR of every alpha value loaded; 0 at the end means alpha was all zero
   } stbi__bmp_data;
   4985 
   4986 static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
   4987 {
           // Read the BMP file header and info header from s.
           // Fills info->offset, info->hsz, info->bpp and the channel masks
           // info->mr/mg/mb/ma, and stores the dimensions in s->img_x / s->img_y.
           // info->all_a is written only for uncompressed 32 bpp with a 40- or
           // 56-byte header; otherwise it keeps whatever the caller put there.
           // Returns (void *) 1 on success; on failure returns the value of
           // stbi__errpuc(...), which the caller compares against NULL.
   4988    int hsz;
   4989    if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   4990    stbi__get32le(s); // discard filesize
   4991    stbi__get16le(s); // discard reserved
   4992    stbi__get16le(s); // discard reserved
   4993    info->offset = stbi__get32le(s);
   4994    info->hsz = hsz = stbi__get32le(s);
   4995    info->mr = info->mg = info->mb = info->ma = 0;
   4996 
   4997    if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
           // the 12-byte header stores 16-bit dimensions, all others 32-bit
   4998    if (hsz == 12) {
   4999       s->img_x = stbi__get16le(s);
   5000       s->img_y = stbi__get16le(s);
   5001    } else {
   5002       s->img_x = stbi__get32le(s);
   5003       s->img_y = stbi__get32le(s);
   5004    }
           // the next 16-bit field must be 1 (NOTE(review): the BMP color-plane count - confirm against the format docs)
   5005    if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   5006    info->bpp = stbi__get16le(s);
   5007    if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
           // the 12-byte header ends here; longer headers carry compression and mask data
   5008    if (hsz != 12) {
   5009       int compress = stbi__get32le(s);
   5010       if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
   5011       stbi__get32le(s); // discard sizeof
   5012       stbi__get32le(s); // discard hres
   5013       stbi__get32le(s); // discard vres
   5014       stbi__get32le(s); // discard colorsused
   5015       stbi__get32le(s); // discard max important
   5016       if (hsz == 40 || hsz == 56) {
                 // the 56-byte header has four extra 32-bit fields, read and ignored here
   5017          if (hsz == 56) {
   5018             stbi__get32le(s);
   5019             stbi__get32le(s);
   5020             stbi__get32le(s);
   5021             stbi__get32le(s);
   5022          }
                 // masks are only set up for 16 and 32 bpp; other depths leave them 0
   5023          if (info->bpp == 16 || info->bpp == 32) {
   5024             if (compress == 0) {
                       // no masks in the file: use 8-8-8-8 for 32 bpp (blue in the low byte)
                       // and 5-5-5 for 16 bpp
   5025                if (info->bpp == 32) {
   5026                   info->mr = 0xffu << 16;
   5027                   info->mg = 0xffu <<  8;
   5028                   info->mb = 0xffu <<  0;
   5029                   info->ma = 0xffu << 24;
   5030                   info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
   5031                } else {
   5032                   info->mr = 31u << 10;
   5033                   info->mg = 31u <<  5;
   5034                   info->mb = 31u <<  0;
   5035                }
   5036             } else if (compress == 3) {
                       // compression 3: the three color masks follow in the file (no alpha mask is read here)
   5037                info->mr = stbi__get32le(s);
   5038                info->mg = stbi__get32le(s);
   5039                info->mb = stbi__get32le(s);
   5040                // not documented, but generated by photoshop and handled by mspaint
   5041                if (info->mr == info->mg && info->mg == info->mb) {
   5042                   // ?!?!?
   5043                   return stbi__errpuc("bad BMP", "bad BMP");
   5044                }
   5045             } else
   5046                return stbi__errpuc("bad BMP", "bad BMP");
   5047          }
   5048       } else {
                 // 108- and 124-byte headers always carry all four masks
   5049          int i;
   5050          if (hsz != 108 && hsz != 124)
   5051             return stbi__errpuc("bad BMP", "bad BMP");
   5052          info->mr = stbi__get32le(s);
   5053          info->mg = stbi__get32le(s);
   5054          info->mb = stbi__get32le(s);
   5055          info->ma = stbi__get32le(s);
   5056          stbi__get32le(s); // discard color space
   5057          for (i=0; i < 12; ++i)
   5058             stbi__get32le(s); // discard color space parameters
   5059          if (hsz == 124) {
   5060             stbi__get32le(s); // discard rendering intent
   5061             stbi__get32le(s); // discard offset of profile data
   5062             stbi__get32le(s); // discard size of profile data
   5063             stbi__get32le(s); // discard reserved
   5064          }
   5065       }
   5066    }
           // any non-NULL pointer signals success to the caller
   5067    return (void *) 1;
   5068 }
   5069 
   5070 
   5071 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   5072 {
   5073    stbi_uc *out;
   5074    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   5075    stbi_uc pal[256][4];
   5076    int psize=0,i,j,width;
   5077    int flip_vertically, pad, target;
   5078    stbi__bmp_data info;
   5079    STBI_NOTUSED(ri);
   5080 
   5081    info.all_a = 255;
   5082    if (stbi__bmp_parse_header(s, &info) == NULL)
   5083       return NULL; // error code already set
   5084 
   5085    flip_vertically = ((int) s->img_y) > 0;
   5086    s->img_y = abs((int) s->img_y);
   5087 
   5088    mr = info.mr;
   5089    mg = info.mg;
   5090    mb = info.mb;
   5091    ma = info.ma;
   5092    all_a = info.all_a;
   5093 
   5094    if (info.hsz == 12) {
   5095       if (info.bpp < 24)
   5096          psize = (info.offset - 14 - 24) / 3;
   5097    } else {
   5098       if (info.bpp < 16)
   5099          psize = (info.offset - 14 - info.hsz) >> 2;
   5100    }
   5101 
   5102    s->img_n = ma ? 4 : 3;
   5103    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
   5104       target = req_comp;
   5105    else
   5106       target = s->img_n; // if they want monochrome, we'll post-convert
   5107 
   5108    // sanity-check size
   5109    if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
   5110       return stbi__errpuc("too large", "Corrupt BMP");
   5111 
   5112    out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   5113    if (!out) return stbi__errpuc("outofmem", "Out of memory");
   5114    if (info.bpp < 16) {
   5115       int z=0;
   5116       if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
   5117       for (i=0; i < psize; ++i) {
   5118          pal[i][2] = stbi__get8(s);
   5119          pal[i][1] = stbi__get8(s);
   5120          pal[i][0] = stbi__get8(s);
   5121          if (info.hsz != 12) stbi__get8(s);
   5122          pal[i][3] = 255;
   5123       }
   5124       stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
   5125       if (info.bpp == 4) width = (s->img_x + 1) >> 1;
   5126       else if (info.bpp == 8) width = s->img_x;
   5127       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
   5128       pad = (-width)&3;
   5129       for (j=0; j < (int) s->img_y; ++j) {
   5130          for (i=0; i < (int) s->img_x; i += 2) {
   5131             int v=stbi__get8(s),v2=0;
   5132             if (info.bpp == 4) {
   5133                v2 = v & 15;
   5134                v >>= 4;
   5135             }
   5136             out[z++] = pal[v][0];
   5137             out[z++] = pal[v][1];
   5138             out[z++] = pal[v][2];
   5139             if (target == 4) out[z++] = 255;
   5140             if (i+1 == (int) s->img_x) break;
   5141             v = (info.bpp == 8) ? stbi__get8(s) : v2;
   5142             out[z++] = pal[v][0];
   5143             out[z++] = pal[v][1];
   5144             out[z++] = pal[v][2];
   5145             if (target == 4) out[z++] = 255;
   5146          }
   5147          stbi__skip(s, pad);
   5148       }
   5149    } else {
   5150       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
   5151       int z = 0;
   5152       int easy=0;
   5153       stbi__skip(s, info.offset - 14 - info.hsz);
   5154       if (info.bpp == 24) width = 3 * s->img_x;
   5155       else if (info.bpp == 16) width = 2*s->img_x;
   5156       else /* bpp = 32 and pad = 0 */ width=0;
   5157       pad = (-width) & 3;
   5158       if (info.bpp == 24) {
   5159          easy = 1;
   5160       } else if (info.bpp == 32) {
   5161          if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
   5162             easy = 2;
   5163       }
   5164       if (!easy) {
   5165          if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
   5166          // right shift amt to put high bit in position #7
   5167          rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
   5168          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
   5169          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
   5170          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
   5171       }
   5172       for (j=0; j < (int) s->img_y; ++j) {
   5173          if (easy) {
   5174             for (i=0; i < (int) s->img_x; ++i) {
   5175                unsigned char a;
   5176                out[z+2] = stbi__get8(s);
   5177                out[z+1] = stbi__get8(s);
   5178                out[z+0] = stbi__get8(s);
   5179                z += 3;
   5180                a = (easy == 2 ? stbi__get8(s) : 255);
   5181                all_a |= a;
   5182                if (target == 4) out[z++] = a;
   5183             }
   5184          } else {
   5185             int bpp = info.bpp;
   5186             for (i=0; i < (int) s->img_x; ++i) {
   5187                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
   5188                int a;
   5189                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
   5190                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
   5191                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
   5192                a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
   5193                all_a |= a;
   5194                if (target == 4) out[z++] = STBI__BYTECAST(a);
   5195             }
   5196          }
   5197          stbi__skip(s, pad);
   5198       }
   5199    }
   5200 
   5201    // if alpha channel is all 0s, replace with all 255s
   5202    if (target == 4 && all_a == 0)
   5203       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
   5204          out[i] = 255;
   5205 
   5206    if (flip_vertically) {
   5207       stbi_uc t;
   5208       for (j=0; j < (int) s->img_y>>1; ++j) {
   5209          stbi_uc *p1 = out +      j     *s->img_x*target;
   5210          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
   5211          for (i=0; i < (int) s->img_x*target; ++i) {
   5212             t = p1[i], p1[i] = p2[i], p2[i] = t;
   5213          }
   5214       }
   5215    }
   5216 
   5217    if (req_comp && req_comp != target) {
   5218       out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
   5219       if (out == NULL) return out; // stbi__convert_format frees input on failure
   5220    }
   5221 
   5222    *x = s->img_x;
   5223    *y = s->img_y;
   5224    if (comp) *comp = s->img_n;
   5225    return out;
   5226 }
   5227 #endif
   5228 
   5229 // Targa Truevision - TGA
   5230 // by Jonathan Dummer
   5231 #ifndef STBI_NO_TGA
   5232 // returns STBI_rgb or whatever, 0 on error
   5233 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
   5234 {
   5235    // only RGB or RGBA (incl. 16bit) or grey allowed
   5236    if(is_rgb16) *is_rgb16 = 0;
   5237    switch(bits_per_pixel) {
   5238       case 8:  return STBI_grey;
   5239       case 16: if(is_grey) return STBI_grey_alpha;
   5240             // else: fall-through
   5241       case 15: if(is_rgb16) *is_rgb16 = 1;
   5242             return STBI_rgb;
   5243       case 24: // fall-through
   5244       case 32: return bits_per_pixel/8;
   5245       default: return 0;
   5246    }
   5247 }
   5248 
   5249 static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
   5250 {
   5251     int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
   5252     int sz, tga_colormap_type;
   5253     stbi__get8(s);                   // discard Offset
   5254     tga_colormap_type = stbi__get8(s); // colormap type
   5255     if( tga_colormap_type > 1 ) {
   5256         stbi__rewind(s);
   5257         return 0;      // only RGB or indexed allowed
   5258     }
   5259     tga_image_type = stbi__get8(s); // image type
   5260     if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
   5261         if (tga_image_type != 1 && tga_image_type != 9) {
   5262             stbi__rewind(s);
   5263             return 0;
   5264         }
   5265         stbi__skip(s,4);       // skip index of first colormap entry and number of entries
   5266         sz = stbi__get8(s);    //   check bits per palette color entry
   5267         if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
   5268             stbi__rewind(s);
   5269             return 0;
   5270         }
   5271         stbi__skip(s,4);       // skip image x and y origin
   5272         tga_colormap_bpp = sz;
   5273     } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
   5274         if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
   5275             stbi__rewind(s);
   5276             return 0; // only RGB or grey allowed, +/- RLE
   5277         }
   5278         stbi__skip(s,9); // skip colormap specification and image x/y origin
   5279         tga_colormap_bpp = 0;
   5280     }
   5281     tga_w = stbi__get16le(s);
   5282     if( tga_w < 1 ) {
   5283         stbi__rewind(s);
   5284         return 0;   // test width
   5285     }
   5286     tga_h = stbi__get16le(s);
   5287     if( tga_h < 1 ) {
   5288         stbi__rewind(s);
   5289         return 0;   // test height
   5290     }
   5291     tga_bits_per_pixel = stbi__get8(s); // bits per pixel
   5292     stbi__get8(s); // ignore alpha bits
   5293     if (tga_colormap_bpp != 0) {
   5294         if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
   5295             // when using a colormap, tga_bits_per_pixel is the size of the indexes
   5296             // I don't think anything but 8 or 16bit indexes makes sense
   5297             stbi__rewind(s);
   5298             return 0;
   5299         }
   5300         tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
   5301     } else {
   5302         tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
   5303     }
   5304     if(!tga_comp) {
   5305       stbi__rewind(s);
   5306       return 0;
   5307     }
   5308     if (x) *x = tga_w;
   5309     if (y) *y = tga_h;
   5310     if (comp) *comp = tga_comp;
   5311     return 1;                   // seems to have passed everything
   5312 }
   5313 
   5314 static int stbi__tga_test(stbi__context *s)
   5315 {
   5316    int res = 0;
   5317    int sz, tga_color_type;
   5318    stbi__get8(s);      //   discard Offset
   5319    tga_color_type = stbi__get8(s);   //   color type
   5320    if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
   5321    sz = stbi__get8(s);   //   image type
   5322    if ( tga_color_type == 1 ) { // colormapped (paletted) image
   5323       if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
   5324       stbi__skip(s,4);       // skip index of first colormap entry and number of entries
   5325       sz = stbi__get8(s);    //   check bits per palette color entry
   5326       if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
   5327       stbi__skip(s,4);       // skip image x and y origin
   5328    } else { // "normal" image w/o colormap
   5329       if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
   5330       stbi__skip(s,9); // skip colormap specification and image x/y origin
   5331    }
   5332    if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
   5333    if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
   5334    sz = stbi__get8(s);   //   bits per pixel
   5335    if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
   5336    if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
   5337 
   5338    res = 1; // if we got this far, everything's good and we can return 1 instead of 0
   5339 
   5340 errorEnd:
   5341    stbi__rewind(s);
   5342    return res;
   5343 }
   5344 
   5345 // read 16bit value and convert to 24bit RGB
   5346 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
   5347 {
   5348    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
   5349    stbi__uint16 fiveBitMask = 31;
   5350    // we have 3 channels with 5bits each
   5351    int r = (px >> 10) & fiveBitMask;
   5352    int g = (px >> 5) & fiveBitMask;
   5353    int b = px & fiveBitMask;
   5354    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
   5355    out[0] = (stbi_uc)((r * 255)/31);
   5356    out[1] = (stbi_uc)((g * 255)/31);
   5357    out[2] = (stbi_uc)((b * 255)/31);
   5358 
   5359    // some people claim that the most significant bit might be used for alpha
   5360    // (possibly if an alpha-bit is set in the "image descriptor byte")
   5361    // but that only made 16bit test images completely translucent..
   5362    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
   5363 }
   5364 
   5365 static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   5366 {
   5367    //   read in the TGA header stuff
   5368    int tga_offset = stbi__get8(s);
   5369    int tga_indexed = stbi__get8(s);
   5370    int tga_image_type = stbi__get8(s);
   5371    int tga_is_RLE = 0;
   5372    int tga_palette_start = stbi__get16le(s);
   5373    int tga_palette_len = stbi__get16le(s);
   5374    int tga_palette_bits = stbi__get8(s);
   5375    int tga_x_origin = stbi__get16le(s);
   5376    int tga_y_origin = stbi__get16le(s);
   5377    int tga_width = stbi__get16le(s);
   5378    int tga_height = stbi__get16le(s);
   5379    int tga_bits_per_pixel = stbi__get8(s);
   5380    int tga_comp, tga_rgb16=0;
   5381    int tga_inverted = stbi__get8(s);
   5382    // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   5383    //   image data
   5384    unsigned char *tga_data;
   5385    unsigned char *tga_palette = NULL;
   5386    int i, j;
   5387    unsigned char raw_data[4] = {0};
   5388    int RLE_count = 0;
   5389    int RLE_repeating = 0;
   5390    int read_next_pixel = 1;
   5391    STBI_NOTUSED(ri);
   5392 
   5393    //   do a tiny bit of precessing
   5394    if ( tga_image_type >= 8 )
   5395    {
   5396       tga_image_type -= 8;
   5397       tga_is_RLE = 1;
   5398    }
   5399    tga_inverted = 1 - ((tga_inverted >> 5) & 1);
   5400 
   5401    //   If I'm paletted, then I'll use the number of bits from the palette
   5402    if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   5403    else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
   5404 
   5405    if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
   5406       return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
   5407 
   5408    //   tga info
   5409    *x = tga_width;
   5410    *y = tga_height;
   5411    if (comp) *comp = tga_comp;
   5412 
   5413    if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
   5414       return stbi__errpuc("too large", "Corrupt TGA");
   5415 
   5416    tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   5417    if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
   5418 
   5419    // skip to the data's starting position (offset usually = 0)
   5420    stbi__skip(s, tga_offset );
   5421 
   5422    if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
   5423       for (i=0; i < tga_height; ++i) {
   5424          int row = tga_inverted ? tga_height -i - 1 : i;
   5425          stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
   5426          stbi__getn(s, tga_row, tga_width * tga_comp);
   5427       }
   5428    } else  {
   5429       //   do I need to load a palette?
   5430       if ( tga_indexed)
   5431       {
   5432          //   any data to skip? (offset usually = 0)
   5433          stbi__skip(s, tga_palette_start );
   5434          //   load the palette
   5435          tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
   5436          if (!tga_palette) {
   5437             STBI_FREE(tga_data);
   5438             return stbi__errpuc("outofmem", "Out of memory");
   5439          }
   5440          if (tga_rgb16) {
   5441             stbi_uc *pal_entry = tga_palette;
   5442             STBI_ASSERT(tga_comp == STBI_rgb);
   5443             for (i=0; i < tga_palette_len; ++i) {
   5444                stbi__tga_read_rgb16(s, pal_entry);
   5445                pal_entry += tga_comp;
   5446             }
   5447          } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
   5448                STBI_FREE(tga_data);
   5449                STBI_FREE(tga_palette);
   5450                return stbi__errpuc("bad palette", "Corrupt TGA");
   5451          }
   5452       }
   5453       //   load the data
   5454       for (i=0; i < tga_width * tga_height; ++i)
   5455       {
   5456          //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
   5457          if ( tga_is_RLE )
   5458          {
   5459             if ( RLE_count == 0 )
   5460             {
   5461                //   yep, get the next byte as a RLE command
   5462                int RLE_cmd = stbi__get8(s);
   5463                RLE_count = 1 + (RLE_cmd & 127);
   5464                RLE_repeating = RLE_cmd >> 7;
   5465                read_next_pixel = 1;
   5466             } else if ( !RLE_repeating )
   5467             {
   5468                read_next_pixel = 1;
   5469             }
   5470          } else
   5471          {
   5472             read_next_pixel = 1;
   5473          }
   5474          //   OK, if I need to read a pixel, do it now
   5475          if ( read_next_pixel )
   5476          {
   5477             //   load however much data we did have
   5478             if ( tga_indexed )
   5479             {
   5480                // read in index, then perform the lookup
   5481                int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
   5482                if ( pal_idx >= tga_palette_len ) {
   5483                   // invalid index
   5484                   pal_idx = 0;
   5485                }
   5486                pal_idx *= tga_comp;
   5487                for (j = 0; j < tga_comp; ++j) {
   5488                   raw_data[j] = tga_palette[pal_idx+j];
   5489                }
   5490             } else if(tga_rgb16) {
   5491                STBI_ASSERT(tga_comp == STBI_rgb);
   5492                stbi__tga_read_rgb16(s, raw_data);
   5493             } else {
   5494                //   read in the data raw
   5495                for (j = 0; j < tga_comp; ++j) {
   5496                   raw_data[j] = stbi__get8(s);
   5497                }
   5498             }
   5499             //   clear the reading flag for the next pixel
   5500             read_next_pixel = 0;
   5501          } // end of reading a pixel
   5502 
   5503          // copy data
   5504          for (j = 0; j < tga_comp; ++j)
   5505            tga_data[i*tga_comp+j] = raw_data[j];
   5506 
   5507          //   in case we're in RLE mode, keep counting down
   5508          --RLE_count;
   5509       }
   5510       //   do I need to invert the image?
   5511       if ( tga_inverted )
   5512       {
   5513          for (j = 0; j*2 < tga_height; ++j)
   5514          {
   5515             int index1 = j * tga_width * tga_comp;
   5516             int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
   5517             for (i = tga_width * tga_comp; i > 0; --i)
   5518             {
   5519                unsigned char temp = tga_data[index1];
   5520                tga_data[index1] = tga_data[index2];
   5521                tga_data[index2] = temp;
   5522                ++index1;
   5523                ++index2;
   5524             }
   5525          }
   5526       }
   5527       //   clear my palette, if I had one
   5528       if ( tga_palette != NULL )
   5529       {
   5530          STBI_FREE( tga_palette );
   5531       }
   5532    }
   5533 
   5534    // swap RGB - if the source data was RGB16, it already is in the right order
   5535    if (tga_comp >= 3 && !tga_rgb16)
   5536    {
   5537       unsigned char* tga_pixel = tga_data;
   5538       for (i=0; i < tga_width * tga_height; ++i)
   5539       {
   5540          unsigned char temp = tga_pixel[0];
   5541          tga_pixel[0] = tga_pixel[2];
   5542          tga_pixel[2] = temp;
   5543          tga_pixel += tga_comp;
   5544       }
   5545    }
   5546 
   5547    // convert to target component count
   5548    if (req_comp && req_comp != tga_comp)
   5549       tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
   5550 
   5551    //   the things I do to get rid of an error message, and yet keep
   5552    //   Microsoft's C compilers happy... [8^(
   5553    tga_palette_start = tga_palette_len = tga_palette_bits =
   5554          tga_x_origin = tga_y_origin = 0;
   5555    //   OK, done
   5556    return tga_data;
   5557 }
   5558 #endif
   5559 
   5560 // *************************************************************************************************
   5561 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
   5562 
   5563 #ifndef STBI_NO_PSD
   5564 static int stbi__psd_test(stbi__context *s)
   5565 {
   5566    int r = (stbi__get32be(s) == 0x38425053);
   5567    stbi__rewind(s);
   5568    return r;
   5569 }
   5570 
   5571 static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
   5572 {
   5573    int count, nleft, len;
   5574 
   5575    count = 0;
   5576    while ((nleft = pixelCount - count) > 0) {
   5577       len = stbi__get8(s);
   5578       if (len == 128) {
   5579          // No-op.
   5580       } else if (len < 128) {
   5581          // Copy next len+1 bytes literally.
   5582          len++;
   5583          if (len > nleft) return 0; // corrupt data
   5584          count += len;
   5585          while (len) {
   5586             *p = stbi__get8(s);
   5587             p += 4;
   5588             len--;
   5589          }
   5590       } else if (len > 128) {
   5591          stbi_uc   val;
   5592          // Next -len+1 bytes in the dest are replicated from next source byte.
   5593          // (Interpret len as a negative 8-bit int.)
   5594          len = 257 - len;
   5595          if (len > nleft) return 0; // corrupt data
   5596          val = stbi__get8(s);
   5597          count += len;
   5598          while (len) {
   5599             *p = val;
   5600             p += 4;
   5601             len--;
   5602          }
   5603       }
   5604    }
   5605 
   5606    return 1;
   5607 }
   5608 
// Decodes the composited image of an RGB-mode PSD file from s.
//   x, y      receive the image width and height
//   comp      optional; always set to 4 on success
//   req_comp  if non-zero and not 4, the 4-channel result is converted to this count
//   ri        result info; bits_per_channel is set to 16 when 16-bit output is produced
//   bpc       requested bits per channel; 16-bit output is produced only when the file is
//             uncompressed, has a bit depth of 16, and bpc == 16
// The image is always decoded into a 4-channel interleaved buffer; channels missing from
// the file are filled with 0 (or fully opaque for channel 3).
// Returns the pixel buffer, or the result of stbi__errpuc on failure.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image (height comes first in the file).
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Make sure the depth is 8 or 16 bits per channel.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   // Each of the next three sections is a 32-bit length followed by that many bytes.
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size (4 bytes per pixel must not overflow)
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.
   // 16-bit output (8 bytes per pixel) is only produced for uncompressed 16-bit files
   // when the caller asked for 16 bits per channel; everything else decodes to 8-bit.

   if (!compression && bitdepth == 16 && bpc == 16) {
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data (opaque alpha, zero colour).
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ? 255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc
                  // 16-bit input, 8-bit output: keep only the high byte of each sample
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD
   // (only partially transparent pixels are adjusted: c' = c/a + max*(1 - 1/a))
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   // the file is always reported as 4-channel, regardless of channelCount or req_comp
   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
   5806 #endif
   5807 
   5808 // *************************************************************************************************
   5809 // Softimage PIC loader
   5810 // by Tom Seddon
   5811 //
   5812 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
   5813 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
   5814 
   5815 #ifndef STBI_NO_PIC
   5816 static int stbi__pic_is4(stbi__context *s,const char *str)
   5817 {
   5818    int i;
   5819    for (i=0; i<4; ++i)
   5820       if (stbi__get8(s) != (stbi_uc)str[i])
   5821          return 0;
   5822 
   5823    return 1;
   5824 }
   5825 
   5826 static int stbi__pic_test_core(stbi__context *s)
   5827 {
   5828    int i;
   5829 
   5830    if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
   5831       return 0;
   5832 
   5833    for(i=0;i<84;++i)
   5834       stbi__get8(s);
   5835 
   5836    if (!stbi__pic_is4(s,"PICT"))
   5837       return 0;
   5838 
   5839    return 1;
   5840 }
   5841 
   5842 typedef struct
   5843 {
   5844    stbi_uc size,type,channel;
   5845 } stbi__pic_packet;
   5846 
   5847 static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
   5848 {
   5849    int mask=0x80, i;
   5850 
   5851    for (i=0; i<4; ++i, mask>>=1) {
   5852       if (channel & mask) {
   5853          if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
   5854          dest[i]=stbi__get8(s);
   5855       }
   5856    }
   5857 
   5858    return dest;
   5859 }
   5860 
   5861 static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
   5862 {
   5863    int mask=0x80,i;
   5864 
   5865    for (i=0;i<4; ++i, mask>>=1)
   5866       if (channel&mask)
   5867          dest[i]=src[i];
   5868 }
   5869 
   5870 static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
   5871 {
   5872    int act_comp=0,num_packets=0,y,chained;
   5873    stbi__pic_packet packets[10];
   5874 
   5875    // this will (should...) cater for even some bizarre stuff like having data
   5876     // for the same channel in multiple packets.
   5877    do {
   5878       stbi__pic_packet *packet;
   5879 
   5880       if (num_packets==sizeof(packets)/sizeof(packets[0]))
   5881          return stbi__errpuc("bad format","too many packets");
   5882 
   5883       packet = &packets[num_packets++];
   5884 
   5885       chained = stbi__get8(s);
   5886       packet->size    = stbi__get8(s);
   5887       packet->type    = stbi__get8(s);
   5888       packet->channel = stbi__get8(s);
   5889 
   5890       act_comp |= packet->channel;
   5891 
   5892       if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
   5893       if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   5894    } while (chained);
   5895 
   5896    *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
   5897 
   5898    for(y=0; y<height; ++y) {
   5899       int packet_idx;
   5900 
   5901       for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
   5902          stbi__pic_packet *packet = &packets[packet_idx];
   5903          stbi_uc *dest = result+y*width*4;
   5904 
   5905          switch (packet->type) {
   5906             default:
   5907                return stbi__errpuc("bad format","packet has bad compression type");
   5908 
   5909             case 0: {//uncompressed
   5910                int x;
   5911 
   5912                for(x=0;x<width;++x, dest+=4)
   5913                   if (!stbi__readval(s,packet->channel,dest))
   5914                      return 0;
   5915                break;
   5916             }
   5917 
   5918             case 1://Pure RLE
   5919                {
   5920                   int left=width, i;
   5921 
   5922                   while (left>0) {
   5923                      stbi_uc count,value[4];
   5924 
   5925                      count=stbi__get8(s);
   5926                      if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
   5927 
   5928                      if (count > left)
   5929                         count = (stbi_uc) left;
   5930 
   5931                      if (!stbi__readval(s,packet->channel,value))  return 0;
   5932 
   5933                      for(i=0; i<count; ++i,dest+=4)
   5934                         stbi__copyval(packet->channel,dest,value);
   5935                      left -= count;
   5936                   }
   5937                }
   5938                break;
   5939 
   5940             case 2: {//Mixed RLE
   5941                int left=width;
   5942                while (left>0) {
   5943                   int count = stbi__get8(s), i;
   5944                   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
   5945 
   5946                   if (count >= 128) { // Repeated
   5947                      stbi_uc value[4];
   5948 
   5949                      if (count==128)
   5950                         count = stbi__get16be(s);
   5951                      else
   5952                         count -= 127;
   5953                      if (count > left)
   5954                         return stbi__errpuc("bad file","scanline overrun");
   5955 
   5956                      if (!stbi__readval(s,packet->channel,value))
   5957                         return 0;
   5958 
   5959                      for(i=0;i<count;++i, dest += 4)
   5960                         stbi__copyval(packet->channel,dest,value);
   5961                   } else { // Raw
   5962                      ++count;
   5963                      if (count>left) return stbi__errpuc("bad file","scanline overrun");
   5964 
   5965                      for(i=0;i<count;++i, dest+=4)
   5966                         if (!stbi__readval(s,packet->channel,dest))
   5967                            return 0;
   5968                   }
   5969                   left-=count;
   5970                }
   5971                break;
   5972             }
   5973          }
   5974       }
   5975    }
   5976 
   5977    return result;
   5978 }
   5979 
   5980 static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
   5981 {
   5982    stbi_uc *result;
   5983    int i, x,y, internal_comp;
   5984    STBI_NOTUSED(ri);
   5985 
   5986    if (!comp) comp = &internal_comp;
   5987 
   5988    for (i=0; i<92; ++i)
   5989       stbi__get8(s);
   5990 
   5991    x = stbi__get16be(s);
   5992    y = stbi__get16be(s);
   5993    if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   5994    if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
   5995 
   5996    stbi__get32be(s); //skip `ratio'
   5997    stbi__get16be(s); //skip `fields'
   5998    stbi__get16be(s); //skip `pad'
   5999 
   6000    // intermediate buffer is RGBA
   6001    result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   6002    memset(result, 0xff, x*y*4);
   6003 
   6004    if (!stbi__pic_load_core(s,x,y,comp, result)) {
   6005       STBI_FREE(result);
   6006       result=0;
   6007    }
   6008    *px = x;
   6009    *py = y;
   6010    if (req_comp == 0) req_comp = *comp;
   6011    result=stbi__convert_format(result,4,req_comp,x,y);
   6012 
   6013    return result;
   6014 }
   6015 
   6016 static int stbi__pic_test(stbi__context *s)
   6017 {
   6018    int r = stbi__pic_test_core(s);
   6019    stbi__rewind(s);
   6020    return r;
   6021 }
   6022 #endif
   6023 
   6024 // *************************************************************************************************
   6025 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
   6026 
   6027 #ifndef STBI_NO_GIF
   6028 typedef struct
   6029 {
   6030    stbi__int16 prefix;
   6031    stbi_uc first;
   6032    stbi_uc suffix;
   6033 } stbi__gif_lzw;
   6034 
   6035 typedef struct
   6036 {
   6037    int w,h;
   6038    stbi_uc *out, *old_out;             // output buffer (always 4 components)
   6039    int flags, bgindex, ratio, transparent, eflags, delay;
   6040    stbi_uc  pal[256][4];
   6041    stbi_uc lpal[256][4];
   6042    stbi__gif_lzw codes[4096];
   6043    stbi_uc *color_table;
   6044    int parse, step;
   6045    int lflags;
   6046    int start_x, start_y;
   6047    int max_x, max_y;
   6048    int cur_x, cur_y;
   6049    int line_size;
   6050 } stbi__gif;
   6051 
   6052 static int stbi__gif_test_raw(stbi__context *s)
   6053 {
   6054    int sz;
   6055    if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
   6056    sz = stbi__get8(s);
   6057    if (sz != '9' && sz != '7') return 0;
   6058    if (stbi__get8(s) != 'a') return 0;
   6059    return 1;
   6060 }
   6061 
   6062 static int stbi__gif_test(stbi__context *s)
   6063 {
   6064    int r = stbi__gif_test_raw(s);
   6065    stbi__rewind(s);
   6066    return r;
   6067 }
   6068 
   6069 static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
   6070 {
   6071    int i;
   6072    for (i=0; i < num_entries; ++i) {
   6073       pal[i][2] = stbi__get8(s);
   6074       pal[i][1] = stbi__get8(s);
   6075       pal[i][0] = stbi__get8(s);
   6076       pal[i][3] = transp == i ? 0 : 255;
   6077    }
   6078 }
   6079 
   6080 static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
   6081 {
   6082    stbi_uc version;
   6083    if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
   6084       return stbi__err("not GIF", "Corrupt GIF");
   6085 
   6086    version = stbi__get8(s);
   6087    if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
   6088    if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
   6089 
   6090    stbi__g_failure_reason = "";
   6091    g->w = stbi__get16le(s);
   6092    g->h = stbi__get16le(s);
   6093    g->flags = stbi__get8(s);
   6094    g->bgindex = stbi__get8(s);
   6095    g->ratio = stbi__get8(s);
   6096    g->transparent = -1;
   6097 
   6098    if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
   6099 
   6100    if (is_info) return 1;
   6101 
   6102    if (g->flags & 0x80)
   6103       stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
   6104 
   6105    return 1;
   6106 }
   6107 
   6108 static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
   6109 {
   6110    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   6111    if (!stbi__gif_header(s, g, comp, 1)) {
   6112       STBI_FREE(g);
   6113       stbi__rewind( s );
   6114       return 0;
   6115    }
   6116    if (x) *x = g->w;
   6117    if (y) *y = g->h;
   6118    STBI_FREE(g);
   6119    return 1;
   6120 }
   6121 
   6122 static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
   6123 {
   6124    stbi_uc *p, *c;
   6125 
   6126    // recurse to decode the prefixes, since the linked-list is backwards,
   6127    // and working backwards through an interleaved image would be nasty
   6128    if (g->codes[code].prefix >= 0)
   6129       stbi__out_gif_code(g, g->codes[code].prefix);
   6130 
   6131    if (g->cur_y >= g->max_y) return;
   6132 
   6133    p = &g->out[g->cur_x + g->cur_y];
   6134    c = &g->color_table[g->codes[code].suffix * 4];
   6135 
   6136    if (c[3] >= 128) {
   6137       p[0] = c[2];
   6138       p[1] = c[1];
   6139       p[2] = c[0];
   6140       p[3] = c[3];
   6141    }
   6142    g->cur_x += 4;
   6143 
   6144    if (g->cur_x >= g->max_x) {
   6145       g->cur_x = g->start_x;
   6146       g->cur_y += g->step;
   6147 
   6148       while (g->cur_y >= g->max_y && g->parse > 0) {
   6149          g->step = (1 << g->parse) * g->line_size;
   6150          g->cur_y = g->start_y + (g->step >> 1);
   6151          --g->parse;
   6152       }
   6153    }
   6154 }
   6155 
   6156 static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
   6157 {
   6158    stbi_uc lzw_cs;
   6159    stbi__int32 len, init_code;
   6160    stbi__uint32 first;
   6161    stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   6162    stbi__gif_lzw *p;
   6163 
   6164    lzw_cs = stbi__get8(s);
   6165    if (lzw_cs > 12) return NULL;
   6166    clear = 1 << lzw_cs;
   6167    first = 1;
   6168    codesize = lzw_cs + 1;
   6169    codemask = (1 << codesize) - 1;
   6170    bits = 0;
   6171    valid_bits = 0;
   6172    for (init_code = 0; init_code < clear; init_code++) {
   6173       g->codes[init_code].prefix = -1;
   6174       g->codes[init_code].first = (stbi_uc) init_code;
   6175       g->codes[init_code].suffix = (stbi_uc) init_code;
   6176    }
   6177 
   6178    // support no starting clear code
   6179    avail = clear+2;
   6180    oldcode = -1;
   6181 
   6182    len = 0;
   6183    for(;;) {
   6184       if (valid_bits < codesize) {
   6185          if (len == 0) {
   6186             len = stbi__get8(s); // start new block
   6187             if (len == 0)
   6188                return g->out;
   6189          }
   6190          --len;
   6191          bits |= (stbi__int32) stbi__get8(s) << valid_bits;
   6192          valid_bits += 8;
   6193       } else {
   6194          stbi__int32 code = bits & codemask;
   6195          bits >>= codesize;
   6196          valid_bits -= codesize;
   6197          // @OPTIMIZE: is there some way we can accelerate the non-clear path?
   6198          if (code == clear) {  // clear code
   6199             codesize = lzw_cs + 1;
   6200             codemask = (1 << codesize) - 1;
   6201             avail = clear + 2;
   6202             oldcode = -1;
   6203             first = 0;
   6204          } else if (code == clear + 1) { // end of stream code
   6205             stbi__skip(s, len);
   6206             while ((len = stbi__get8(s)) > 0)
   6207                stbi__skip(s,len);
   6208             return g->out;
   6209          } else if (code <= avail) {
   6210             if (first) return stbi__errpuc("no clear code", "Corrupt GIF");
   6211 
   6212             if (oldcode >= 0) {
   6213                p = &g->codes[avail++];
   6214                if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
   6215                p->prefix = (stbi__int16) oldcode;
   6216                p->first = g->codes[oldcode].first;
   6217                p->suffix = (code == avail) ? p->first : g->codes[code].first;
   6218             } else if (code == avail)
   6219                return stbi__errpuc("illegal code in raster", "Corrupt GIF");
   6220 
   6221             stbi__out_gif_code(g, (stbi__uint16) code);
   6222 
   6223             if ((avail & codemask) == 0 && avail <= 0x0FFF) {
   6224                codesize++;
   6225                codemask = (1 << codesize) - 1;
   6226             }
   6227 
   6228             oldcode = code;
   6229          } else {
   6230             return stbi__errpuc("illegal code in raster", "Corrupt GIF");
   6231          }
   6232       }
   6233    }
   6234 }
   6235 
   6236 static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
   6237 {
   6238    int x, y;
   6239    stbi_uc *c = g->pal[g->bgindex];
   6240    for (y = y0; y < y1; y += 4 * g->w) {
   6241       for (x = x0; x < x1; x += 4) {
   6242          stbi_uc *p  = &g->out[y + x];
   6243          p[0] = c[2];
   6244          p[1] = c[1];
   6245          p[2] = c[0];
   6246          p[3] = 0;
   6247       }
   6248    }
   6249 }
   6250 
   6251 // this function is designed to support animated gifs, although stb_image doesn't support it
   6252 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
   6253 {
   6254    int i;
   6255    stbi_uc *prev_out = 0;
   6256 
   6257    if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
   6258       return 0; // stbi__g_failure_reason set by stbi__gif_header
   6259 
   6260    if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
   6261       return stbi__errpuc("too large", "GIF too large");
   6262 
   6263    prev_out = g->out;
   6264    g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
   6265    if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
   6266 
   6267    switch ((g->eflags & 0x1C) >> 2) {
   6268       case 0: // unspecified (also always used on 1st frame)
   6269          stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
   6270          break;
   6271       case 1: // do not dispose
   6272          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
   6273          g->old_out = prev_out;
   6274          break;
   6275       case 2: // dispose to background
   6276          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
   6277          stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
   6278          break;
   6279       case 3: // dispose to previous
   6280          if (g->old_out) {
   6281             for (i = g->start_y; i < g->max_y; i += 4 * g->w)
   6282                memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
   6283          }
   6284          break;
   6285    }
   6286 
   6287    for (;;) {
   6288       switch (stbi__get8(s)) {
   6289          case 0x2C: /* Image Descriptor */
   6290          {
   6291             int prev_trans = -1;
   6292             stbi__int32 x, y, w, h;
   6293             stbi_uc *o;
   6294 
   6295             x = stbi__get16le(s);
   6296             y = stbi__get16le(s);
   6297             w = stbi__get16le(s);
   6298             h = stbi__get16le(s);
   6299             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
   6300                return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
   6301 
   6302             g->line_size = g->w * 4;
   6303             g->start_x = x * 4;
   6304             g->start_y = y * g->line_size;
   6305             g->max_x   = g->start_x + w * 4;
   6306             g->max_y   = g->start_y + h * g->line_size;
   6307             g->cur_x   = g->start_x;
   6308             g->cur_y   = g->start_y;
   6309 
   6310             g->lflags = stbi__get8(s);
   6311 
   6312             if (g->lflags & 0x40) {
   6313                g->step = 8 * g->line_size; // first interlaced spacing
   6314                g->parse = 3;
   6315             } else {
   6316                g->step = g->line_size;
   6317                g->parse = 0;
   6318             }
   6319 
   6320             if (g->lflags & 0x80) {
   6321                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
   6322                g->color_table = (stbi_uc *) g->lpal;
   6323             } else if (g->flags & 0x80) {
   6324                if (g->transparent >= 0 && (g->eflags & 0x01)) {
   6325                   prev_trans = g->pal[g->transparent][3];
   6326                   g->pal[g->transparent][3] = 0;
   6327                }
   6328                g->color_table = (stbi_uc *) g->pal;
   6329             } else
   6330                return stbi__errpuc("missing color table", "Corrupt GIF");
   6331 
   6332             o = stbi__process_gif_raster(s, g);
   6333             if (o == NULL) return NULL;
   6334 
   6335             if (prev_trans != -1)
   6336                g->pal[g->transparent][3] = (stbi_uc) prev_trans;
   6337 
   6338             return o;
   6339          }
   6340 
   6341          case 0x21: // Comment Extension.
   6342          {
   6343             int len;
   6344             if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
   6345                len = stbi__get8(s);
   6346                if (len == 4) {
   6347                   g->eflags = stbi__get8(s);
   6348                   g->delay = stbi__get16le(s);
   6349                   g->transparent = stbi__get8(s);
   6350                } else {
   6351                   stbi__skip(s, len);
   6352                   break;
   6353                }
   6354             }
   6355             while ((len = stbi__get8(s)) != 0)
   6356                stbi__skip(s, len);
   6357             break;
   6358          }
   6359 
   6360          case 0x3B: // gif stream termination code
   6361             return (stbi_uc *) s; // using '1' causes warning on some compilers
   6362 
   6363          default:
   6364             return stbi__errpuc("unknown code", "Corrupt GIF");
   6365       }
   6366    }
   6367 
   6368    STBI_NOTUSED(req_comp);
   6369 }
   6370 
   6371 static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   6372 {
   6373    stbi_uc *u = 0;
   6374    stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   6375    memset(g, 0, sizeof(*g));
   6376    STBI_NOTUSED(ri);
   6377 
   6378    u = stbi__gif_load_next(s, g, comp, req_comp);
   6379    if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   6380    if (u) {
   6381       *x = g->w;
   6382       *y = g->h;
   6383       if (req_comp && req_comp != 4)
   6384          u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
   6385    }
   6386    else if (g->out)
   6387       STBI_FREE(g->out);
   6388    STBI_FREE(g);
   6389    return u;
   6390 }
   6391 
   6392 static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
   6393 {
   6394    return stbi__gif_info_raw(s,x,y,comp);
   6395 }
   6396 #endif
   6397 
   6398 // *************************************************************************************************
   6399 // Radiance RGBE HDR loader
   6400 // originally by Nicolas Schulz
   6401 #ifndef STBI_NO_HDR
   6402 static int stbi__hdr_test_core(stbi__context *s, const char *signature)
   6403 {
   6404    int i;
   6405    for (i=0; signature[i]; ++i)
   6406       if (stbi__get8(s) != signature[i])
   6407           return 0;
   6408    stbi__rewind(s);
   6409    return 1;
   6410 }
   6411 
   6412 static int stbi__hdr_test(stbi__context* s)
   6413 {
   6414    int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
   6415    stbi__rewind(s);
   6416    if(!r) {
   6417        r = stbi__hdr_test_core(s, "#?RGBE\n");
   6418        stbi__rewind(s);
   6419    }
   6420    return r;
   6421 }
   6422 
   6423 #define STBI__HDR_BUFLEN  1024
   6424 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
   6425 {
   6426    int len=0;
   6427    char c = '\0';
   6428 
   6429    c = (char) stbi__get8(z);
   6430 
   6431    while (!stbi__at_eof(z) && c != '\n') {
   6432       buffer[len++] = c;
   6433       if (len == STBI__HDR_BUFLEN-1) {
   6434          // flush to end of line
   6435          while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
   6436             ;
   6437          break;
   6438       }
   6439       c = (char) stbi__get8(z);
   6440    }
   6441 
   6442    buffer[len] = 0;
   6443    return buffer;
   6444 }
   6445 
   6446 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
   6447 {
   6448    if ( input[3] != 0 ) {
   6449       float f1;
   6450       // Exponent
   6451       f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
   6452       if (req_comp <= 2)
   6453          output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
   6454       else {
   6455          output[0] = input[0] * f1;
   6456          output[1] = input[1] * f1;
   6457          output[2] = input[2] * f1;
   6458       }
   6459       if (req_comp == 2) output[1] = 1;
   6460       if (req_comp == 4) output[3] = 1;
   6461    } else {
   6462       switch (req_comp) {
   6463          case 4: output[3] = 1; /* fallthrough */
   6464          case 3: output[0] = output[1] = output[2] = 0;
   6465                  break;
   6466          case 2: output[1] = 1; /* fallthrough */
   6467          case 1: output[0] = 0;
   6468                  break;
   6469       }
   6470    }
   6471 }
   6472 
   6473 static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   6474 {
   6475    char buffer[STBI__HDR_BUFLEN];
   6476    char *token;
   6477    int valid = 0;
   6478    int width, height;
   6479    stbi_uc *scanline;
   6480    float *hdr_data;
   6481    int len;
   6482    unsigned char count, value;
   6483    int i, j, k, c1,c2, z;
   6484    const char *headerToken;
   6485    STBI_NOTUSED(ri);
   6486 
   6487    // Check identifier
   6488    headerToken = stbi__hdr_gettoken(s,buffer);
   6489    if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
   6490       return stbi__errpf("not HDR", "Corrupt HDR image");
   6491 
   6492    // Parse header
   6493    for(;;) {
   6494       token = stbi__hdr_gettoken(s,buffer);
   6495       if (token[0] == 0) break;
   6496       if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   6497    }
   6498 
   6499    if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
   6500 
   6501    // Parse width and height
   6502    // can't use sscanf() if we're not using stdio!
   6503    token = stbi__hdr_gettoken(s,buffer);
   6504    if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   6505    token += 3;
   6506    height = (int) strtol(token, &token, 10);
   6507    while (*token == ' ') ++token;
   6508    if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   6509    token += 3;
   6510    width = (int) strtol(token, NULL, 10);
   6511 
   6512    *x = width;
   6513    *y = height;
   6514 
   6515    if (comp) *comp = 3;
   6516    if (req_comp == 0) req_comp = 3;
   6517 
   6518    if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
   6519       return stbi__errpf("too large", "HDR image is too large");
   6520 
   6521    // Read data
   6522    hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   6523    if (!hdr_data)
   6524       return stbi__errpf("outofmem", "Out of memory");
   6525 
   6526    // Load image data
   6527    // image data is stored as some number of sca
   6528    if ( width < 8 || width >= 32768) {
   6529       // Read flat data
   6530       for (j=0; j < height; ++j) {
   6531          for (i=0; i < width; ++i) {
   6532             stbi_uc rgbe[4];
   6533            main_decode_loop:
   6534             stbi__getn(s, rgbe, 4);
   6535             stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
   6536          }
   6537       }
   6538    } else {
   6539       // Read RLE-encoded data
   6540       scanline = NULL;
   6541 
   6542       for (j = 0; j < height; ++j) {
   6543          c1 = stbi__get8(s);
   6544          c2 = stbi__get8(s);
   6545          len = stbi__get8(s);
   6546          if (c1 != 2 || c2 != 2 || (len & 0x80)) {
   6547             // not run-length encoded, so we have to actually use THIS data as a decoded
   6548             // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
   6549             stbi_uc rgbe[4];
   6550             rgbe[0] = (stbi_uc) c1;
   6551             rgbe[1] = (stbi_uc) c2;
   6552             rgbe[2] = (stbi_uc) len;
   6553             rgbe[3] = (stbi_uc) stbi__get8(s);
   6554             stbi__hdr_convert(hdr_data, rgbe, req_comp);
   6555             i = 1;
   6556             j = 0;
   6557             STBI_FREE(scanline);
   6558             goto main_decode_loop; // yes, this makes no sense
   6559          }
   6560          len <<= 8;
   6561          len |= stbi__get8(s);
   6562          if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
   6563          if (scanline == NULL) {
   6564             scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
   6565             if (!scanline) {
   6566                STBI_FREE(hdr_data);
   6567                return stbi__errpf("outofmem", "Out of memory");
   6568             }
   6569          }
   6570 
   6571          for (k = 0; k < 4; ++k) {
   6572             int nleft;
   6573             i = 0;
   6574             while ((nleft = width - i) > 0) {
   6575                count = stbi__get8(s);
   6576                if (count > 128) {
   6577                   // Run
   6578                   value = stbi__get8(s);
   6579                   count -= 128;
   6580                   if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
   6581                   for (z = 0; z < count; ++z)
   6582                      scanline[i++ * 4 + k] = value;
   6583                } else {
   6584                   // Dump
   6585                   if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
   6586                   for (z = 0; z < count; ++z)
   6587                      scanline[i++ * 4 + k] = stbi__get8(s);
   6588                }
   6589             }
   6590          }
   6591          for (i=0; i < width; ++i)
   6592             stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
   6593       }
   6594       if (scanline)
   6595          STBI_FREE(scanline);
   6596    }
   6597 
   6598    return hdr_data;
   6599 }
   6600 
   6601 static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
   6602 {
   6603    char buffer[STBI__HDR_BUFLEN];
   6604    char *token;
   6605    int valid = 0;
   6606    int dummy;
   6607 
   6608    if (!x) x = &dummy;
   6609    if (!y) y = &dummy;
   6610    if (!comp) comp = &dummy;
   6611 
   6612    if (stbi__hdr_test(s) == 0) {
   6613        stbi__rewind( s );
   6614        return 0;
   6615    }
   6616 
   6617    for(;;) {
   6618       token = stbi__hdr_gettoken(s,buffer);
   6619       if (token[0] == 0) break;
   6620       if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   6621    }
   6622 
   6623    if (!valid) {
   6624        stbi__rewind( s );
   6625        return 0;
   6626    }
   6627    token = stbi__hdr_gettoken(s,buffer);
   6628    if (strncmp(token, "-Y ", 3)) {
   6629        stbi__rewind( s );
   6630        return 0;
   6631    }
   6632    token += 3;
   6633    *y = (int) strtol(token, &token, 10);
   6634    while (*token == ' ') ++token;
   6635    if (strncmp(token, "+X ", 3)) {
   6636        stbi__rewind( s );
   6637        return 0;
   6638    }
   6639    token += 3;
   6640    *x = (int) strtol(token, NULL, 10);
   6641    *comp = 3;
   6642    return 1;
   6643 }
   6644 #endif // STBI_NO_HDR
   6645 
   6646 #ifndef STBI_NO_BMP
   6647 static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
   6648 {
   6649    void *p;
   6650    stbi__bmp_data info;
   6651 
   6652    info.all_a = 255;
   6653    p = stbi__bmp_parse_header(s, &info);
   6654    stbi__rewind( s );
   6655    if (p == NULL)
   6656       return 0;
   6657    if (x) *x = s->img_x;
   6658    if (y) *y = s->img_y;
   6659    if (comp) *comp = info.ma ? 4 : 3;
   6660    return 1;
   6661 }
   6662 #endif
   6663 
   6664 #ifndef STBI_NO_PSD
   6665 static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
   6666 {
   6667    int channelCount, dummy;
   6668    if (!x) x = &dummy;
   6669    if (!y) y = &dummy;
   6670    if (!comp) comp = &dummy;
   6671    if (stbi__get32be(s) != 0x38425053) {
   6672        stbi__rewind( s );
   6673        return 0;
   6674    }
   6675    if (stbi__get16be(s) != 1) {
   6676        stbi__rewind( s );
   6677        return 0;
   6678    }
   6679    stbi__skip(s, 6);
   6680    channelCount = stbi__get16be(s);
   6681    if (channelCount < 0 || channelCount > 16) {
   6682        stbi__rewind( s );
   6683        return 0;
   6684    }
   6685    *y = stbi__get32be(s);
   6686    *x = stbi__get32be(s);
   6687    if (stbi__get16be(s) != 8) {
   6688        stbi__rewind( s );
   6689        return 0;
   6690    }
   6691    if (stbi__get16be(s) != 3) {
   6692        stbi__rewind( s );
   6693        return 0;
   6694    }
   6695    *comp = 4;
   6696    return 1;
   6697 }
   6698 #endif
   6699 
   6700 #ifndef STBI_NO_PIC
   6701 static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
   6702 {
   6703    int act_comp=0,num_packets=0,chained,dummy;
   6704    stbi__pic_packet packets[10];
   6705 
   6706    if (!x) x = &dummy;
   6707    if (!y) y = &dummy;
   6708    if (!comp) comp = &dummy;
   6709 
   6710    if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
   6711       stbi__rewind(s);
   6712       return 0;
   6713    }
   6714 
   6715    stbi__skip(s, 88);
   6716 
   6717    *x = stbi__get16be(s);
   6718    *y = stbi__get16be(s);
   6719    if (stbi__at_eof(s)) {
   6720       stbi__rewind( s);
   6721       return 0;
   6722    }
   6723    if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
   6724       stbi__rewind( s );
   6725       return 0;
   6726    }
   6727 
   6728    stbi__skip(s, 8);
   6729 
   6730    do {
   6731       stbi__pic_packet *packet;
   6732 
   6733       if (num_packets==sizeof(packets)/sizeof(packets[0]))
   6734          return 0;
   6735 
   6736       packet = &packets[num_packets++];
   6737       chained = stbi__get8(s);
   6738       packet->size    = stbi__get8(s);
   6739       packet->type    = stbi__get8(s);
   6740       packet->channel = stbi__get8(s);
   6741       act_comp |= packet->channel;
   6742 
   6743       if (stbi__at_eof(s)) {
   6744           stbi__rewind( s );
   6745           return 0;
   6746       }
   6747       if (packet->size != 8) {
   6748           stbi__rewind( s );
   6749           return 0;
   6750       }
   6751    } while (chained);
   6752 
   6753    *comp = (act_comp & 0x10 ? 4 : 3);
   6754 
   6755    return 1;
   6756 }
   6757 #endif
   6758 
   6759 // *************************************************************************************************
   6760 // Portable Gray Map and Portable Pixel Map loader
   6761 // by Ken Miller
   6762 //
   6763 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
   6764 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
   6765 //
   6766 // Known limitations:
   6767 //    Comments in the header section ('#' to end of line) are skipped and discarded
   6768 //    Does not support ASCII image data (formats P2 and P3)
   6769 //    Does not support 16-bit-per-channel
   6770 
   6771 #ifndef STBI_NO_PNM
   6772 
   6773 static int      stbi__pnm_test(stbi__context *s)
   6774 {
   6775    char p, t;
   6776    p = (char) stbi__get8(s);
   6777    t = (char) stbi__get8(s);
   6778    if (p != 'P' || (t != '5' && t != '6')) {
   6779        stbi__rewind( s );
   6780        return 0;
   6781    }
   6782    return 1;
   6783 }
   6784 
   6785 static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
   6786 {
   6787    stbi_uc *out;
   6788    STBI_NOTUSED(ri);
   6789 
   6790    if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
   6791       return 0;
   6792 
   6793    *x = s->img_x;
   6794    *y = s->img_y;
   6795    if (comp) *comp = s->img_n;
   6796 
   6797    if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
   6798       return stbi__errpuc("too large", "PNM too large");
   6799 
   6800    out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   6801    if (!out) return stbi__errpuc("outofmem", "Out of memory");
   6802    stbi__getn(s, out, s->img_n * s->img_x * s->img_y);
   6803 
   6804    if (req_comp && req_comp != s->img_n) {
   6805       out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
   6806       if (out == NULL) return out; // stbi__convert_format frees input on failure
   6807    }
   6808    return out;
   6809 }
   6810 
   6811 static int      stbi__pnm_isspace(char c)
   6812 {
   6813    return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
   6814 }
   6815 
   6816 static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
   6817 {
   6818    for (;;) {
   6819       while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
   6820          *c = (char) stbi__get8(s);
   6821 
   6822       if (stbi__at_eof(s) || *c != '#')
   6823          break;
   6824 
   6825       while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
   6826          *c = (char) stbi__get8(s);
   6827    }
   6828 }
   6829 
   6830 static int      stbi__pnm_isdigit(char c)
   6831 {
   6832    return c >= '0' && c <= '9';
   6833 }
   6834 
   6835 static int      stbi__pnm_getinteger(stbi__context *s, char *c)
   6836 {
   6837    int value = 0;
   6838 
   6839    while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
   6840       value = value*10 + (*c - '0');
   6841       *c = (char) stbi__get8(s);
   6842    }
   6843 
   6844    return value;
   6845 }
   6846 
   6847 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
   6848 {
   6849    int maxv, dummy;
   6850    char c, p, t;
   6851 
   6852    if (!x) x = &dummy;
   6853    if (!y) y = &dummy;
   6854    if (!comp) comp = &dummy;
   6855 
   6856    stbi__rewind(s);
   6857 
   6858    // Get identifier
   6859    p = (char) stbi__get8(s);
   6860    t = (char) stbi__get8(s);
   6861    if (p != 'P' || (t != '5' && t != '6')) {
   6862        stbi__rewind(s);
   6863        return 0;
   6864    }
   6865 
   6866    *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
   6867 
   6868    c = (char) stbi__get8(s);
   6869    stbi__pnm_skip_whitespace(s, &c);
   6870 
   6871    *x = stbi__pnm_getinteger(s, &c); // read width
   6872    stbi__pnm_skip_whitespace(s, &c);
   6873 
   6874    *y = stbi__pnm_getinteger(s, &c); // read height
   6875    stbi__pnm_skip_whitespace(s, &c);
   6876 
   6877    maxv = stbi__pnm_getinteger(s, &c);  // read max value
   6878 
   6879    if (maxv > 255)
   6880       return stbi__err("max value > 255", "PPM image not 8-bit");
   6881    else
   6882       return 1;
   6883 }
   6884 #endif
   6885 
   6886 static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
   6887 {
   6888    #ifndef STBI_NO_JPEG
   6889    if (stbi__jpeg_info(s, x, y, comp)) return 1;
   6890    #endif
   6891 
   6892    #ifndef STBI_NO_PNG
   6893    if (stbi__png_info(s, x, y, comp))  return 1;
   6894    #endif
   6895 
   6896    #ifndef STBI_NO_GIF
   6897    if (stbi__gif_info(s, x, y, comp))  return 1;
   6898    #endif
   6899 
   6900    #ifndef STBI_NO_BMP
   6901    if (stbi__bmp_info(s, x, y, comp))  return 1;
   6902    #endif
   6903 
   6904    #ifndef STBI_NO_PSD
   6905    if (stbi__psd_info(s, x, y, comp))  return 1;
   6906    #endif
   6907 
   6908    #ifndef STBI_NO_PIC
   6909    if (stbi__pic_info(s, x, y, comp))  return 1;
   6910    #endif
   6911 
   6912    #ifndef STBI_NO_PNM
   6913    if (stbi__pnm_info(s, x, y, comp))  return 1;
   6914    #endif
   6915 
   6916    #ifndef STBI_NO_HDR
   6917    if (stbi__hdr_info(s, x, y, comp))  return 1;
   6918    #endif
   6919 
   6920    // test tga last because it's a crappy test!
   6921    #ifndef STBI_NO_TGA
   6922    if (stbi__tga_info(s, x, y, comp))
   6923        return 1;
   6924    #endif
   6925    return stbi__err("unknown image type", "Image not of any known type, or corrupt");
   6926 }
   6927 
   6928 #ifndef STBI_NO_STDIO
   6929 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
   6930 {
   6931     FILE *f = stbi__fopen(filename, "rb");
   6932     int result;
   6933     if (!f) return stbi__err("can't fopen", "Unable to open file");
   6934     result = stbi_info_from_file(f, x, y, comp);
   6935     fclose(f);
   6936     return result;
   6937 }
   6938 
   6939 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
   6940 {
   6941    int r;
   6942    stbi__context s;
   6943    long pos = ftell(f);
   6944    stbi__start_file(&s, f);
   6945    r = stbi__info_main(&s,x,y,comp);
   6946    fseek(f,pos,SEEK_SET);
   6947    return r;
   6948 }
   6949 #endif // !STBI_NO_STDIO
   6950 
   6951 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
   6952 {
   6953    stbi__context s;
   6954    stbi__start_mem(&s,buffer,len);
   6955    return stbi__info_main(&s,x,y,comp);
   6956 }
   6957 
   6958 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
   6959 {
   6960    stbi__context s;
   6961    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
   6962    return stbi__info_main(&s,x,y,comp);
   6963 }
   6964 
   6965 #endif // STB_IMAGE_IMPLEMENTATION
   6966 
   6967 /*
   6968    revision history:
   6969       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
   6970                          warning fixes; disable run-time SSE detection on gcc;
   6971                          uniform handling of optional "return" values;
   6972                          thread-safe initialization of zlib tables
   6973       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
   6974       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
   6975       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
   6976       2.11  (2016-04-02) allocate large structures on the stack
   6977                          remove white matting for transparent PSD
   6978                          fix reported channel count for PNG & BMP
   6979                          re-enable SSE2 in non-gcc 64-bit
   6980                          support RGB-formatted JPEG
   6981                          read 16-bit PNGs (only as 8-bit)
   6982       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
   6983       2.09  (2016-01-16) allow comments in PNM files
   6984                          16-bit-per-pixel TGA (not bit-per-component)
   6985                          info() for TGA could break due to .hdr handling
   6986                          info() for BMP now shares code instead of sloppy parse
   6987                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
   6988                          code cleanup
   6989       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
   6990       2.07  (2015-09-13) fix compiler warnings
   6991                          partial animated GIF support
   6992                          limited 16-bpc PSD support
   6993                          #ifdef unused functions
   6994                          bug with < 92 byte PIC,PNM,HDR,TGA
   6995       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
   6996       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
   6997       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
   6998       2.03  (2015-04-12) extra corruption checking (mmozeiko)
   6999                          stbi_set_flip_vertically_on_load (nguillemot)
   7000                          fix NEON support; fix mingw support
   7001       2.02  (2015-01-19) fix incorrect assert, fix warning
   7002       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
   7003       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
   7004       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
   7005                          progressive JPEG (stb)
   7006                          PGM/PPM support (Ken Miller)
   7007                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
   7008                          GIF bugfix -- seemingly never worked
   7009                          STBI_NO_*, STBI_ONLY_*
   7010       1.48  (2014-12-14) fix incorrectly-named assert()
   7011       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
   7012                          optimize PNG (ryg)
   7013                          fix bug in interlaced PNG with user-specified channel count (stb)
   7014       1.46  (2014-08-26)
   7015               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
   7016       1.45  (2014-08-16)
   7017               fix MSVC-ARM internal compiler error by wrapping malloc
   7018       1.44  (2014-08-07)
   7019               various warning fixes from Ronny Chevalier
   7020       1.43  (2014-07-15)
   7021               fix MSVC-only compiler problem in code changed in 1.42
   7022       1.42  (2014-07-09)
   7023               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
   7024               fixes to stbi__cleanup_jpeg path
   7025               added STBI_ASSERT to avoid requiring assert.h
   7026       1.41  (2014-06-25)
   7027               fix search&replace from 1.36 that messed up comments/error messages
   7028       1.40  (2014-06-22)
   7029               fix gcc struct-initialization warning
   7030       1.39  (2014-06-15)
   7031               fix to TGA optimization when req_comp != number of components in TGA;
   7032               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
   7033               add support for BMP version 5 (more ignored fields)
   7034       1.38  (2014-06-06)
   7035               suppress MSVC warnings on integer casts truncating values
   7036               fix accidental rename of 'skip' field of I/O
   7037       1.37  (2014-06-04)
   7038               remove duplicate typedef
   7039       1.36  (2014-06-03)
   7040               convert to header file single-file library
   7041               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
   7042       1.35  (2014-05-27)
   7043               various warnings
   7044               fix broken STBI_SIMD path
   7045               fix bug where stbi_load_from_file no longer left file pointer in correct place
   7046               fix broken non-easy path for 32-bit BMP (possibly never used)
   7047               TGA optimization by Arseny Kapoulkine
   7048       1.34  (unknown)
   7049               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
   7050       1.33  (2011-07-14)
   7051               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
   7052       1.32  (2011-07-13)
   7053               support for "info" function for all supported filetypes (SpartanJ)
   7054       1.31  (2011-06-20)
   7055               a few more leak fixes, bug in PNG handling (SpartanJ)
   7056       1.30  (2011-06-11)
   7057               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
   7058               removed deprecated format-specific test/load functions
   7059               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
   7060               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
   7061               fix inefficiency in decoding 32-bit BMP (David Woo)
   7062       1.29  (2010-08-16)
   7063               various warning fixes from Aurelien Pocheville
   7064       1.28  (2010-08-01)
   7065               fix bug in GIF palette transparency (SpartanJ)
   7066       1.27  (2010-08-01)
   7067               cast-to-stbi_uc to fix warnings
   7068       1.26  (2010-07-24)
   7069               fix bug in file buffering for PNG reported by SpartanJ
   7070       1.25  (2010-07-17)
   7071               refix trans_data warning (Won Chun)
   7072       1.24  (2010-07-12)
   7073               perf improvements reading from files on platforms with lock-heavy fgetc()
   7074               minor perf improvements for jpeg
   7075               deprecated type-specific functions so we'll get feedback if they're needed
   7076               attempt to fix trans_data warning (Won Chun)
   7077       1.23    fixed bug in iPhone support
   7078       1.22  (2010-07-10)
   7079               removed image *writing* support
   7080               stbi_info support from Jetro Lauha
   7081               GIF support from Jean-Marc Lienher
   7082               iPhone PNG-extensions from James Brown
   7083               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
   7084       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
   7085       1.20    added support for Softimage PIC, by Tom Seddon
   7086       1.19    bug in interlaced PNG corruption check (found by ryg)
   7087       1.18  (2008-08-02)
   7088               fix a threading bug (local mutable static)
   7089       1.17    support interlaced PNG
   7090       1.16    major bugfix - stbi__convert_format converted one too many pixels
   7091       1.15    initialize some fields for thread safety
   7092       1.14    fix threadsafe conversion bug
   7093               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
   7094       1.13    threadsafe
   7095       1.12    const qualifiers in the API
   7096       1.11    Support installable IDCT, colorspace conversion routines
   7097       1.10    Fixes for 64-bit (don't use "unsigned long")
   7098               optimized upsampling by Fabian "ryg" Giesen
   7099       1.09    Fix format-conversion for PSD code (bad global variables!)
   7100       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
   7101       1.07    attempt to fix C++ warning/errors again
   7102       1.06    attempt to fix C++ warning/errors again
   7103       1.05    fix TGA loading to return correct *comp and use good luminance calc
   7104       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
   7105       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
   7106       1.02    support for (subset of) HDR files, float interface for preferred access to them
   7107       1.01    fix bug: possible bug in handling right-side up bmps... not sure
   7108               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
   7109       1.00    interface to zlib that skips zlib header
   7110       0.99    correct handling of alpha in palette
   7111       0.98    TGA loader by lonesock; dynamically add loaders (untested)
   7112       0.97    jpeg errors on too large a file; also catch another malloc failure
   7113       0.96    fix detection of invalid v value - particleman@mollyrocket forum
   7114       0.95    during header scan, seek to markers in case of padding
   7115       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
   7116       0.93    handle jpegtran output; verbose errors
   7117       0.92    read 4,8,16,24,32-bit BMP files of several formats
   7118       0.91    output 24-bit Windows 3.0 BMP files
   7119       0.90    fix a few more warnings; bump version number to approach 1.0
   7120       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
   7121       0.60    fix compiling as c++
   7122       0.59    fix warnings: merge Dave Moore's -Wall fixes
   7123       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
   7124       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
   7125       0.56    fix bug: zlib uncompressed mode len vs. nlen
   7126       0.55    fix bug: restart_interval not initialized to 0
   7127       0.54    allow NULL for 'int *comp'
   7128       0.53    fix bug in png 3->4; speedup png decoding
   7129       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
   7130       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
   7131               on 'test' only check type, not whether we support this variant
   7132       0.50  (2006-11-19)
   7133               first released version
   7134 */
   7135 
   7136 
   7137 /*
   7138 ------------------------------------------------------------------------------
   7139 This software is available under 2 licenses -- choose whichever you prefer.
   7140 ------------------------------------------------------------------------------
   7141 ALTERNATIVE A - MIT License
   7142 Copyright (c) 2017 Sean Barrett
   7143 Permission is hereby granted, free of charge, to any person obtaining a copy of
   7144 this software and associated documentation files (the "Software"), to deal in
   7145 the Software without restriction, including without limitation the rights to
   7146 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   7147 of the Software, and to permit persons to whom the Software is furnished to do
   7148 so, subject to the following conditions:
   7149 The above copyright notice and this permission notice shall be included in all
   7150 copies or substantial portions of the Software.
   7151 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   7152 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   7153 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   7154 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   7155 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   7156 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   7157 SOFTWARE.
   7158 ------------------------------------------------------------------------------
   7159 ALTERNATIVE B - Public Domain (www.unlicense.org)
   7160 This is free and unencumbered software released into the public domain.
   7161 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
   7162 software, either in source code form or as a compiled binary, for any purpose,
   7163 commercial or non-commercial, and by any means.
   7164 In jurisdictions that recognize copyright laws, the author or authors of this
   7165 software dedicate any and all copyright interest in the software to the public
   7166 domain. We make this dedication for the benefit of the public at large and to
   7167 the detriment of our heirs and successors. We intend this dedication to be an
   7168 overt act of relinquishment in perpetuity of all present and future rights to
   7169 this software under copyright law.
   7170 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   7171 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   7172 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   7173 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   7174 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   7175 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   7176 ------------------------------------------------------------------------------
   7177 */