minesweeper

A minewseeper implementation to play around with Hare and Raylib
git clone https://git.tronto.net/minesweeper
Download | Log | Files | Refs | README | LICENSE

stb_vorbis.c (192790B)


      1 // Ogg Vorbis audio decoder - v1.22 - public domain
      2 // http://nothings.org/stb_vorbis/
      3 //
      4 // Original version written by Sean Barrett in 2007.
      5 //
      6 // Originally sponsored by RAD Game Tools. Seeking implementation
      7 // sponsored by Phillip Bennefall, Marc Andersen, Aaron Baker,
      8 // Elias Software, Aras Pranckevicius, and Sean Barrett.
      9 //
     10 // LICENSE
     11 //
     12 //   See end of file for license information.
     13 //
     14 // Limitations:
     15 //
     16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
     17 //   - lossless sample-truncation at beginning ignored
     18 //   - cannot concatenate multiple vorbis streams
     19 //   - sample positions are 32-bit, limiting seekable 192Khz
     20 //       files to around 6 hours (Ogg supports 64-bit)
     21 //
     22 // Feature contributors:
     23 //    Dougall Johnson (sample-exact seeking)
     24 //
     25 // Bugfix/warning contributors:
     26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
     27 //    Casey Muratori     John Bolton         Gargaj
     28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
     29 //    Bernhard Wodo      Evan Balster        github:alxprd
     30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
     31 //    Phillip Bennefall  Rohit               Thiago Goulart
     32 //    github:manxorist   Saga Musix          github:infatum
     33 //    Timur Gagiev       Maxwell Koo         Peter Waller
     34 //    github:audinowho   Dougall Johnson     David Reid
     35 //    github:Clownacy    Pedro J. Estebanez  Remi Verschelde
     36 //    AnthoFoxo          github:morlat       Gabriel Ravier
     37 //
     38 // Partial history:
     39 //    1.22    - 2021-07-11 - various small fixes
     40 //    1.21    - 2021-07-02 - fix bug for files with no comments
     41 //    1.20    - 2020-07-11 - several small fixes
     42 //    1.19    - 2020-02-05 - warnings
     43 //    1.18    - 2020-02-02 - fix seek bugs; parse header comments; misc warnings etc.
     44 //    1.17    - 2019-07-08 - fix CVE-2019-13217..CVE-2019-13223 (by ForAllSecure)
     45 //    1.16    - 2019-03-04 - fix warnings
     46 //    1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
     47 //    1.14    - 2018-02-11 - delete bogus dealloca usage
     48 //    1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
     49 //    1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
     50 //    1.11    - 2017-07-23 - fix MinGW compilation
     51 //    1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
     52 //    1.09    - 2016-04-04 - back out 'truncation of last frame' fix from previous version
     53 //    1.08    - 2016-04-02 - warnings; setup memory leaks; truncation of last frame
     54 //    1.07    - 2015-01-16 - fixes for crashes on invalid files; warning fixes; const
     55 //    1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
     56 //                           some crash fixes when out of memory or with corrupt files
     57 //                           fix some inappropriately signed shifts
     58 //    1.05    - 2015-04-19 - don't define __forceinline if it's redundant
     59 //    1.04    - 2014-08-27 - fix missing const-correct case in API
     60 //    1.03    - 2014-08-07 - warning fixes
     61 //    1.02    - 2014-07-09 - declare qsort comparison as explicitly _cdecl in Windows
     62 //    1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float (interleaved was correct)
     63 //    1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
     64 //                           (API change) report sample rate for decode-full-file funcs
     65 //
     66 // See end of file for full version history.
     67 
     68 
     69 //////////////////////////////////////////////////////////////////////////////
     70 //
     71 //  HEADER BEGINS HERE
     72 //
     73 
     74 #ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H
     75 #define STB_VORBIS_INCLUDE_STB_VORBIS_H
     76 
     77 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
     78 #define STB_VORBIS_NO_STDIO 1
     79 #endif
     80 
     81 #ifndef STB_VORBIS_NO_STDIO
     82 #include <stdio.h>
     83 #endif
     84 
     85 #ifdef __cplusplus
     86 extern "C" {
     87 #endif
     88 
     89 ///////////   THREAD SAFETY
     90 
     91 // Individual stb_vorbis* handles are not thread-safe; you cannot decode from
     92 // them from multiple threads at the same time. However, you can have multiple
     93 // stb_vorbis* handles and decode from them independently in multiple thrads.
     94 
     95 
     96 ///////////   MEMORY ALLOCATION
     97 
     98 // normally stb_vorbis uses malloc() to allocate memory at startup,
     99 // and alloca() to allocate temporary memory during a frame on the
    100 // stack. (Memory consumption will depend on the amount of setup
    101 // data in the file and how you set the compile flags for speed
    102 // vs. size. In my test files the maximal-size usage is ~150KB.)
    103 //
    104 // You can modify the wrapper functions in the source (setup_malloc,
    105 // setup_temp_malloc, temp_malloc) to change this behavior, or you
    106 // can use a simpler allocation model: you pass in a buffer from
    107 // which stb_vorbis will allocate _all_ its memory (including the
    108 // temp memory). "open" may fail with a VORBIS_outofmem if you
    109 // do not pass in enough data; there is no way to determine how
    110 // much you do need except to succeed (at which point you can
    111 // query get_info to find the exact amount required. yes I know
    112 // this is lame).
    113 //
    114 // If you pass in a non-NULL buffer of the type below, allocation
    115 // will occur from it as described above. Otherwise just pass NULL
    116 // to use malloc()/alloca()
    117 
    118 typedef struct
    119 {
    120    char *alloc_buffer;
    121    int   alloc_buffer_length_in_bytes;
    122 } stb_vorbis_alloc;
    123 
    124 
    125 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
    126 
    127 typedef struct stb_vorbis stb_vorbis;
    128 
    129 typedef struct
    130 {
    131    unsigned int sample_rate;
    132    int channels;
    133 
    134    unsigned int setup_memory_required;
    135    unsigned int setup_temp_memory_required;
    136    unsigned int temp_memory_required;
    137 
    138    int max_frame_size;
    139 } stb_vorbis_info;
    140 
    141 typedef struct
    142 {
    143    char *vendor;
    144 
    145    int comment_list_length;
    146    char **comment_list;
    147 } stb_vorbis_comment;
    148 
    149 // get general information about the file
    150 extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f);
    151 
    152 // get ogg comments
    153 extern stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f);
    154 
    155 // get the last error detected (clears it, too)
    156 extern int stb_vorbis_get_error(stb_vorbis *f);
    157 
    158 // close an ogg vorbis file and free all memory in use
    159 extern void stb_vorbis_close(stb_vorbis *f);
    160 
    161 // this function returns the offset (in samples) from the beginning of the
    162 // file that will be returned by the next decode, if it is known, or -1
    163 // otherwise. after a flush_pushdata() call, this may take a while before
    164 // it becomes valid again.
    165 // NOT WORKING YET after a seek with PULLDATA API
    166 extern int stb_vorbis_get_sample_offset(stb_vorbis *f);
    167 
    168 // returns the current seek point within the file, or offset from the beginning
    169 // of the memory buffer. In pushdata mode it returns 0.
    170 extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f);
    171 
    172 ///////////   PUSHDATA API
    173 
    174 #ifndef STB_VORBIS_NO_PUSHDATA_API
    175 
    176 // this API allows you to get blocks of data from any source and hand
    177 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
    178 // you how much it used, and you have to give it the rest next time;
    179 // and stb_vorbis may not have enough data to work with and you will
    180 // need to give it the same data again PLUS more. Note that the Vorbis
    181 // specification does not bound the size of an individual frame.
    182 
    183 extern stb_vorbis *stb_vorbis_open_pushdata(
    184          const unsigned char * datablock, int datablock_length_in_bytes,
    185          int *datablock_memory_consumed_in_bytes,
    186          int *error,
    187          const stb_vorbis_alloc *alloc_buffer);
    188 // create a vorbis decoder by passing in the initial data block containing
    189 //    the ogg&vorbis headers (you don't need to do parse them, just provide
    190 //    the first N bytes of the file--you're told if it's not enough, see below)
    191 // on success, returns an stb_vorbis *, does not set error, returns the amount of
    192 //    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
    193 // on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed
    194 // if returns NULL and *error is VORBIS_need_more_data, then the input block was
    195 //       incomplete and you need to pass in a larger block from the start of the file
    196 
    197 extern int stb_vorbis_decode_frame_pushdata(
    198          stb_vorbis *f,
    199          const unsigned char *datablock, int datablock_length_in_bytes,
    200          int *channels,             // place to write number of float * buffers
    201          float ***output,           // place to write float ** array of float * buffers
    202          int *samples               // place to write number of output samples
    203      );
    204 // decode a frame of audio sample data if possible from the passed-in data block
    205 //
    206 // return value: number of bytes we used from datablock
    207 //
    208 // possible cases:
    209 //     0 bytes used, 0 samples output (need more data)
    210 //     N bytes used, 0 samples output (resynching the stream, keep going)
    211 //     N bytes used, M samples output (one frame of data)
    212 // note that after opening a file, you will ALWAYS get one N-bytes,0-sample
    213 // frame, because Vorbis always "discards" the first frame.
    214 //
    215 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
    216 // instead only datablock_length_in_bytes-3 or less. This is because it wants
    217 // to avoid missing parts of a page header if they cross a datablock boundary,
    218 // without writing state-machiney code to record a partial detection.
    219 //
    220 // The number of channels returned are stored in *channels (which can be
    221 // NULL--it is always the same as the number of channels reported by
    222 // get_info). *output will contain an array of float* buffers, one per
    223 // channel. In other words, (*output)[0][0] contains the first sample from
    224 // the first channel, and (*output)[1][0] contains the first sample from
    225 // the second channel.
    226 //
    227 // *output points into stb_vorbis's internal output buffer storage; these
    228 // buffers are owned by stb_vorbis and application code should not free
    229 // them or modify their contents. They are transient and will be overwritten
    230 // once you ask for more data to get decoded, so be sure to grab any data
    231 // you need before then.
    232 
    233 extern void stb_vorbis_flush_pushdata(stb_vorbis *f);
    234 // inform stb_vorbis that your next datablock will not be contiguous with
    235 // previous ones (e.g. you've seeked in the data); future attempts to decode
    236 // frames will cause stb_vorbis to resynchronize (as noted above), and
    237 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
    238 // will begin decoding the _next_ frame.
    239 //
    240 // if you want to seek using pushdata, you need to seek in your file, then
    241 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
    242 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
    243 // if you don't like the result, seek your file again and repeat.
    244 #endif
    245 
    246 
    247 //////////   PULLING INPUT API
    248 
    249 #ifndef STB_VORBIS_NO_PULLDATA_API
    250 // This API assumes stb_vorbis is allowed to pull data from a source--
    251 // either a block of memory containing the _entire_ vorbis stream, or a
    252 // FILE * that you or it create, or possibly some other reading mechanism
    253 // if you go modify the source to replace the FILE * case with some kind
    254 // of callback to your code. (But if you don't support seeking, you may
    255 // just want to go ahead and use pushdata.)
    256 
    257 #if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
    258 extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output);
    259 #endif
    260 #if !defined(STB_VORBIS_NO_INTEGER_CONVERSION)
    261 extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output);
    262 #endif
    263 // decode an entire file and output the data interleaved into a malloc()ed
    264 // buffer stored in *output. The return value is the number of samples
    265 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
    266 // When you're done with it, just free() the pointer returned in *output.
    267 
    268 extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len,
    269                                   int *error, const stb_vorbis_alloc *alloc_buffer);
    270 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
    271 // this must be the entire stream!). on failure, returns NULL and sets *error
    272 
    273 #ifndef STB_VORBIS_NO_STDIO
    274 extern stb_vorbis * stb_vorbis_open_filename(const char *filename,
    275                                   int *error, const stb_vorbis_alloc *alloc_buffer);
    276 // create an ogg vorbis decoder from a filename via fopen(). on failure,
    277 // returns NULL and sets *error (possibly to VORBIS_file_open_failure).
    278 
    279 extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close,
    280                                   int *error, const stb_vorbis_alloc *alloc_buffer);
    281 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
    282 // the _current_ seek point (ftell). on failure, returns NULL and sets *error.
    283 // note that stb_vorbis must "own" this stream; if you seek it in between
    284 // calls to stb_vorbis, it will become confused. Moreover, if you attempt to
    285 // perform stb_vorbis_seek_*() operations on this file, it will assume it
    286 // owns the _entire_ rest of the file after the start point. Use the next
    287 // function, stb_vorbis_open_file_section(), to limit it.
    288 
    289 extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close,
    290                 int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len);
    291 // create an ogg vorbis decoder from an open FILE *, looking for a stream at
    292 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
    293 // on failure, returns NULL and sets *error. note that stb_vorbis must "own"
    294 // this stream; if you seek it in between calls to stb_vorbis, it will become
    295 // confused.
    296 #endif
    297 
    298 extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number);
    299 extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number);
    300 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
    301 // after calling seek_frame(), the next call to get_frame_*() will include
    302 // the specified sample. after calling stb_vorbis_seek(), the next call to
    303 // stb_vorbis_get_samples_* will start with the specified sample. If you
    304 // do not need to seek to EXACTLY the target sample when using get_samples_*,
    305 // you can also use seek_frame().
    306 
    307 extern int stb_vorbis_seek_start(stb_vorbis *f);
    308 // this function is equivalent to stb_vorbis_seek(f,0)
    309 
    310 extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f);
    311 extern float        stb_vorbis_stream_length_in_seconds(stb_vorbis *f);
    312 // these functions return the total length of the vorbis stream
    313 
    314 extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output);
    315 // decode the next frame and return the number of samples. the number of
    316 // channels returned are stored in *channels (which can be NULL--it is always
    317 // the same as the number of channels reported by get_info). *output will
    318 // contain an array of float* buffers, one per channel. These outputs will
    319 // be overwritten on the next call to stb_vorbis_get_frame_*.
    320 //
    321 // You generally should not intermix calls to stb_vorbis_get_frame_*()
    322 // and stb_vorbis_get_samples_*(), since the latter calls the former.
    323 
    324 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
    325 extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts);
    326 extern int stb_vorbis_get_frame_short            (stb_vorbis *f, int num_c, short **buffer, int num_samples);
    327 #endif
    328 // decode the next frame and return the number of *samples* per channel.
    329 // Note that for interleaved data, you pass in the number of shorts (the
    330 // size of your array), but the return value is the number of samples per
    331 // channel, not the total number of samples.
    332 //
    333 // The data is coerced to the number of channels you request according to the
    334 // channel coercion rules (see below). You must pass in the size of your
    335 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
    336 // The maximum buffer size needed can be gotten from get_info(); however,
    337 // the Vorbis I specification implies an absolute maximum of 4096 samples
    338 // per channel.
    339 
    340 // Channel coercion rules:
    341 //    Let M be the number of channels requested, and N the number of channels present,
    342 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
    343 //    and stereo R be the sum of all R and center channels (channel assignment from the
    344 //    vorbis spec).
    345 //        M    N       output
    346 //        1    k      sum(Ck) for all k
    347 //        2    *      stereo L, stereo R
    348 //        k    l      k > l, the first l channels, then 0s
    349 //        k    l      k <= l, the first k channels
    350 //    Note that this is not _good_ surround etc. mixing at all! It's just so
    351 //    you get something useful.
    352 
    353 extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats);
    354 extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples);
    355 // gets num_samples samples, not necessarily on a frame boundary--this requires
    356 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
    357 // Returns the number of samples stored per channel; it may be less than requested
    358 // at the end of the file. If there are no more samples in the file, returns 0.
    359 
    360 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
    361 extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts);
    362 extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples);
    363 #endif
    364 // gets num_samples samples, not necessarily on a frame boundary--this requires
    365 // buffering so you have to supply the buffers. Applies the coercion rules above
    366 // to produce 'channels' channels. Returns the number of samples stored per channel;
    367 // it may be less than requested at the end of the file. If there are no more
    368 // samples in the file, returns 0.
    369 
    370 #endif
    371 
    372 ////////   ERROR CODES
    373 
    374 enum STBVorbisError
    375 {
    376    VORBIS__no_error,
    377 
    378    VORBIS_need_more_data=1,             // not a real error
    379 
    380    VORBIS_invalid_api_mixing,           // can't mix API modes
    381    VORBIS_outofmem,                     // not enough memory
    382    VORBIS_feature_not_supported,        // uses floor 0
    383    VORBIS_too_many_channels,            // STB_VORBIS_MAX_CHANNELS is too small
    384    VORBIS_file_open_failure,            // fopen() failed
    385    VORBIS_seek_without_length,          // can't seek in unknown-length file
    386 
    387    VORBIS_unexpected_eof=10,            // file is truncated?
    388    VORBIS_seek_invalid,                 // seek past EOF
    389 
    390    // decoding errors (corrupt/invalid stream) -- you probably
    391    // don't care about the exact details of these
    392 
    393    // vorbis errors:
    394    VORBIS_invalid_setup=20,
    395    VORBIS_invalid_stream,
    396 
    397    // ogg errors:
    398    VORBIS_missing_capture_pattern=30,
    399    VORBIS_invalid_stream_structure_version,
    400    VORBIS_continued_packet_flag_invalid,
    401    VORBIS_incorrect_stream_serial_number,
    402    VORBIS_invalid_first_page,
    403    VORBIS_bad_packet_type,
    404    VORBIS_cant_find_last_page,
    405    VORBIS_seek_failed,
    406    VORBIS_ogg_skeleton_not_supported
    407 };
    408 
    409 
    410 #ifdef __cplusplus
    411 }
    412 #endif
    413 
    414 #endif // STB_VORBIS_INCLUDE_STB_VORBIS_H
    415 //
    416 //  HEADER ENDS HERE
    417 //
    418 //////////////////////////////////////////////////////////////////////////////
    419 
    420 #ifndef STB_VORBIS_HEADER_ONLY
    421 
    422 // global configuration settings (e.g. set these in the project/makefile),
    423 // or just set them in this file at the top (although ideally the first few
    424 // should be visible when the header file is compiled too, although it's not
    425 // crucial)
    426 
    427 // STB_VORBIS_NO_PUSHDATA_API
    428 //     does not compile the code for the various stb_vorbis_*_pushdata()
    429 //     functions
    430 // #define STB_VORBIS_NO_PUSHDATA_API
    431 
    432 // STB_VORBIS_NO_PULLDATA_API
    433 //     does not compile the code for the non-pushdata APIs
    434 // #define STB_VORBIS_NO_PULLDATA_API
    435 
    436 // STB_VORBIS_NO_STDIO
    437 //     does not compile the code for the APIs that use FILE *s internally
    438 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
    439 // #define STB_VORBIS_NO_STDIO
    440 
    441 // STB_VORBIS_NO_INTEGER_CONVERSION
    442 //     does not compile the code for converting audio sample data from
    443 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
    444 // #define STB_VORBIS_NO_INTEGER_CONVERSION
    445 
    446 // STB_VORBIS_NO_FAST_SCALED_FLOAT
    447 //      does not use a fast float-to-int trick to accelerate float-to-int on
    448 //      most platforms which requires endianness be defined correctly.
    449 //#define STB_VORBIS_NO_FAST_SCALED_FLOAT
    450 
    451 
    452 // STB_VORBIS_MAX_CHANNELS [number]
    453 //     globally define this to the maximum number of channels you need.
    454 //     The spec does not put a restriction on channels except that
    455 //     the count is stored in a byte, so 255 is the hard limit.
    456 //     Reducing this saves about 16 bytes per value, so using 16 saves
    457 //     (255-16)*16 or around 4KB. Plus anything other memory usage
    458 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
    459 //     6 (5.1 audio), or 2 (stereo only).
    460 #ifndef STB_VORBIS_MAX_CHANNELS
    461 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
    462 #endif
    463 
    464 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
    465 //     after a flush_pushdata(), stb_vorbis begins scanning for the
    466 //     next valid page, without backtracking. when it finds something
    467 //     that looks like a page, it streams through it and verifies its
    468 //     CRC32. Should that validation fail, it keeps scanning. But it's
    469 //     possible that _while_ streaming through to check the CRC32 of
    470 //     one candidate page, it sees another candidate page. This #define
    471 //     determines how many "overlapping" candidate pages it can search
    472 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
    473 //     garbage pages could be as big as 64KB, but probably average ~16KB.
    474 //     So don't hose ourselves by scanning an apparent 64KB page and
    475 //     missing a ton of real ones in the interim; so minimum of 2
    476 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
    477 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
    478 #endif
    479 
    480 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
    481 //     sets the log size of the huffman-acceleration table.  Maximum
    482 //     supported value is 24. with larger numbers, more decodings are O(1),
    483 //     but the table size is larger so worse cache missing, so you'll have
    484 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
    485 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
    486 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
    487 #endif
    488 
    489 // STB_VORBIS_FAST_BINARY_LENGTH [number]
    490 //     sets the log size of the binary-search acceleration table. this
    491 //     is used in similar fashion to the fast-huffman size to set initial
    492 //     parameters for the binary search
    493 
    494 // STB_VORBIS_FAST_HUFFMAN_INT
    495 //     The fast huffman tables are much more efficient if they can be
    496 //     stored as 16-bit results instead of 32-bit results. This restricts
    497 //     the codebooks to having only 65535 possible outcomes, though.
    498 //     (At least, accelerated by the huffman table.)
    499 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
    500 #define STB_VORBIS_FAST_HUFFMAN_SHORT
    501 #endif
    502 
    503 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
    504 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
    505 //     back on binary searching for the correct one. This requires storing
    506 //     extra tables with the huffman codes in sorted order. Defining this
    507 //     symbol trades off space for speed by forcing a linear search in the
    508 //     non-fast case, except for "sparse" codebooks.
    509 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
    510 
    511 // STB_VORBIS_DIVIDES_IN_RESIDUE
    512 //     stb_vorbis precomputes the result of the scalar residue decoding
    513 //     that would otherwise require a divide per chunk. you can trade off
    514 //     space for time by defining this symbol.
    515 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
    516 
    517 // STB_VORBIS_DIVIDES_IN_CODEBOOK
    518 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
    519 //     stored, or with all elements being chosen from a small range of values,
    520 //     and all values possible in all elements. By default, stb_vorbis expands
    521 //     this latter kind out to look like the former kind for ease of decoding,
    522 //     because otherwise an integer divide-per-vector-element is required to
    523 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
    524 //     trade off storage for speed.
    525 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
    526 
    527 #ifdef STB_VORBIS_CODEBOOK_SHORTS
    528 #error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats"
    529 #endif
    530 
    531 // STB_VORBIS_DIVIDE_TABLE
    532 //     this replaces small integer divides in the floor decode loop with
    533 //     table lookups. made less than 1% difference, so disabled by default.
    534 
    535 // STB_VORBIS_NO_INLINE_DECODE
    536 //     disables the inlining of the scalar codebook fast-huffman decode.
    537 //     might save a little codespace; useful for debugging
    538 // #define STB_VORBIS_NO_INLINE_DECODE
    539 
    540 // STB_VORBIS_NO_DEFER_FLOOR
    541 //     Normally we only decode the floor without synthesizing the actual
    542 //     full curve. We can instead synthesize the curve immediately. This
    543 //     requires more memory and is very likely slower, so I don't think
    544 //     you'd ever want to do it except for debugging.
    545 // #define STB_VORBIS_NO_DEFER_FLOOR
    546 
    547 
    548 
    549 
    550 //////////////////////////////////////////////////////////////////////////////
    551 
    552 #ifdef STB_VORBIS_NO_PULLDATA_API
    553    #define STB_VORBIS_NO_INTEGER_CONVERSION
    554    #define STB_VORBIS_NO_STDIO
    555 #endif
    556 
    557 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
    558    #define STB_VORBIS_NO_STDIO 1
    559 #endif
    560 
    561 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
    562 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
    563 
    564    // only need endianness for fast-float-to-int, which we don't
    565    // use for pushdata
    566 
    567    #ifndef STB_VORBIS_BIG_ENDIAN
    568      #define STB_VORBIS_ENDIAN  0
    569    #else
    570      #define STB_VORBIS_ENDIAN  1
    571    #endif
    572 
    573 #endif
    574 #endif
    575 
    576 
    577 #ifndef STB_VORBIS_NO_STDIO
    578 #include <stdio.h>
    579 #endif
    580 
    581 #ifndef STB_VORBIS_NO_CRT
    582    #include <stdlib.h>
    583    #include <string.h>
    584    #include <assert.h>
    585    #include <math.h>
    586 
    587    // find definition of alloca if it's not in stdlib.h:
    588    #if defined(_MSC_VER) || defined(__MINGW32__)
    589       #include <malloc.h>
    590    #endif
    591    #if defined(__linux__) || defined(__linux) || defined(__sun__) || defined(__EMSCRIPTEN__) || defined(__NEWLIB__)
    592       #include <alloca.h>
    593    #endif
    594 #else // STB_VORBIS_NO_CRT
    595    #define NULL 0
    596    #define malloc(s)   0
    597    #define free(s)     ((void) 0)
    598    #define realloc(s)  0
    599 #endif // STB_VORBIS_NO_CRT
    600 
    601 #include <limits.h>
    602 
    603 #ifdef __MINGW32__
    604    // eff you mingw:
    605    //     "fixed":
    606    //         http://sourceforge.net/p/mingw-w64/mailman/message/32882927/
    607    //     "no that broke the build, reverted, who cares about C":
    608    //         http://sourceforge.net/p/mingw-w64/mailman/message/32890381/
    609    #ifdef __forceinline
    610    #undef __forceinline
    611    #endif
    612    #define __forceinline
    613    #ifndef alloca
    614    #define alloca __builtin_alloca
    615    #endif
    616 #elif !defined(_MSC_VER)
    617    #if __GNUC__
    618       #define __forceinline inline
    619    #else
    620       #define __forceinline
    621    #endif
    622 #endif
    623 
    624 #if STB_VORBIS_MAX_CHANNELS > 256
    625 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
    626 #endif
    627 
    628 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
    629 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
    630 #endif
    631 
    632 
    633 #if 0
    634 #include <crtdbg.h>
    635 #define CHECK(f)   _CrtIsValidHeapPointer(f->channel_buffers[1])
    636 #else
    637 #define CHECK(f)   ((void) 0)
    638 #endif
    639 
    640 #define MAX_BLOCKSIZE_LOG  13   // from specification
    641 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
    642 
    643 
    644 typedef unsigned char  uint8;
    645 typedef   signed char   int8;
    646 typedef unsigned short uint16;
    647 typedef   signed short  int16;
    648 typedef unsigned int   uint32;
    649 typedef   signed int    int32;
    650 
    651 #ifndef TRUE
    652 #define TRUE 1
    653 #define FALSE 0
    654 #endif
    655 
    656 typedef float codetype;
    657 
    658 #ifdef _MSC_VER
    659 #define STBV_NOTUSED(v)  (void)(v)
    660 #else
    661 #define STBV_NOTUSED(v)  (void)sizeof(v)
    662 #endif
    663 
    664 // @NOTE
    665 //
    666 // Some arrays below are tagged "//varies", which means it's actually
    667 // a variable-sized piece of data, but rather than malloc I assume it's
    668 // small enough it's better to just allocate it all together with the
    669 // main thing
    670 //
    671 // Most of the variables are specified with the smallest size I could pack
    672 // them into. It might give better performance to make them all full-sized
    673 // integers. It should be safe to freely rearrange the structures or change
    674 // the sizes larger--nothing relies on silently truncating etc., nor the
    675 // order of variables.
    676 
    677 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
    678 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
    679 
    680 typedef struct
    681 {
    682    int dimensions, entries;
    683    uint8 *codeword_lengths;
    684    float  minimum_value;
    685    float  delta_value;
    686    uint8  value_bits;
    687    uint8  lookup_type;
    688    uint8  sequence_p;
    689    uint8  sparse;
    690    uint32 lookup_values;
    691    codetype *multiplicands;
    692    uint32 *codewords;
    693    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
    694     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
    695    #else
    696     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
    697    #endif
    698    uint32 *sorted_codewords;
    699    int    *sorted_values;
    700    int     sorted_entries;
    701 } Codebook;
    702 
    703 typedef struct
    704 {
    705    uint8 order;
    706    uint16 rate;
    707    uint16 bark_map_size;
    708    uint8 amplitude_bits;
    709    uint8 amplitude_offset;
    710    uint8 number_of_books;
    711    uint8 book_list[16]; // varies
    712 } Floor0;
    713 
    714 typedef struct
    715 {
    716    uint8 partitions;
    717    uint8 partition_class_list[32]; // varies
    718    uint8 class_dimensions[16]; // varies
    719    uint8 class_subclasses[16]; // varies
    720    uint8 class_masterbooks[16]; // varies
    721    int16 subclass_books[16][8]; // varies
    722    uint16 Xlist[31*8+2]; // varies
    723    uint8 sorted_order[31*8+2];
    724    uint8 neighbors[31*8+2][2];
    725    uint8 floor1_multiplier;
    726    uint8 rangebits;
    727    int values;
    728 } Floor1;
    729 
    730 typedef union
    731 {
    732    Floor0 floor0;
    733    Floor1 floor1;
    734 } Floor;
    735 
    736 typedef struct
    737 {
    738    uint32 begin, end;
    739    uint32 part_size;
    740    uint8 classifications;
    741    uint8 classbook;
    742    uint8 **classdata;
    743    int16 (*residue_books)[8];
    744 } Residue;
    745 
    746 typedef struct
    747 {
    748    uint8 magnitude;
    749    uint8 angle;
    750    uint8 mux;
    751 } MappingChannel;
    752 
    753 typedef struct
    754 {
    755    uint16 coupling_steps;
    756    MappingChannel *chan;
    757    uint8  submaps;
    758    uint8  submap_floor[15]; // varies
    759    uint8  submap_residue[15]; // varies
    760 } Mapping;
    761 
    762 typedef struct
    763 {
    764    uint8 blockflag;
    765    uint8 mapping;
    766    uint16 windowtype;
    767    uint16 transformtype;
    768 } Mode;
    769 
    770 typedef struct
    771 {
    772    uint32  goal_crc;    // expected crc if match
    773    int     bytes_left;  // bytes left in packet
    774    uint32  crc_so_far;  // running crc
    775    int     bytes_done;  // bytes processed in _current_ chunk
    776    uint32  sample_loc;  // granule pos encoded in page
    777 } CRCscan;
    778 
    779 typedef struct
    780 {
    781    uint32 page_start, page_end;
    782    uint32 last_decoded_sample;
    783 } ProbedPage;
    784 
    785 struct stb_vorbis
    786 {
    787   // user-accessible info
    788    unsigned int sample_rate;
    789    int channels;
    790 
    791    unsigned int setup_memory_required;
    792    unsigned int temp_memory_required;
    793    unsigned int setup_temp_memory_required;
    794 
    795    char *vendor;
    796    int comment_list_length;
    797    char **comment_list;
    798 
    799   // input config
    800 #ifndef STB_VORBIS_NO_STDIO
    801    FILE *f;
    802    uint32 f_start;
    803    int close_on_free;
    804 #endif
    805 
    806    uint8 *stream;
    807    uint8 *stream_start;
    808    uint8 *stream_end;
    809 
    810    uint32 stream_len;
    811 
    812    uint8  push_mode;
    813 
    814    // the page to seek to when seeking to start, may be zero
    815    uint32 first_audio_page_offset;
    816 
    817    // p_first is the page on which the first audio packet ends
    818    // (but not necessarily the page on which it starts)
    819    ProbedPage p_first, p_last;
    820 
    821   // memory management
    822    stb_vorbis_alloc alloc;
    823    int setup_offset;
    824    int temp_offset;
    825 
    826   // run-time results
    827    int eof;
    828    enum STBVorbisError error;
    829 
    830   // user-useful data
    831 
    832   // header info
    833    int blocksize[2];
    834    int blocksize_0, blocksize_1;
    835    int codebook_count;
    836    Codebook *codebooks;
    837    int floor_count;
    838    uint16 floor_types[64]; // varies
    839    Floor *floor_config;
    840    int residue_count;
    841    uint16 residue_types[64]; // varies
    842    Residue *residue_config;
    843    int mapping_count;
    844    Mapping *mapping;
    845    int mode_count;
    846    Mode mode_config[64];  // varies
    847 
    848    uint32 total_samples;
    849 
    850   // decode buffer
    851    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
    852    float *outputs        [STB_VORBIS_MAX_CHANNELS];
    853 
    854    float *previous_window[STB_VORBIS_MAX_CHANNELS];
    855    int previous_length;
    856 
    857    #ifndef STB_VORBIS_NO_DEFER_FLOOR
    858    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
    859    #else
    860    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
    861    #endif
    862 
    863    uint32 current_loc; // sample location of next frame to decode
    864    int    current_loc_valid;
    865 
    866   // per-blocksize precomputed data
    867 
    868    // twiddle factors
    869    float *A[2],*B[2],*C[2];
    870    float *window[2];
    871    uint16 *bit_reverse[2];
    872 
    873   // current page/packet/segment streaming info
    874    uint32 serial; // stream serial number for verification
    875    int last_page;
    876    int segment_count;
    877    uint8 segments[255];
    878    uint8 page_flag;
    879    uint8 bytes_in_seg;
    880    uint8 first_decode;
    881    int next_seg;
    882    int last_seg;  // flag that we're on the last segment
    883    int last_seg_which; // what was the segment number of the last seg?
    884    uint32 acc;
    885    int valid_bits;
    886    int packet_bytes;
    887    int end_seg_with_known_loc;
    888    uint32 known_loc_for_packet;
    889    int discard_samples_deferred;
    890    uint32 samples_output;
    891 
    892   // push mode scanning
    893    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
    894 #ifndef STB_VORBIS_NO_PUSHDATA_API
    895    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
    896 #endif
    897 
    898   // sample-access
    899    int channel_buffer_start;
    900    int channel_buffer_end;
    901 };
    902 
    903 #if defined(STB_VORBIS_NO_PUSHDATA_API)
    904    #define IS_PUSH_MODE(f)   FALSE
    905 #elif defined(STB_VORBIS_NO_PULLDATA_API)
    906    #define IS_PUSH_MODE(f)   TRUE
    907 #else
    908    #define IS_PUSH_MODE(f)   ((f)->push_mode)
    909 #endif
    910 
    911 typedef struct stb_vorbis vorb;
    912 
    913 static int error(vorb *f, enum STBVorbisError e)
    914 {
    915    f->error = e;
    916    if (!f->eof && e != VORBIS_need_more_data) {
    917       f->error=e; // breakpoint for debugging
    918    }
    919    return 0;
    920 }
    921 
    922 
    923 // these functions are used for allocating temporary memory
    924 // while decoding. if you can afford the stack space, use
    925 // alloca(); otherwise, provide a temp buffer and it will
    926 // allocate out of those.
    927 
    928 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
    929 
    930 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
    931 #define temp_free(f,p)                  (void)0
    932 #define temp_alloc_save(f)              ((f)->temp_offset)
    933 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
    934 
    935 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
    936 
    937 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
    938 static void *make_block_array(void *mem, int count, int size)
    939 {
    940    int i;
    941    void ** p = (void **) mem;
    942    char *q = (char *) (p + count);
    943    for (i=0; i < count; ++i) {
    944       p[i] = q;
    945       q += size;
    946    }
    947    return p;
    948 }
    949 
    950 static void *setup_malloc(vorb *f, int sz)
    951 {
    952    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
    953    f->setup_memory_required += sz;
    954    if (f->alloc.alloc_buffer) {
    955       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
    956       if (f->setup_offset + sz > f->temp_offset) return NULL;
    957       f->setup_offset += sz;
    958       return p;
    959    }
    960    return sz ? malloc(sz) : NULL;
    961 }
    962 
    963 static void setup_free(vorb *f, void *p)
    964 {
    965    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack
    966    free(p);
    967 }
    968 
    969 static void *setup_temp_malloc(vorb *f, int sz)
    970 {
    971    sz = (sz+7) & ~7; // round up to nearest 8 for alignment of future allocs.
    972    if (f->alloc.alloc_buffer) {
    973       if (f->temp_offset - sz < f->setup_offset) return NULL;
    974       f->temp_offset -= sz;
    975       return (char *) f->alloc.alloc_buffer + f->temp_offset;
    976    }
    977    return malloc(sz);
    978 }
    979 
    980 static void setup_temp_free(vorb *f, void *p, int sz)
    981 {
    982    if (f->alloc.alloc_buffer) {
    983       f->temp_offset += (sz+7)&~7;
    984       return;
    985    }
    986    free(p);
    987 }
    988 
    989 #define CRC32_POLY    0x04c11db7   // from spec
    990 
    991 static uint32 crc_table[256];
    992 static void crc32_init(void)
    993 {
    994    int i,j;
    995    uint32 s;
    996    for(i=0; i < 256; i++) {
    997       for (s=(uint32) i << 24, j=0; j < 8; ++j)
    998          s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0);
    999       crc_table[i] = s;
   1000    }
   1001 }
   1002 
   1003 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
   1004 {
   1005    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
   1006 }
   1007 
   1008 
   1009 // used in setup, and for huffman that doesn't go fast path
   1010 static unsigned int bit_reverse(unsigned int n)
   1011 {
   1012   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
   1013   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
   1014   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
   1015   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
   1016   return (n >> 16) | (n << 16);
   1017 }
   1018 
   1019 static float square(float x)
   1020 {
   1021    return x*x;
   1022 }
   1023 
   1024 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
   1025 // as required by the specification. fast(?) implementation from stb.h
   1026 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
   1027 static int ilog(int32 n)
   1028 {
   1029    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
   1030 
   1031    if (n < 0) return 0; // signed n returns 0
   1032 
   1033    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
   1034    if (n < (1 << 14))
   1035         if (n < (1 <<  4))            return  0 + log2_4[n      ];
   1036         else if (n < (1 <<  9))       return  5 + log2_4[n >>  5];
   1037              else                     return 10 + log2_4[n >> 10];
   1038    else if (n < (1 << 24))
   1039              if (n < (1 << 19))       return 15 + log2_4[n >> 15];
   1040              else                     return 20 + log2_4[n >> 20];
   1041         else if (n < (1 << 29))       return 25 + log2_4[n >> 25];
   1042              else                     return 30 + log2_4[n >> 30];
   1043 }
   1044 
   1045 #ifndef M_PI
   1046   #define M_PI  3.14159265358979323846264f  // from CRC
   1047 #endif
   1048 
   1049 // code length assigned to a value with no huffman encoding
   1050 #define NO_CODE   255
   1051 
   1052 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
   1053 //
   1054 // these functions are only called at setup, and only a few times
   1055 // per file
   1056 
   1057 static float float32_unpack(uint32 x)
   1058 {
   1059    // from the specification
   1060    uint32 mantissa = x & 0x1fffff;
   1061    uint32 sign = x & 0x80000000;
   1062    uint32 exp = (x & 0x7fe00000) >> 21;
   1063    double res = sign ? -(double)mantissa : (double)mantissa;
   1064    return (float) ldexp((float)res, (int)exp-788);
   1065 }
   1066 
   1067 
   1068 // zlib & jpeg huffman tables assume that the output symbols
   1069 // can either be arbitrarily arranged, or have monotonically
   1070 // increasing frequencies--they rely on the lengths being sorted;
   1071 // this makes for a very simple generation algorithm.
   1072 // vorbis allows a huffman table with non-sorted lengths. This
   1073 // requires a more sophisticated construction, since symbols in
   1074 // order do not map to huffman codes "in order".
   1075 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
   1076 {
   1077    if (!c->sparse) {
   1078       c->codewords      [symbol] = huff_code;
   1079    } else {
   1080       c->codewords       [count] = huff_code;
   1081       c->codeword_lengths[count] = len;
   1082       values             [count] = symbol;
   1083    }
   1084 }
   1085 
   1086 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
   1087 {
   1088    int i,k,m=0;
   1089    uint32 available[32];
   1090 
   1091    memset(available, 0, sizeof(available));
   1092    // find the first entry
   1093    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
   1094    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
   1095    assert(len[k] < 32); // no error return required, code reading lens checks this
   1096    // add to the list
   1097    add_entry(c, 0, k, m++, len[k], values);
   1098    // add all available leaves
   1099    for (i=1; i <= len[k]; ++i)
   1100       available[i] = 1U << (32-i);
   1101    // note that the above code treats the first case specially,
   1102    // but it's really the same as the following code, so they
   1103    // could probably be combined (except the initial code is 0,
   1104    // and I use 0 in available[] to mean 'empty')
   1105    for (i=k+1; i < n; ++i) {
   1106       uint32 res;
   1107       int z = len[i], y;
   1108       if (z == NO_CODE) continue;
   1109       assert(z < 32); // no error return required, code reading lens checks this
   1110       // find lowest available leaf (should always be earliest,
   1111       // which is what the specification calls for)
   1112       // note that this property, and the fact we can never have
   1113       // more than one free leaf at a given level, isn't totally
   1114       // trivial to prove, but it seems true and the assert never
   1115       // fires, so!
   1116       while (z > 0 && !available[z]) --z;
   1117       if (z == 0) { return FALSE; }
   1118       res = available[z];
   1119       available[z] = 0;
   1120       add_entry(c, bit_reverse(res), i, m++, len[i], values);
   1121       // propagate availability up the tree
   1122       if (z != len[i]) {
   1123          for (y=len[i]; y > z; --y) {
   1124             assert(available[y] == 0);
   1125             available[y] = res + (1 << (32-y));
   1126          }
   1127       }
   1128    }
   1129    return TRUE;
   1130 }
   1131 
   1132 // accelerated huffman table allows fast O(1) match of all symbols
   1133 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
   1134 static void compute_accelerated_huffman(Codebook *c)
   1135 {
   1136    int i, len;
   1137    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
   1138       c->fast_huffman[i] = -1;
   1139 
   1140    len = c->sparse ? c->sorted_entries : c->entries;
   1141    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
   1142    if (len > 32767) len = 32767; // largest possible value we can encode!
   1143    #endif
   1144    for (i=0; i < len; ++i) {
   1145       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
   1146          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
   1147          // set table entries for all bit combinations in the higher bits
   1148          while (z < FAST_HUFFMAN_TABLE_SIZE) {
   1149              c->fast_huffman[z] = i;
   1150              z += 1 << c->codeword_lengths[i];
   1151          }
   1152       }
   1153    }
   1154 }
   1155 
   1156 #ifdef _MSC_VER
   1157 #define STBV_CDECL __cdecl
   1158 #else
   1159 #define STBV_CDECL
   1160 #endif
   1161 
   1162 static int STBV_CDECL uint32_compare(const void *p, const void *q)
   1163 {
   1164    uint32 x = * (uint32 *) p;
   1165    uint32 y = * (uint32 *) q;
   1166    return x < y ? -1 : x > y;
   1167 }
   1168 
   1169 static int include_in_sort(Codebook *c, uint8 len)
   1170 {
   1171    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
   1172    if (len == NO_CODE) return FALSE;
   1173    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
   1174    return FALSE;
   1175 }
   1176 
   1177 // if the fast table above doesn't work, we want to binary
   1178 // search them... need to reverse the bits
   1179 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
   1180 {
   1181    int i, len;
   1182    // build a list of all the entries
   1183    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
   1184    // this is kind of a frivolous optimization--I don't see any performance improvement,
   1185    // but it's like 4 extra lines of code, so.
   1186    if (!c->sparse) {
   1187       int k = 0;
   1188       for (i=0; i < c->entries; ++i)
   1189          if (include_in_sort(c, lengths[i]))
   1190             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
   1191       assert(k == c->sorted_entries);
   1192    } else {
   1193       for (i=0; i < c->sorted_entries; ++i)
   1194          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
   1195    }
   1196 
   1197    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
   1198    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
   1199 
   1200    len = c->sparse ? c->sorted_entries : c->entries;
   1201    // now we need to indicate how they correspond; we could either
   1202    //   #1: sort a different data structure that says who they correspond to
   1203    //   #2: for each sorted entry, search the original list to find who corresponds
   1204    //   #3: for each original entry, find the sorted entry
   1205    // #1 requires extra storage, #2 is slow, #3 can use binary search!
   1206    for (i=0; i < len; ++i) {
   1207       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
   1208       if (include_in_sort(c,huff_len)) {
   1209          uint32 code = bit_reverse(c->codewords[i]);
   1210          int x=0, n=c->sorted_entries;
   1211          while (n > 1) {
   1212             // invariant: sc[x] <= code < sc[x+n]
   1213             int m = x + (n >> 1);
   1214             if (c->sorted_codewords[m] <= code) {
   1215                x = m;
   1216                n -= (n>>1);
   1217             } else {
   1218                n >>= 1;
   1219             }
   1220          }
   1221          assert(c->sorted_codewords[x] == code);
   1222          if (c->sparse) {
   1223             c->sorted_values[x] = values[i];
   1224             c->codeword_lengths[x] = huff_len;
   1225          } else {
   1226             c->sorted_values[x] = i;
   1227          }
   1228       }
   1229    }
   1230 }
   1231 
   1232 // only run while parsing the header (3 times)
   1233 static int vorbis_validate(uint8 *data)
   1234 {
   1235    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
   1236    return memcmp(data, vorbis, 6) == 0;
   1237 }
   1238 
   1239 // called from setup only, once per code book
   1240 // (formula implied by specification)
   1241 static int lookup1_values(int entries, int dim)
   1242 {
   1243    int r = (int) floor(exp((float) log((float) entries) / dim));
   1244    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
   1245       ++r;                                              // floor() to avoid _ftol() when non-CRT
   1246    if (pow((float) r+1, dim) <= entries)
   1247       return -1;
   1248    if ((int) floor(pow((float) r, dim)) > entries)
   1249       return -1;
   1250    return r;
   1251 }
   1252 
   1253 // called twice per file
   1254 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
   1255 {
   1256    int n4 = n >> 2, n8 = n >> 3;
   1257    int k,k2;
   1258 
   1259    for (k=k2=0; k < n4; ++k,k2+=2) {
   1260       A[k2  ] = (float)  cos(4*k*M_PI/n);
   1261       A[k2+1] = (float) -sin(4*k*M_PI/n);
   1262       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
   1263       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
   1264    }
   1265    for (k=k2=0; k < n8; ++k,k2+=2) {
   1266       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
   1267       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
   1268    }
   1269 }
   1270 
   1271 static void compute_window(int n, float *window)
   1272 {
   1273    int n2 = n >> 1, i;
   1274    for (i=0; i < n2; ++i)
   1275       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
   1276 }
   1277 
   1278 static void compute_bitreverse(int n, uint16 *rev)
   1279 {
   1280    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
   1281    int i, n8 = n >> 3;
   1282    for (i=0; i < n8; ++i)
   1283       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
   1284 }
   1285 
   1286 static int init_blocksize(vorb *f, int b, int n)
   1287 {
   1288    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
   1289    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
   1290    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
   1291    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
   1292    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
   1293    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
   1294    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
   1295    if (!f->window[b]) return error(f, VORBIS_outofmem);
   1296    compute_window(n, f->window[b]);
   1297    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
   1298    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
   1299    compute_bitreverse(n, f->bit_reverse[b]);
   1300    return TRUE;
   1301 }
   1302 
   1303 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
   1304 {
   1305    int low = -1;
   1306    int high = 65536;
   1307    int i;
   1308    for (i=0; i < n; ++i) {
   1309       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
   1310       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
   1311    }
   1312 }
   1313 
   1314 // this has been repurposed so y is now the original index instead of y
   1315 typedef struct
   1316 {
   1317    uint16 x,id;
   1318 } stbv__floor_ordering;
   1319 
   1320 static int STBV_CDECL point_compare(const void *p, const void *q)
   1321 {
   1322    stbv__floor_ordering *a = (stbv__floor_ordering *) p;
   1323    stbv__floor_ordering *b = (stbv__floor_ordering *) q;
   1324    return a->x < b->x ? -1 : a->x > b->x;
   1325 }
   1326 
   1327 //
   1328 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
   1329 
   1330 
   1331 #if defined(STB_VORBIS_NO_STDIO)
   1332    #define USE_MEMORY(z)    TRUE
   1333 #else
   1334    #define USE_MEMORY(z)    ((z)->stream)
   1335 #endif
   1336 
   1337 static uint8 get8(vorb *z)
   1338 {
   1339    if (USE_MEMORY(z)) {
   1340       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
   1341       return *z->stream++;
   1342    }
   1343 
   1344    #ifndef STB_VORBIS_NO_STDIO
   1345    {
   1346    int c = fgetc(z->f);
   1347    if (c == EOF) { z->eof = TRUE; return 0; }
   1348    return c;
   1349    }
   1350    #endif
   1351 }
   1352 
   1353 static uint32 get32(vorb *f)
   1354 {
   1355    uint32 x;
   1356    x = get8(f);
   1357    x += get8(f) << 8;
   1358    x += get8(f) << 16;
   1359    x += (uint32) get8(f) << 24;
   1360    return x;
   1361 }
   1362 
   1363 static int getn(vorb *z, uint8 *data, int n)
   1364 {
   1365    if (USE_MEMORY(z)) {
   1366       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
   1367       memcpy(data, z->stream, n);
   1368       z->stream += n;
   1369       return 1;
   1370    }
   1371 
   1372    #ifndef STB_VORBIS_NO_STDIO
   1373    if (fread(data, n, 1, z->f) == 1)
   1374       return 1;
   1375    else {
   1376       z->eof = 1;
   1377       return 0;
   1378    }
   1379    #endif
   1380 }
   1381 
   1382 static void skip(vorb *z, int n)
   1383 {
   1384    if (USE_MEMORY(z)) {
   1385       z->stream += n;
   1386       if (z->stream >= z->stream_end) z->eof = 1;
   1387       return;
   1388    }
   1389    #ifndef STB_VORBIS_NO_STDIO
   1390    {
   1391       long x = ftell(z->f);
   1392       fseek(z->f, x+n, SEEK_SET);
   1393    }
   1394    #endif
   1395 }
   1396 
   1397 static int set_file_offset(stb_vorbis *f, unsigned int loc)
   1398 {
   1399    #ifndef STB_VORBIS_NO_PUSHDATA_API
   1400    if (f->push_mode) return 0;
   1401    #endif
   1402    f->eof = 0;
   1403    if (USE_MEMORY(f)) {
   1404       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
   1405          f->stream = f->stream_end;
   1406          f->eof = 1;
   1407          return 0;
   1408       } else {
   1409          f->stream = f->stream_start + loc;
   1410          return 1;
   1411       }
   1412    }
   1413    #ifndef STB_VORBIS_NO_STDIO
   1414    if (loc + f->f_start < loc || loc >= 0x80000000) {
   1415       loc = 0x7fffffff;
   1416       f->eof = 1;
   1417    } else {
   1418       loc += f->f_start;
   1419    }
   1420    if (!fseek(f->f, loc, SEEK_SET))
   1421       return 1;
   1422    f->eof = 1;
   1423    fseek(f->f, f->f_start, SEEK_END);
   1424    return 0;
   1425    #endif
   1426 }
   1427 
   1428 
   1429 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
   1430 
   1431 static int capture_pattern(vorb *f)
   1432 {
   1433    if (0x4f != get8(f)) return FALSE;
   1434    if (0x67 != get8(f)) return FALSE;
   1435    if (0x67 != get8(f)) return FALSE;
   1436    if (0x53 != get8(f)) return FALSE;
   1437    return TRUE;
   1438 }
   1439 
   1440 #define PAGEFLAG_continued_packet   1
   1441 #define PAGEFLAG_first_page         2
   1442 #define PAGEFLAG_last_page          4
   1443 
   1444 static int start_page_no_capturepattern(vorb *f)
   1445 {
   1446    uint32 loc0,loc1,n;
   1447    if (f->first_decode && !IS_PUSH_MODE(f)) {
   1448       f->p_first.page_start = stb_vorbis_get_file_offset(f) - 4;
   1449    }
   1450    // stream structure version
   1451    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
   1452    // header flag
   1453    f->page_flag = get8(f);
   1454    // absolute granule position
   1455    loc0 = get32(f);
   1456    loc1 = get32(f);
   1457    // @TODO: validate loc0,loc1 as valid positions?
   1458    // stream serial number -- vorbis doesn't interleave, so discard
   1459    get32(f);
   1460    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
   1461    // page sequence number
   1462    n = get32(f);
   1463    f->last_page = n;
   1464    // CRC32
   1465    get32(f);
   1466    // page_segments
   1467    f->segment_count = get8(f);
   1468    if (!getn(f, f->segments, f->segment_count))
   1469       return error(f, VORBIS_unexpected_eof);
   1470    // assume we _don't_ know any the sample position of any segments
   1471    f->end_seg_with_known_loc = -2;
   1472    if (loc0 != ~0U || loc1 != ~0U) {
   1473       int i;
   1474       // determine which packet is the last one that will complete
   1475       for (i=f->segment_count-1; i >= 0; --i)
   1476          if (f->segments[i] < 255)
   1477             break;
   1478       // 'i' is now the index of the _last_ segment of a packet that ends
   1479       if (i >= 0) {
   1480          f->end_seg_with_known_loc = i;
   1481          f->known_loc_for_packet   = loc0;
   1482       }
   1483    }
   1484    if (f->first_decode) {
   1485       int i,len;
   1486       len = 0;
   1487       for (i=0; i < f->segment_count; ++i)
   1488          len += f->segments[i];
   1489       len += 27 + f->segment_count;
   1490       f->p_first.page_end = f->p_first.page_start + len;
   1491       f->p_first.last_decoded_sample = loc0;
   1492    }
   1493    f->next_seg = 0;
   1494    return TRUE;
   1495 }
   1496 
   1497 static int start_page(vorb *f)
   1498 {
   1499    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
   1500    return start_page_no_capturepattern(f);
   1501 }
   1502 
   1503 static int start_packet(vorb *f)
   1504 {
   1505    while (f->next_seg == -1) {
   1506       if (!start_page(f)) return FALSE;
   1507       if (f->page_flag & PAGEFLAG_continued_packet)
   1508          return error(f, VORBIS_continued_packet_flag_invalid);
   1509    }
   1510    f->last_seg = FALSE;
   1511    f->valid_bits = 0;
   1512    f->packet_bytes = 0;
   1513    f->bytes_in_seg = 0;
   1514    // f->next_seg is now valid
   1515    return TRUE;
   1516 }
   1517 
   1518 static int maybe_start_packet(vorb *f)
   1519 {
   1520    if (f->next_seg == -1) {
   1521       int x = get8(f);
   1522       if (f->eof) return FALSE; // EOF at page boundary is not an error!
   1523       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
   1524       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
   1525       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
   1526       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
   1527       if (!start_page_no_capturepattern(f)) return FALSE;
   1528       if (f->page_flag & PAGEFLAG_continued_packet) {
   1529          // set up enough state that we can read this packet if we want,
   1530          // e.g. during recovery
   1531          f->last_seg = FALSE;
   1532          f->bytes_in_seg = 0;
   1533          return error(f, VORBIS_continued_packet_flag_invalid);
   1534       }
   1535    }
   1536    return start_packet(f);
   1537 }
   1538 
   1539 static int next_segment(vorb *f)
   1540 {
   1541    int len;
   1542    if (f->last_seg) return 0;
   1543    if (f->next_seg == -1) {
   1544       f->last_seg_which = f->segment_count-1; // in case start_page fails
   1545       if (!start_page(f)) { f->last_seg = 1; return 0; }
   1546       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
   1547    }
   1548    len = f->segments[f->next_seg++];
   1549    if (len < 255) {
   1550       f->last_seg = TRUE;
   1551       f->last_seg_which = f->next_seg-1;
   1552    }
   1553    if (f->next_seg >= f->segment_count)
   1554       f->next_seg = -1;
   1555    assert(f->bytes_in_seg == 0);
   1556    f->bytes_in_seg = len;
   1557    return len;
   1558 }
   1559 
   1560 #define EOP    (-1)
   1561 #define INVALID_BITS  (-1)
   1562 
   1563 static int get8_packet_raw(vorb *f)
   1564 {
   1565    if (!f->bytes_in_seg) {  // CLANG!
   1566       if (f->last_seg) return EOP;
   1567       else if (!next_segment(f)) return EOP;
   1568    }
   1569    assert(f->bytes_in_seg > 0);
   1570    --f->bytes_in_seg;
   1571    ++f->packet_bytes;
   1572    return get8(f);
   1573 }
   1574 
   1575 static int get8_packet(vorb *f)
   1576 {
   1577    int x = get8_packet_raw(f);
   1578    f->valid_bits = 0;
   1579    return x;
   1580 }
   1581 
   1582 static int get32_packet(vorb *f)
   1583 {
   1584    uint32 x;
   1585    x = get8_packet(f);
   1586    x += get8_packet(f) << 8;
   1587    x += get8_packet(f) << 16;
   1588    x += (uint32) get8_packet(f) << 24;
   1589    return x;
   1590 }
   1591 
   1592 static void flush_packet(vorb *f)
   1593 {
   1594    while (get8_packet_raw(f) != EOP);
   1595 }
   1596 
   1597 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
   1598 // as the huffman decoder?
   1599 static uint32 get_bits(vorb *f, int n)
   1600 {
   1601    uint32 z;
   1602 
   1603    if (f->valid_bits < 0) return 0;
   1604    if (f->valid_bits < n) {
   1605       if (n > 24) {
   1606          // the accumulator technique below would not work correctly in this case
   1607          z = get_bits(f, 24);
   1608          z += get_bits(f, n-24) << 24;
   1609          return z;
   1610       }
   1611       if (f->valid_bits == 0) f->acc = 0;
   1612       while (f->valid_bits < n) {
   1613          int z = get8_packet_raw(f);
   1614          if (z == EOP) {
   1615             f->valid_bits = INVALID_BITS;
   1616             return 0;
   1617          }
   1618          f->acc += z << f->valid_bits;
   1619          f->valid_bits += 8;
   1620       }
   1621    }
   1622 
   1623    assert(f->valid_bits >= n);
   1624    z = f->acc & ((1 << n)-1);
   1625    f->acc >>= n;
   1626    f->valid_bits -= n;
   1627    return z;
   1628 }
   1629 
   1630 // @OPTIMIZE: primary accumulator for huffman
   1631 // expand the buffer to as many bits as possible without reading off end of packet
   1632 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
   1633 // e.g. cache them locally and decode locally
   1634 static __forceinline void prep_huffman(vorb *f)
   1635 {
   1636    if (f->valid_bits <= 24) {
   1637       if (f->valid_bits == 0) f->acc = 0;
   1638       do {
   1639          int z;
   1640          if (f->last_seg && !f->bytes_in_seg) return;
   1641          z = get8_packet_raw(f);
   1642          if (z == EOP) return;
   1643          f->acc += (unsigned) z << f->valid_bits;
   1644          f->valid_bits += 8;
   1645       } while (f->valid_bits <= 24);
   1646    }
   1647 }
   1648 
   1649 enum
   1650 {
   1651    VORBIS_packet_id = 1,
   1652    VORBIS_packet_comment = 3,
   1653    VORBIS_packet_setup = 5
   1654 };
   1655 
   1656 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
   1657 {
   1658    int i;
   1659    prep_huffman(f);
   1660 
   1661    if (c->codewords == NULL && c->sorted_codewords == NULL)
   1662       return -1;
   1663 
   1664    // cases to use binary search: sorted_codewords && !c->codewords
   1665    //                             sorted_codewords && c->entries > 8
   1666    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
   1667       // binary search
   1668       uint32 code = bit_reverse(f->acc);
   1669       int x=0, n=c->sorted_entries, len;
   1670 
   1671       while (n > 1) {
   1672          // invariant: sc[x] <= code < sc[x+n]
   1673          int m = x + (n >> 1);
   1674          if (c->sorted_codewords[m] <= code) {
   1675             x = m;
   1676             n -= (n>>1);
   1677          } else {
   1678             n >>= 1;
   1679          }
   1680       }
   1681       // x is now the sorted index
   1682       if (!c->sparse) x = c->sorted_values[x];
   1683       // x is now sorted index if sparse, or symbol otherwise
   1684       len = c->codeword_lengths[x];
   1685       if (f->valid_bits >= len) {
   1686          f->acc >>= len;
   1687          f->valid_bits -= len;
   1688          return x;
   1689       }
   1690 
   1691       f->valid_bits = 0;
   1692       return -1;
   1693    }
   1694 
   1695    // if small, linear search
   1696    assert(!c->sparse);
   1697    for (i=0; i < c->entries; ++i) {
   1698       if (c->codeword_lengths[i] == NO_CODE) continue;
   1699       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
   1700          if (f->valid_bits >= c->codeword_lengths[i]) {
   1701             f->acc >>= c->codeword_lengths[i];
   1702             f->valid_bits -= c->codeword_lengths[i];
   1703             return i;
   1704          }
   1705          f->valid_bits = 0;
   1706          return -1;
   1707       }
   1708    }
   1709 
   1710    error(f, VORBIS_invalid_stream);
   1711    f->valid_bits = 0;
   1712    return -1;
   1713 }
   1714 
   1715 #ifndef STB_VORBIS_NO_INLINE_DECODE
   1716 
   1717 #define DECODE_RAW(var, f,c)                                  \
   1718    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
   1719       prep_huffman(f);                                        \
   1720    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
   1721    var = c->fast_huffman[var];                                \
   1722    if (var >= 0) {                                            \
   1723       int n = c->codeword_lengths[var];                       \
   1724       f->acc >>= n;                                           \
   1725       f->valid_bits -= n;                                     \
   1726       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
   1727    } else {                                                   \
   1728       var = codebook_decode_scalar_raw(f,c);                  \
   1729    }
   1730 
   1731 #else
   1732 
   1733 static int codebook_decode_scalar(vorb *f, Codebook *c)
   1734 {
   1735    int i;
   1736    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
   1737       prep_huffman(f);
   1738    // fast huffman table lookup
   1739    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
   1740    i = c->fast_huffman[i];
   1741    if (i >= 0) {
   1742       f->acc >>= c->codeword_lengths[i];
   1743       f->valid_bits -= c->codeword_lengths[i];
   1744       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
   1745       return i;
   1746    }
   1747    return codebook_decode_scalar_raw(f,c);
   1748 }
   1749 
   1750 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
   1751 
   1752 #endif
   1753 
   1754 #define DECODE(var,f,c)                                       \
   1755    DECODE_RAW(var,f,c)                                        \
   1756    if (c->sparse) var = c->sorted_values[var];
   1757 
   1758 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
   1759   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
   1760 #else
   1761   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
   1762 #endif
   1763 
   1764 
   1765 
   1766 
   1767 
   1768 
   1769 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
   1770 // where we avoid one addition
   1771 #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
   1772 #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
   1773 #define CODEBOOK_ELEMENT_BASE(c)         (0)
   1774 
   1775 static int codebook_decode_start(vorb *f, Codebook *c)
   1776 {
   1777    int z = -1;
   1778 
   1779    // type 0 is only legal in a scalar context
   1780    if (c->lookup_type == 0)
   1781       error(f, VORBIS_invalid_stream);
   1782    else {
   1783       DECODE_VQ(z,f,c);
   1784       if (c->sparse) assert(z < c->sorted_entries);
   1785       if (z < 0) {  // check for EOP
   1786          if (!f->bytes_in_seg)
   1787             if (f->last_seg)
   1788                return z;
   1789          error(f, VORBIS_invalid_stream);
   1790       }
   1791    }
   1792    return z;
   1793 }
   1794 
   1795 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
   1796 {
   1797    int i,z = codebook_decode_start(f,c);
   1798    if (z < 0) return FALSE;
   1799    if (len > c->dimensions) len = c->dimensions;
   1800 
   1801 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
   1802    if (c->lookup_type == 1) {
   1803       float last = CODEBOOK_ELEMENT_BASE(c);
   1804       int div = 1;
   1805       for (i=0; i < len; ++i) {
   1806          int off = (z / div) % c->lookup_values;
   1807          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
   1808          output[i] += val;
   1809          if (c->sequence_p) last = val + c->minimum_value;
   1810          div *= c->lookup_values;
   1811       }
   1812       return TRUE;
   1813    }
   1814 #endif
   1815 
   1816    z *= c->dimensions;
   1817    if (c->sequence_p) {
   1818       float last = CODEBOOK_ELEMENT_BASE(c);
   1819       for (i=0; i < len; ++i) {
   1820          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
   1821          output[i] += val;
   1822          last = val + c->minimum_value;
   1823       }
   1824    } else {
   1825       float last = CODEBOOK_ELEMENT_BASE(c);
   1826       for (i=0; i < len; ++i) {
   1827          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
   1828       }
   1829    }
   1830 
   1831    return TRUE;
   1832 }
   1833 
   1834 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
   1835 {
   1836    int i,z = codebook_decode_start(f,c);
   1837    float last = CODEBOOK_ELEMENT_BASE(c);
   1838    if (z < 0) return FALSE;
   1839    if (len > c->dimensions) len = c->dimensions;
   1840 
   1841 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
   1842    if (c->lookup_type == 1) {
   1843       int div = 1;
   1844       for (i=0; i < len; ++i) {
   1845          int off = (z / div) % c->lookup_values;
   1846          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
   1847          output[i*step] += val;
   1848          if (c->sequence_p) last = val;
   1849          div *= c->lookup_values;
   1850       }
   1851       return TRUE;
   1852    }
   1853 #endif
   1854 
   1855    z *= c->dimensions;
   1856    for (i=0; i < len; ++i) {
   1857       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
   1858       output[i*step] += val;
   1859       if (c->sequence_p) last = val;
   1860    }
   1861 
   1862    return TRUE;
   1863 }
   1864 
   1865 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
   1866 {
   1867    int c_inter = *c_inter_p;
   1868    int p_inter = *p_inter_p;
   1869    int i,z, effective = c->dimensions;
   1870 
   1871    // type 0 is only legal in a scalar context
   1872    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
   1873 
   1874    while (total_decode > 0) {
   1875       float last = CODEBOOK_ELEMENT_BASE(c);
   1876       DECODE_VQ(z,f,c);
   1877       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
   1878       assert(!c->sparse || z < c->sorted_entries);
   1879       #endif
   1880       if (z < 0) {
   1881          if (!f->bytes_in_seg)
   1882             if (f->last_seg) return FALSE;
   1883          return error(f, VORBIS_invalid_stream);
   1884       }
   1885 
   1886       // if this will take us off the end of the buffers, stop short!
   1887       // we check by computing the length of the virtual interleaved
   1888       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
   1889       // and the length we'll be using (effective)
   1890       if (c_inter + p_inter*ch + effective > len * ch) {
   1891          effective = len*ch - (p_inter*ch - c_inter);
   1892       }
   1893 
   1894    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
   1895       if (c->lookup_type == 1) {
   1896          int div = 1;
   1897          for (i=0; i < effective; ++i) {
   1898             int off = (z / div) % c->lookup_values;
   1899             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
   1900             if (outputs[c_inter])
   1901                outputs[c_inter][p_inter] += val;
   1902             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
   1903             if (c->sequence_p) last = val;
   1904             div *= c->lookup_values;
   1905          }
   1906       } else
   1907    #endif
   1908       {
   1909          z *= c->dimensions;
   1910          if (c->sequence_p) {
   1911             for (i=0; i < effective; ++i) {
   1912                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
   1913                if (outputs[c_inter])
   1914                   outputs[c_inter][p_inter] += val;
   1915                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
   1916                last = val;
   1917             }
   1918          } else {
   1919             for (i=0; i < effective; ++i) {
   1920                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
   1921                if (outputs[c_inter])
   1922                   outputs[c_inter][p_inter] += val;
   1923                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
   1924             }
   1925          }
   1926       }
   1927 
   1928       total_decode -= effective;
   1929    }
   1930    *c_inter_p = c_inter;
   1931    *p_inter_p = p_inter;
   1932    return TRUE;
   1933 }
   1934 
   1935 static int predict_point(int x, int x0, int x1, int y0, int y1)
   1936 {
   1937    int dy = y1 - y0;
   1938    int adx = x1 - x0;
   1939    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
   1940    int err = abs(dy) * (x - x0);
   1941    int off = err / adx;
   1942    return dy < 0 ? y0 - off : y0 + off;
   1943 }
   1944 
   1945 // the following table is block-copied from the specification
   1946 static float inverse_db_table[256] =
   1947 {
   1948   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
   1949   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
   1950   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
   1951   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
   1952   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
   1953   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
   1954   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
   1955   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
   1956   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
   1957   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
   1958   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
   1959   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
   1960   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
   1961   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
   1962   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
   1963   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
   1964   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
   1965   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
   1966   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
   1967   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
   1968   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
   1969   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
   1970   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
   1971   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
   1972   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
   1973   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
   1974   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
   1975   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
   1976   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
   1977   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
   1978   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
   1979   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
   1980   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
   1981   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
   1982   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
   1983   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
   1984   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
   1985   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
   1986   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
   1987   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
   1988   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
   1989   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
   1990   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
   1991   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
   1992   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
   1993   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
   1994   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
   1995   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
   1996   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
   1997   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
   1998   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
   1999   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
   2000   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
   2001   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
   2002   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
   2003   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
   2004   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
   2005   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
   2006   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
   2007   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
   2008   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
   2009   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
   2010   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
   2011   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
   2012 };
   2013 
   2014 
   2015 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
   2016 // note that you must produce bit-identical output to decode correctly;
   2017 // this specific sequence of operations is specified in the spec (it's
   2018 // drawing integer-quantized frequency-space lines that the encoder
   2019 // expects to be exactly the same)
   2020 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
   2021 // have to divide in the setup? sigh.
   2022 #ifndef STB_VORBIS_NO_DEFER_FLOOR
   2023 #define LINE_OP(a,b)   a *= b
   2024 #else
   2025 #define LINE_OP(a,b)   a = b
   2026 #endif
   2027 
   2028 #ifdef STB_VORBIS_DIVIDE_TABLE
   2029 #define DIVTAB_NUMER   32
   2030 #define DIVTAB_DENOM   64
   2031 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
   2032 #endif
   2033 
   2034 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
   2035 {
   2036    int dy = y1 - y0;
   2037    int adx = x1 - x0;
   2038    int ady = abs(dy);
   2039    int base;
   2040    int x=x0,y=y0;
   2041    int err = 0;
   2042    int sy;
   2043 
   2044 #ifdef STB_VORBIS_DIVIDE_TABLE
   2045    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
   2046       if (dy < 0) {
   2047          base = -integer_divide_table[ady][adx];
   2048          sy = base-1;
   2049       } else {
   2050          base =  integer_divide_table[ady][adx];
   2051          sy = base+1;
   2052       }
   2053    } else {
   2054       base = dy / adx;
   2055       if (dy < 0)
   2056          sy = base - 1;
   2057       else
   2058          sy = base+1;
   2059    }
   2060 #else
   2061    base = dy / adx;
   2062    if (dy < 0)
   2063       sy = base - 1;
   2064    else
   2065       sy = base+1;
   2066 #endif
   2067    ady -= abs(base) * adx;
   2068    if (x1 > n) x1 = n;
   2069    if (x < x1) {
   2070       LINE_OP(output[x], inverse_db_table[y&255]);
   2071       for (++x; x < x1; ++x) {
   2072          err += ady;
   2073          if (err >= adx) {
   2074             err -= adx;
   2075             y += sy;
   2076          } else
   2077             y += base;
   2078          LINE_OP(output[x], inverse_db_table[y&255]);
   2079       }
   2080    }
   2081 }
   2082 
   2083 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
   2084 {
   2085    int k;
   2086    if (rtype == 0) {
   2087       int step = n / book->dimensions;
   2088       for (k=0; k < step; ++k)
   2089          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
   2090             return FALSE;
   2091    } else {
   2092       for (k=0; k < n; ) {
   2093          if (!codebook_decode(f, book, target+offset, n-k))
   2094             return FALSE;
   2095          k += book->dimensions;
   2096          offset += book->dimensions;
   2097       }
   2098    }
   2099    return TRUE;
   2100 }
   2101 
   2102 // n is 1/2 of the blocksize --
   2103 // specification: "Correct per-vector decode length is [n]/2"
   2104 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
   2105 {
   2106    int i,j,pass;
   2107    Residue *r = f->residue_config + rn;
   2108    int rtype = f->residue_types[rn];
   2109    int c = r->classbook;
   2110    int classwords = f->codebooks[c].dimensions;
   2111    unsigned int actual_size = rtype == 2 ? n*2 : n;
   2112    unsigned int limit_r_begin = (r->begin < actual_size ? r->begin : actual_size);
   2113    unsigned int limit_r_end   = (r->end   < actual_size ? r->end   : actual_size);
   2114    int n_read = limit_r_end - limit_r_begin;
   2115    int part_read = n_read / r->part_size;
   2116    int temp_alloc_point = temp_alloc_save(f);
   2117    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2118    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
   2119    #else
   2120    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
   2121    #endif
   2122 
   2123    CHECK(f);
   2124 
   2125    for (i=0; i < ch; ++i)
   2126       if (!do_not_decode[i])
   2127          memset(residue_buffers[i], 0, sizeof(float) * n);
   2128 
   2129    if (rtype == 2 && ch != 1) {
   2130       for (j=0; j < ch; ++j)
   2131          if (!do_not_decode[j])
   2132             break;
   2133       if (j == ch)
   2134          goto done;
   2135 
   2136       for (pass=0; pass < 8; ++pass) {
   2137          int pcount = 0, class_set = 0;
   2138          if (ch == 2) {
   2139             while (pcount < part_read) {
   2140                int z = r->begin + pcount*r->part_size;
   2141                int c_inter = (z & 1), p_inter = z>>1;
   2142                if (pass == 0) {
   2143                   Codebook *c = f->codebooks+r->classbook;
   2144                   int q;
   2145                   DECODE(q,f,c);
   2146                   if (q == EOP) goto done;
   2147                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2148                   part_classdata[0][class_set] = r->classdata[q];
   2149                   #else
   2150                   for (i=classwords-1; i >= 0; --i) {
   2151                      classifications[0][i+pcount] = q % r->classifications;
   2152                      q /= r->classifications;
   2153                   }
   2154                   #endif
   2155                }
   2156                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
   2157                   int z = r->begin + pcount*r->part_size;
   2158                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2159                   int c = part_classdata[0][class_set][i];
   2160                   #else
   2161                   int c = classifications[0][pcount];
   2162                   #endif
   2163                   int b = r->residue_books[c][pass];
   2164                   if (b >= 0) {
   2165                      Codebook *book = f->codebooks + b;
   2166                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
   2167                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
   2168                         goto done;
   2169                      #else
   2170                      // saves 1%
   2171                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
   2172                         goto done;
   2173                      #endif
   2174                   } else {
   2175                      z += r->part_size;
   2176                      c_inter = z & 1;
   2177                      p_inter = z >> 1;
   2178                   }
   2179                }
   2180                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2181                ++class_set;
   2182                #endif
   2183             }
   2184          } else if (ch > 2) {
   2185             while (pcount < part_read) {
   2186                int z = r->begin + pcount*r->part_size;
   2187                int c_inter = z % ch, p_inter = z/ch;
   2188                if (pass == 0) {
   2189                   Codebook *c = f->codebooks+r->classbook;
   2190                   int q;
   2191                   DECODE(q,f,c);
   2192                   if (q == EOP) goto done;
   2193                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2194                   part_classdata[0][class_set] = r->classdata[q];
   2195                   #else
   2196                   for (i=classwords-1; i >= 0; --i) {
   2197                      classifications[0][i+pcount] = q % r->classifications;
   2198                      q /= r->classifications;
   2199                   }
   2200                   #endif
   2201                }
   2202                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
   2203                   int z = r->begin + pcount*r->part_size;
   2204                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2205                   int c = part_classdata[0][class_set][i];
   2206                   #else
   2207                   int c = classifications[0][pcount];
   2208                   #endif
   2209                   int b = r->residue_books[c][pass];
   2210                   if (b >= 0) {
   2211                      Codebook *book = f->codebooks + b;
   2212                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
   2213                         goto done;
   2214                   } else {
   2215                      z += r->part_size;
   2216                      c_inter = z % ch;
   2217                      p_inter = z / ch;
   2218                   }
   2219                }
   2220                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2221                ++class_set;
   2222                #endif
   2223             }
   2224          }
   2225       }
   2226       goto done;
   2227    }
   2228    CHECK(f);
   2229 
   2230    for (pass=0; pass < 8; ++pass) {
   2231       int pcount = 0, class_set=0;
   2232       while (pcount < part_read) {
   2233          if (pass == 0) {
   2234             for (j=0; j < ch; ++j) {
   2235                if (!do_not_decode[j]) {
   2236                   Codebook *c = f->codebooks+r->classbook;
   2237                   int temp;
   2238                   DECODE(temp,f,c);
   2239                   if (temp == EOP) goto done;
   2240                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2241                   part_classdata[j][class_set] = r->classdata[temp];
   2242                   #else
   2243                   for (i=classwords-1; i >= 0; --i) {
   2244                      classifications[j][i+pcount] = temp % r->classifications;
   2245                      temp /= r->classifications;
   2246                   }
   2247                   #endif
   2248                }
   2249             }
   2250          }
   2251          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
   2252             for (j=0; j < ch; ++j) {
   2253                if (!do_not_decode[j]) {
   2254                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2255                   int c = part_classdata[j][class_set][i];
   2256                   #else
   2257                   int c = classifications[j][pcount];
   2258                   #endif
   2259                   int b = r->residue_books[c][pass];
   2260                   if (b >= 0) {
   2261                      float *target = residue_buffers[j];
   2262                      int offset = r->begin + pcount * r->part_size;
   2263                      int n = r->part_size;
   2264                      Codebook *book = f->codebooks + b;
   2265                      if (!residue_decode(f, book, target, offset, n, rtype))
   2266                         goto done;
   2267                   }
   2268                }
   2269             }
   2270          }
   2271          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2272          ++class_set;
   2273          #endif
   2274       }
   2275    }
   2276   done:
   2277    CHECK(f);
   2278    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   2279    temp_free(f,part_classdata);
   2280    #else
   2281    temp_free(f,classifications);
   2282    #endif
   2283    temp_alloc_restore(f,temp_alloc_point);
   2284 }
   2285 
   2286 
   2287 #if 0
   2288 // slow way for debugging
   2289 void inverse_mdct_slow(float *buffer, int n)
   2290 {
   2291    int i,j;
   2292    int n2 = n >> 1;
   2293    float *x = (float *) malloc(sizeof(*x) * n2);
   2294    memcpy(x, buffer, sizeof(*x) * n2);
   2295    for (i=0; i < n; ++i) {
   2296       float acc = 0;
   2297       for (j=0; j < n2; ++j)
   2298          // formula from paper:
   2299          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
   2300          // formula from wikipedia
   2301          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
   2302          // these are equivalent, except the formula from the paper inverts the multiplier!
   2303          // however, what actually works is NO MULTIPLIER!?!
   2304          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
   2305          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
   2306       buffer[i] = acc;
   2307    }
   2308    free(x);
   2309 }
   2310 #elif 0
   2311 // same as above, but just barely able to run in real time on modern machines
   2312 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
   2313 {
   2314    float mcos[16384];
   2315    int i,j;
   2316    int n2 = n >> 1, nmask = (n << 2) -1;
   2317    float *x = (float *) malloc(sizeof(*x) * n2);
   2318    memcpy(x, buffer, sizeof(*x) * n2);
   2319    for (i=0; i < 4*n; ++i)
   2320       mcos[i] = (float) cos(M_PI / 2 * i / n);
   2321 
   2322    for (i=0; i < n; ++i) {
   2323       float acc = 0;
   2324       for (j=0; j < n2; ++j)
   2325          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
   2326       buffer[i] = acc;
   2327    }
   2328    free(x);
   2329 }
   2330 #elif 0
   2331 // transform to use a slow dct-iv; this is STILL basically trivial,
   2332 // but only requires half as many ops
   2333 void dct_iv_slow(float *buffer, int n)
   2334 {
   2335    float mcos[16384];
   2336    float x[2048];
   2337    int i,j;
   2338    int n2 = n >> 1, nmask = (n << 3) - 1;
   2339    memcpy(x, buffer, sizeof(*x) * n);
   2340    for (i=0; i < 8*n; ++i)
   2341       mcos[i] = (float) cos(M_PI / 4 * i / n);
   2342    for (i=0; i < n; ++i) {
   2343       float acc = 0;
   2344       for (j=0; j < n; ++j)
   2345          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
   2346       buffer[i] = acc;
   2347    }
   2348 }
   2349 
   2350 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
   2351 {
   2352    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
   2353    float temp[4096];
   2354 
   2355    memcpy(temp, buffer, n2 * sizeof(float));
   2356    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
   2357 
   2358    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
   2359    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
   2360    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
   2361 }
   2362 #endif
   2363 
   2364 #ifndef LIBVORBIS_MDCT
   2365 #define LIBVORBIS_MDCT 0
   2366 #endif
   2367 
   2368 #if LIBVORBIS_MDCT
   2369 // directly call the vorbis MDCT using an interface documented
   2370 // by Jeff Roberts... useful for performance comparison
   2371 typedef struct
   2372 {
   2373   int n;
   2374   int log2n;
   2375 
   2376   float *trig;
   2377   int   *bitrev;
   2378 
   2379   float scale;
   2380 } mdct_lookup;
   2381 
   2382 extern void mdct_init(mdct_lookup *lookup, int n);
   2383 extern void mdct_clear(mdct_lookup *l);
   2384 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
   2385 
   2386 mdct_lookup M1,M2;
   2387 
   2388 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
   2389 {
   2390    mdct_lookup *M;
   2391    if (M1.n == n) M = &M1;
   2392    else if (M2.n == n) M = &M2;
   2393    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
   2394    else {
   2395       if (M2.n) __asm int 3;
   2396       mdct_init(&M2, n);
   2397       M = &M2;
   2398    }
   2399 
   2400    mdct_backward(M, buffer, buffer);
   2401 }
   2402 #endif
   2403 
   2404 
   2405 // the following were split out into separate functions while optimizing;
   2406 // they could be pushed back up but eh. __forceinline showed no change;
   2407 // they're probably already being inlined.
   2408 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
   2409 {
   2410    float *ee0 = e + i_off;
   2411    float *ee2 = ee0 + k_off;
   2412    int i;
   2413 
   2414    assert((n & 3) == 0);
   2415    for (i=(n>>2); i > 0; --i) {
   2416       float k00_20, k01_21;
   2417       k00_20  = ee0[ 0] - ee2[ 0];
   2418       k01_21  = ee0[-1] - ee2[-1];
   2419       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
   2420       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
   2421       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
   2422       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
   2423       A += 8;
   2424 
   2425       k00_20  = ee0[-2] - ee2[-2];
   2426       k01_21  = ee0[-3] - ee2[-3];
   2427       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
   2428       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
   2429       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
   2430       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
   2431       A += 8;
   2432 
   2433       k00_20  = ee0[-4] - ee2[-4];
   2434       k01_21  = ee0[-5] - ee2[-5];
   2435       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
   2436       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
   2437       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
   2438       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
   2439       A += 8;
   2440 
   2441       k00_20  = ee0[-6] - ee2[-6];
   2442       k01_21  = ee0[-7] - ee2[-7];
   2443       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
   2444       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
   2445       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
   2446       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
   2447       A += 8;
   2448       ee0 -= 8;
   2449       ee2 -= 8;
   2450    }
   2451 }
   2452 
   2453 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
   2454 {
   2455    int i;
   2456    float k00_20, k01_21;
   2457 
   2458    float *e0 = e + d0;
   2459    float *e2 = e0 + k_off;
   2460 
   2461    for (i=lim >> 2; i > 0; --i) {
   2462       k00_20 = e0[-0] - e2[-0];
   2463       k01_21 = e0[-1] - e2[-1];
   2464       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
   2465       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
   2466       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
   2467       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
   2468 
   2469       A += k1;
   2470 
   2471       k00_20 = e0[-2] - e2[-2];
   2472       k01_21 = e0[-3] - e2[-3];
   2473       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
   2474       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
   2475       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
   2476       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
   2477 
   2478       A += k1;
   2479 
   2480       k00_20 = e0[-4] - e2[-4];
   2481       k01_21 = e0[-5] - e2[-5];
   2482       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
   2483       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
   2484       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
   2485       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
   2486 
   2487       A += k1;
   2488 
   2489       k00_20 = e0[-6] - e2[-6];
   2490       k01_21 = e0[-7] - e2[-7];
   2491       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
   2492       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
   2493       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
   2494       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
   2495 
   2496       e0 -= 8;
   2497       e2 -= 8;
   2498 
   2499       A += k1;
   2500    }
   2501 }
   2502 
   2503 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
   2504 {
   2505    int i;
   2506    float A0 = A[0];
   2507    float A1 = A[0+1];
   2508    float A2 = A[0+a_off];
   2509    float A3 = A[0+a_off+1];
   2510    float A4 = A[0+a_off*2+0];
   2511    float A5 = A[0+a_off*2+1];
   2512    float A6 = A[0+a_off*3+0];
   2513    float A7 = A[0+a_off*3+1];
   2514 
   2515    float k00,k11;
   2516 
   2517    float *ee0 = e  +i_off;
   2518    float *ee2 = ee0+k_off;
   2519 
   2520    for (i=n; i > 0; --i) {
   2521       k00     = ee0[ 0] - ee2[ 0];
   2522       k11     = ee0[-1] - ee2[-1];
   2523       ee0[ 0] =  ee0[ 0] + ee2[ 0];
   2524       ee0[-1] =  ee0[-1] + ee2[-1];
   2525       ee2[ 0] = (k00) * A0 - (k11) * A1;
   2526       ee2[-1] = (k11) * A0 + (k00) * A1;
   2527 
   2528       k00     = ee0[-2] - ee2[-2];
   2529       k11     = ee0[-3] - ee2[-3];
   2530       ee0[-2] =  ee0[-2] + ee2[-2];
   2531       ee0[-3] =  ee0[-3] + ee2[-3];
   2532       ee2[-2] = (k00) * A2 - (k11) * A3;
   2533       ee2[-3] = (k11) * A2 + (k00) * A3;
   2534 
   2535       k00     = ee0[-4] - ee2[-4];
   2536       k11     = ee0[-5] - ee2[-5];
   2537       ee0[-4] =  ee0[-4] + ee2[-4];
   2538       ee0[-5] =  ee0[-5] + ee2[-5];
   2539       ee2[-4] = (k00) * A4 - (k11) * A5;
   2540       ee2[-5] = (k11) * A4 + (k00) * A5;
   2541 
   2542       k00     = ee0[-6] - ee2[-6];
   2543       k11     = ee0[-7] - ee2[-7];
   2544       ee0[-6] =  ee0[-6] + ee2[-6];
   2545       ee0[-7] =  ee0[-7] + ee2[-7];
   2546       ee2[-6] = (k00) * A6 - (k11) * A7;
   2547       ee2[-7] = (k11) * A6 + (k00) * A7;
   2548 
   2549       ee0 -= k0;
   2550       ee2 -= k0;
   2551    }
   2552 }
   2553 
   2554 static __forceinline void iter_54(float *z)
   2555 {
   2556    float k00,k11,k22,k33;
   2557    float y0,y1,y2,y3;
   2558 
   2559    k00  = z[ 0] - z[-4];
   2560    y0   = z[ 0] + z[-4];
   2561    y2   = z[-2] + z[-6];
   2562    k22  = z[-2] - z[-6];
   2563 
   2564    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
   2565    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
   2566 
   2567    // done with y0,y2
   2568 
   2569    k33  = z[-3] - z[-7];
   2570 
   2571    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
   2572    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
   2573 
   2574    // done with k33
   2575 
   2576    k11  = z[-1] - z[-5];
   2577    y1   = z[-1] + z[-5];
   2578    y3   = z[-3] + z[-7];
   2579 
   2580    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
   2581    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
   2582    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
   2583    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
   2584 }
   2585 
   2586 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
   2587 {
   2588    int a_off = base_n >> 3;
   2589    float A2 = A[0+a_off];
   2590    float *z = e + i_off;
   2591    float *base = z - 16 * n;
   2592 
   2593    while (z > base) {
   2594       float k00,k11;
   2595       float l00,l11;
   2596 
   2597       k00    = z[-0] - z[ -8];
   2598       k11    = z[-1] - z[ -9];
   2599       l00    = z[-2] - z[-10];
   2600       l11    = z[-3] - z[-11];
   2601       z[ -0] = z[-0] + z[ -8];
   2602       z[ -1] = z[-1] + z[ -9];
   2603       z[ -2] = z[-2] + z[-10];
   2604       z[ -3] = z[-3] + z[-11];
   2605       z[ -8] = k00;
   2606       z[ -9] = k11;
   2607       z[-10] = (l00+l11) * A2;
   2608       z[-11] = (l11-l00) * A2;
   2609 
   2610       k00    = z[ -4] - z[-12];
   2611       k11    = z[ -5] - z[-13];
   2612       l00    = z[ -6] - z[-14];
   2613       l11    = z[ -7] - z[-15];
   2614       z[ -4] = z[ -4] + z[-12];
   2615       z[ -5] = z[ -5] + z[-13];
   2616       z[ -6] = z[ -6] + z[-14];
   2617       z[ -7] = z[ -7] + z[-15];
   2618       z[-12] = k11;
   2619       z[-13] = -k00;
   2620       z[-14] = (l11-l00) * A2;
   2621       z[-15] = (l00+l11) * -A2;
   2622 
   2623       iter_54(z);
   2624       iter_54(z-8);
   2625       z -= 16;
   2626    }
   2627 }
   2628 
   2629 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
   2630 {
   2631    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
   2632    int ld;
   2633    // @OPTIMIZE: reduce register pressure by using fewer variables?
   2634    int save_point = temp_alloc_save(f);
   2635    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
   2636    float *u=NULL,*v=NULL;
   2637    // twiddle factors
   2638    float *A = f->A[blocktype];
   2639 
   2640    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
   2641    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
   2642 
   2643    // kernel from paper
   2644 
   2645 
   2646    // merged:
   2647    //   copy and reflect spectral data
   2648    //   step 0
   2649 
   2650    // note that it turns out that the items added together during
   2651    // this step are, in fact, being added to themselves (as reflected
   2652    // by step 0). inexplicable inefficiency! this became obvious
   2653    // once I combined the passes.
   2654 
   2655    // so there's a missing 'times 2' here (for adding X to itself).
   2656    // this propagates through linearly to the end, where the numbers
   2657    // are 1/2 too small, and need to be compensated for.
   2658 
   2659    {
   2660       float *d,*e, *AA, *e_stop;
   2661       d = &buf2[n2-2];
   2662       AA = A;
   2663       e = &buffer[0];
   2664       e_stop = &buffer[n2];
   2665       while (e != e_stop) {
   2666          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
   2667          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
   2668          d -= 2;
   2669          AA += 2;
   2670          e += 4;
   2671       }
   2672 
   2673       e = &buffer[n2-3];
   2674       while (d >= buf2) {
   2675          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
   2676          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
   2677          d -= 2;
   2678          AA += 2;
   2679          e -= 4;
   2680       }
   2681    }
   2682 
   2683    // now we use symbolic names for these, so that we can
   2684    // possibly swap their meaning as we change which operations
   2685    // are in place
   2686 
   2687    u = buffer;
   2688    v = buf2;
   2689 
   2690    // step 2    (paper output is w, now u)
   2691    // this could be in place, but the data ends up in the wrong
   2692    // place... _somebody_'s got to swap it, so this is nominated
   2693    {
   2694       float *AA = &A[n2-8];
   2695       float *d0,*d1, *e0, *e1;
   2696 
   2697       e0 = &v[n4];
   2698       e1 = &v[0];
   2699 
   2700       d0 = &u[n4];
   2701       d1 = &u[0];
   2702 
   2703       while (AA >= A) {
   2704          float v40_20, v41_21;
   2705 
   2706          v41_21 = e0[1] - e1[1];
   2707          v40_20 = e0[0] - e1[0];
   2708          d0[1]  = e0[1] + e1[1];
   2709          d0[0]  = e0[0] + e1[0];
   2710          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
   2711          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
   2712 
   2713          v41_21 = e0[3] - e1[3];
   2714          v40_20 = e0[2] - e1[2];
   2715          d0[3]  = e0[3] + e1[3];
   2716          d0[2]  = e0[2] + e1[2];
   2717          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
   2718          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
   2719 
   2720          AA -= 8;
   2721 
   2722          d0 += 4;
   2723          d1 += 4;
   2724          e0 += 4;
   2725          e1 += 4;
   2726       }
   2727    }
   2728 
   2729    // step 3
   2730    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
   2731 
   2732    // optimized step 3:
   2733 
   2734    // the original step3 loop can be nested r inside s or s inside r;
   2735    // it's written originally as s inside r, but this is dumb when r
   2736    // iterates many times, and s few. So I have two copies of it and
   2737    // switch between them halfway.
   2738 
   2739    // this is iteration 0 of step 3
   2740    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
   2741    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
   2742 
   2743    // this is iteration 1 of step 3
   2744    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
   2745    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
   2746    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
   2747    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
   2748 
   2749    l=2;
   2750    for (; l < (ld-3)>>1; ++l) {
   2751       int k0 = n >> (l+2), k0_2 = k0>>1;
   2752       int lim = 1 << (l+1);
   2753       int i;
   2754       for (i=0; i < lim; ++i)
   2755          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
   2756    }
   2757 
   2758    for (; l < ld-6; ++l) {
   2759       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
   2760       int rlim = n >> (l+6), r;
   2761       int lim = 1 << (l+1);
   2762       int i_off;
   2763       float *A0 = A;
   2764       i_off = n2-1;
   2765       for (r=rlim; r > 0; --r) {
   2766          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
   2767          A0 += k1*4;
   2768          i_off -= 8;
   2769       }
   2770    }
   2771 
   2772    // iterations with count:
   2773    //   ld-6,-5,-4 all interleaved together
   2774    //       the big win comes from getting rid of needless flops
   2775    //         due to the constants on pass 5 & 4 being all 1 and 0;
   2776    //       combining them to be simultaneous to improve cache made little difference
   2777    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
   2778 
   2779    // output is u
   2780 
   2781    // step 4, 5, and 6
   2782    // cannot be in-place because of step 5
   2783    {
   2784       uint16 *bitrev = f->bit_reverse[blocktype];
   2785       // weirdly, I'd have thought reading sequentially and writing
   2786       // erratically would have been better than vice-versa, but in
   2787       // fact that's not what my testing showed. (That is, with
   2788       // j = bitreverse(i), do you read i and write j, or read j and write i.)
   2789 
   2790       float *d0 = &v[n4-4];
   2791       float *d1 = &v[n2-4];
   2792       while (d0 >= v) {
   2793          int k4;
   2794 
   2795          k4 = bitrev[0];
   2796          d1[3] = u[k4+0];
   2797          d1[2] = u[k4+1];
   2798          d0[3] = u[k4+2];
   2799          d0[2] = u[k4+3];
   2800 
   2801          k4 = bitrev[1];
   2802          d1[1] = u[k4+0];
   2803          d1[0] = u[k4+1];
   2804          d0[1] = u[k4+2];
   2805          d0[0] = u[k4+3];
   2806 
   2807          d0 -= 4;
   2808          d1 -= 4;
   2809          bitrev += 2;
   2810       }
   2811    }
   2812    // (paper output is u, now v)
   2813 
   2814 
   2815    // data must be in buf2
   2816    assert(v == buf2);
   2817 
   2818    // step 7   (paper output is v, now v)
   2819    // this is now in place
   2820    {
   2821       float *C = f->C[blocktype];
   2822       float *d, *e;
   2823 
   2824       d = v;
   2825       e = v + n2 - 4;
   2826 
   2827       while (d < e) {
   2828          float a02,a11,b0,b1,b2,b3;
   2829 
   2830          a02 = d[0] - e[2];
   2831          a11 = d[1] + e[3];
   2832 
   2833          b0 = C[1]*a02 + C[0]*a11;
   2834          b1 = C[1]*a11 - C[0]*a02;
   2835 
   2836          b2 = d[0] + e[ 2];
   2837          b3 = d[1] - e[ 3];
   2838 
   2839          d[0] = b2 + b0;
   2840          d[1] = b3 + b1;
   2841          e[2] = b2 - b0;
   2842          e[3] = b1 - b3;
   2843 
   2844          a02 = d[2] - e[0];
   2845          a11 = d[3] + e[1];
   2846 
   2847          b0 = C[3]*a02 + C[2]*a11;
   2848          b1 = C[3]*a11 - C[2]*a02;
   2849 
   2850          b2 = d[2] + e[ 0];
   2851          b3 = d[3] - e[ 1];
   2852 
   2853          d[2] = b2 + b0;
   2854          d[3] = b3 + b1;
   2855          e[0] = b2 - b0;
   2856          e[1] = b1 - b3;
   2857 
   2858          C += 4;
   2859          d += 4;
   2860          e -= 4;
   2861       }
   2862    }
   2863 
   2864    // data must be in buf2
   2865 
   2866 
   2867    // step 8+decode   (paper output is X, now buffer)
   2868    // this generates pairs of data a la 8 and pushes them directly through
   2869    // the decode kernel (pushing rather than pulling) to avoid having
   2870    // to make another pass later
   2871 
   2872    // this cannot POSSIBLY be in place, so we refer to the buffers directly
   2873 
   2874    {
   2875       float *d0,*d1,*d2,*d3;
   2876 
   2877       float *B = f->B[blocktype] + n2 - 8;
   2878       float *e = buf2 + n2 - 8;
   2879       d0 = &buffer[0];
   2880       d1 = &buffer[n2-4];
   2881       d2 = &buffer[n2];
   2882       d3 = &buffer[n-4];
   2883       while (e >= v) {
   2884          float p0,p1,p2,p3;
   2885 
   2886          p3 =  e[6]*B[7] - e[7]*B[6];
   2887          p2 = -e[6]*B[6] - e[7]*B[7];
   2888 
   2889          d0[0] =   p3;
   2890          d1[3] = - p3;
   2891          d2[0] =   p2;
   2892          d3[3] =   p2;
   2893 
   2894          p1 =  e[4]*B[5] - e[5]*B[4];
   2895          p0 = -e[4]*B[4] - e[5]*B[5];
   2896 
   2897          d0[1] =   p1;
   2898          d1[2] = - p1;
   2899          d2[1] =   p0;
   2900          d3[2] =   p0;
   2901 
   2902          p3 =  e[2]*B[3] - e[3]*B[2];
   2903          p2 = -e[2]*B[2] - e[3]*B[3];
   2904 
   2905          d0[2] =   p3;
   2906          d1[1] = - p3;
   2907          d2[2] =   p2;
   2908          d3[1] =   p2;
   2909 
   2910          p1 =  e[0]*B[1] - e[1]*B[0];
   2911          p0 = -e[0]*B[0] - e[1]*B[1];
   2912 
   2913          d0[3] =   p1;
   2914          d1[0] = - p1;
   2915          d2[3] =   p0;
   2916          d3[0] =   p0;
   2917 
   2918          B -= 8;
   2919          e -= 8;
   2920          d0 += 4;
   2921          d2 += 4;
   2922          d1 -= 4;
   2923          d3 -= 4;
   2924       }
   2925    }
   2926 
   2927    temp_free(f,buf2);
   2928    temp_alloc_restore(f,save_point);
   2929 }
   2930 
   2931 #if 0
   2932 // this is the original version of the above code, if you want to optimize it from scratch
   2933 void inverse_mdct_naive(float *buffer, int n)
   2934 {
   2935    float s;
   2936    float A[1 << 12], B[1 << 12], C[1 << 11];
   2937    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
   2938    int n3_4 = n - n4, ld;
   2939    // how can they claim this only uses N words?!
   2940    // oh, because they're only used sparsely, whoops
   2941    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
   2942    // set up twiddle factors
   2943 
   2944    for (k=k2=0; k < n4; ++k,k2+=2) {
   2945       A[k2  ] = (float)  cos(4*k*M_PI/n);
   2946       A[k2+1] = (float) -sin(4*k*M_PI/n);
   2947       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
   2948       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
   2949    }
   2950    for (k=k2=0; k < n8; ++k,k2+=2) {
   2951       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
   2952       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
   2953    }
   2954 
   2955    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
   2956    // Note there are bugs in that pseudocode, presumably due to them attempting
   2957    // to rename the arrays nicely rather than representing the way their actual
   2958    // implementation bounces buffers back and forth. As a result, even in the
   2959    // "some formulars corrected" version, a direct implementation fails. These
   2960    // are noted below as "paper bug".
   2961 
   2962    // copy and reflect spectral data
   2963    for (k=0; k < n2; ++k) u[k] = buffer[k];
   2964    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
   2965    // kernel from paper
   2966    // step 1
   2967    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
   2968       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
   2969       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
   2970    }
   2971    // step 2
   2972    for (k=k4=0; k < n8; k+=1, k4+=4) {
   2973       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
   2974       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
   2975       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
   2976       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
   2977    }
   2978    // step 3
   2979    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
   2980    for (l=0; l < ld-3; ++l) {
   2981       int k0 = n >> (l+2), k1 = 1 << (l+3);
   2982       int rlim = n >> (l+4), r4, r;
   2983       int s2lim = 1 << (l+2), s2;
   2984       for (r=r4=0; r < rlim; r4+=4,++r) {
   2985          for (s2=0; s2 < s2lim; s2+=2) {
   2986             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
   2987             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
   2988             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
   2989                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
   2990             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
   2991                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
   2992          }
   2993       }
   2994       if (l+1 < ld-3) {
   2995          // paper bug: ping-ponging of u&w here is omitted
   2996          memcpy(w, u, sizeof(u));
   2997       }
   2998    }
   2999 
   3000    // step 4
   3001    for (i=0; i < n8; ++i) {
   3002       int j = bit_reverse(i) >> (32-ld+3);
   3003       assert(j < n8);
   3004       if (i == j) {
   3005          // paper bug: original code probably swapped in place; if copying,
   3006          //            need to directly copy in this case
   3007          int i8 = i << 3;
   3008          v[i8+1] = u[i8+1];
   3009          v[i8+3] = u[i8+3];
   3010          v[i8+5] = u[i8+5];
   3011          v[i8+7] = u[i8+7];
   3012       } else if (i < j) {
   3013          int i8 = i << 3, j8 = j << 3;
   3014          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
   3015          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
   3016          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
   3017          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
   3018       }
   3019    }
   3020    // step 5
   3021    for (k=0; k < n2; ++k) {
   3022       w[k] = v[k*2+1];
   3023    }
   3024    // step 6
   3025    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
   3026       u[n-1-k2] = w[k4];
   3027       u[n-2-k2] = w[k4+1];
   3028       u[n3_4 - 1 - k2] = w[k4+2];
   3029       u[n3_4 - 2 - k2] = w[k4+3];
   3030    }
   3031    // step 7
   3032    for (k=k2=0; k < n8; ++k, k2 += 2) {
   3033       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
   3034       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
   3035       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
   3036       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
   3037    }
   3038    // step 8
   3039    for (k=k2=0; k < n4; ++k,k2 += 2) {
   3040       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
   3041       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
   3042    }
   3043 
   3044    // decode kernel to output
   3045    // determined the following value experimentally
   3046    // (by first figuring out what made inverse_mdct_slow work); then matching that here
   3047    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
   3048    s = 0.5; // theoretically would be n4
   3049 
   3050    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
   3051    //     so it needs to use the "old" B values to behave correctly, or else
   3052    //     set s to 1.0 ]]]
   3053    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
   3054    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
   3055    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
   3056 }
   3057 #endif
   3058 
   3059 static float *get_window(vorb *f, int len)
   3060 {
   3061    len <<= 1;
   3062    if (len == f->blocksize_0) return f->window[0];
   3063    if (len == f->blocksize_1) return f->window[1];
   3064    return NULL;
   3065 }
   3066 
   3067 #ifndef STB_VORBIS_NO_DEFER_FLOOR
   3068 typedef int16 YTYPE;
   3069 #else
   3070 typedef int YTYPE;
   3071 #endif
   3072 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
   3073 {
   3074    int n2 = n >> 1;
   3075    int s = map->chan[i].mux, floor;
   3076    floor = map->submap_floor[s];
   3077    if (f->floor_types[floor] == 0) {
   3078       return error(f, VORBIS_invalid_stream);
   3079    } else {
   3080       Floor1 *g = &f->floor_config[floor].floor1;
   3081       int j,q;
   3082       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
   3083       for (q=1; q < g->values; ++q) {
   3084          j = g->sorted_order[q];
   3085          #ifndef STB_VORBIS_NO_DEFER_FLOOR
   3086          STBV_NOTUSED(step2_flag);
   3087          if (finalY[j] >= 0)
   3088          #else
   3089          if (step2_flag[j])
   3090          #endif
   3091          {
   3092             int hy = finalY[j] * g->floor1_multiplier;
   3093             int hx = g->Xlist[j];
   3094             if (lx != hx)
   3095                draw_line(target, lx,ly, hx,hy, n2);
   3096             CHECK(f);
   3097             lx = hx, ly = hy;
   3098          }
   3099       }
   3100       if (lx < n2) {
   3101          // optimization of: draw_line(target, lx,ly, n,ly, n2);
   3102          for (j=lx; j < n2; ++j)
   3103             LINE_OP(target[j], inverse_db_table[ly]);
   3104          CHECK(f);
   3105       }
   3106    }
   3107    return TRUE;
   3108 }
   3109 
   3110 // The meaning of "left" and "right"
   3111 //
   3112 // For a given frame:
   3113 //     we compute samples from 0..n
   3114 //     window_center is n/2
   3115 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
   3116 //     all of the samples from left_end to right_start can be output without mixing; however,
   3117 //        this interval is 0-length except when transitioning between short and long frames
   3118 //     all of the samples from right_start to right_end need to be mixed with the next frame,
   3119 //        which we don't have, so those get saved in a buffer
   3120 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
   3121 //        has to be the same as frame N+1's left_end-left_start (which they are by
   3122 //        construction)
   3123 
   3124 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
   3125 {
   3126    Mode *m;
   3127    int i, n, prev, next, window_center;
   3128    f->channel_buffer_start = f->channel_buffer_end = 0;
   3129 
   3130   retry:
   3131    if (f->eof) return FALSE;
   3132    if (!maybe_start_packet(f))
   3133       return FALSE;
   3134    // check packet type
   3135    if (get_bits(f,1) != 0) {
   3136       if (IS_PUSH_MODE(f))
   3137          return error(f,VORBIS_bad_packet_type);
   3138       while (EOP != get8_packet(f));
   3139       goto retry;
   3140    }
   3141 
   3142    if (f->alloc.alloc_buffer)
   3143       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   3144 
   3145    i = get_bits(f, ilog(f->mode_count-1));
   3146    if (i == EOP) return FALSE;
   3147    if (i >= f->mode_count) return FALSE;
   3148    *mode = i;
   3149    m = f->mode_config + i;
   3150    if (m->blockflag) {
   3151       n = f->blocksize_1;
   3152       prev = get_bits(f,1);
   3153       next = get_bits(f,1);
   3154    } else {
   3155       prev = next = 0;
   3156       n = f->blocksize_0;
   3157    }
   3158 
   3159 // WINDOWING
   3160 
   3161    window_center = n >> 1;
   3162    if (m->blockflag && !prev) {
   3163       *p_left_start = (n - f->blocksize_0) >> 2;
   3164       *p_left_end   = (n + f->blocksize_0) >> 2;
   3165    } else {
   3166       *p_left_start = 0;
   3167       *p_left_end   = window_center;
   3168    }
   3169    if (m->blockflag && !next) {
   3170       *p_right_start = (n*3 - f->blocksize_0) >> 2;
   3171       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
   3172    } else {
   3173       *p_right_start = window_center;
   3174       *p_right_end   = n;
   3175    }
   3176 
   3177    return TRUE;
   3178 }
   3179 
   3180 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
   3181 {
   3182    Mapping *map;
   3183    int i,j,k,n,n2;
   3184    int zero_channel[256];
   3185    int really_zero_channel[256];
   3186 
   3187 // WINDOWING
   3188 
   3189    STBV_NOTUSED(left_end);
   3190    n = f->blocksize[m->blockflag];
   3191    map = &f->mapping[m->mapping];
   3192 
   3193 // FLOORS
   3194    n2 = n >> 1;
   3195 
   3196    CHECK(f);
   3197 
   3198    for (i=0; i < f->channels; ++i) {
   3199       int s = map->chan[i].mux, floor;
   3200       zero_channel[i] = FALSE;
   3201       floor = map->submap_floor[s];
   3202       if (f->floor_types[floor] == 0) {
   3203          return error(f, VORBIS_invalid_stream);
   3204       } else {
   3205          Floor1 *g = &f->floor_config[floor].floor1;
   3206          if (get_bits(f, 1)) {
   3207             short *finalY;
   3208             uint8 step2_flag[256];
   3209             static int range_list[4] = { 256, 128, 86, 64 };
   3210             int range = range_list[g->floor1_multiplier-1];
   3211             int offset = 2;
   3212             finalY = f->finalY[i];
   3213             finalY[0] = get_bits(f, ilog(range)-1);
   3214             finalY[1] = get_bits(f, ilog(range)-1);
   3215             for (j=0; j < g->partitions; ++j) {
   3216                int pclass = g->partition_class_list[j];
   3217                int cdim = g->class_dimensions[pclass];
   3218                int cbits = g->class_subclasses[pclass];
   3219                int csub = (1 << cbits)-1;
   3220                int cval = 0;
   3221                if (cbits) {
   3222                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
   3223                   DECODE(cval,f,c);
   3224                }
   3225                for (k=0; k < cdim; ++k) {
   3226                   int book = g->subclass_books[pclass][cval & csub];
   3227                   cval = cval >> cbits;
   3228                   if (book >= 0) {
   3229                      int temp;
   3230                      Codebook *c = f->codebooks + book;
   3231                      DECODE(temp,f,c);
   3232                      finalY[offset++] = temp;
   3233                   } else
   3234                      finalY[offset++] = 0;
   3235                }
   3236             }
   3237             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
   3238             step2_flag[0] = step2_flag[1] = 1;
   3239             for (j=2; j < g->values; ++j) {
   3240                int low, high, pred, highroom, lowroom, room, val;
   3241                low = g->neighbors[j][0];
   3242                high = g->neighbors[j][1];
   3243                //neighbors(g->Xlist, j, &low, &high);
   3244                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
   3245                val = finalY[j];
   3246                highroom = range - pred;
   3247                lowroom = pred;
   3248                if (highroom < lowroom)
   3249                   room = highroom * 2;
   3250                else
   3251                   room = lowroom * 2;
   3252                if (val) {
   3253                   step2_flag[low] = step2_flag[high] = 1;
   3254                   step2_flag[j] = 1;
   3255                   if (val >= room)
   3256                      if (highroom > lowroom)
   3257                         finalY[j] = val - lowroom + pred;
   3258                      else
   3259                         finalY[j] = pred - val + highroom - 1;
   3260                   else
   3261                      if (val & 1)
   3262                         finalY[j] = pred - ((val+1)>>1);
   3263                      else
   3264                         finalY[j] = pred + (val>>1);
   3265                } else {
   3266                   step2_flag[j] = 0;
   3267                   finalY[j] = pred;
   3268                }
   3269             }
   3270 
   3271 #ifdef STB_VORBIS_NO_DEFER_FLOOR
   3272             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
   3273 #else
   3274             // defer final floor computation until _after_ residue
   3275             for (j=0; j < g->values; ++j) {
   3276                if (!step2_flag[j])
   3277                   finalY[j] = -1;
   3278             }
   3279 #endif
   3280          } else {
   3281            error:
   3282             zero_channel[i] = TRUE;
   3283          }
   3284          // So we just defer everything else to later
   3285 
   3286          // at this point we've decoded the floor into buffer
   3287       }
   3288    }
   3289    CHECK(f);
   3290    // at this point we've decoded all floors
   3291 
   3292    if (f->alloc.alloc_buffer)
   3293       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   3294 
   3295    // re-enable coupled channels if necessary
   3296    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
   3297    for (i=0; i < map->coupling_steps; ++i)
   3298       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
   3299          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
   3300       }
   3301 
   3302    CHECK(f);
   3303 // RESIDUE DECODE
   3304    for (i=0; i < map->submaps; ++i) {
   3305       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
   3306       int r;
   3307       uint8 do_not_decode[256];
   3308       int ch = 0;
   3309       for (j=0; j < f->channels; ++j) {
   3310          if (map->chan[j].mux == i) {
   3311             if (zero_channel[j]) {
   3312                do_not_decode[ch] = TRUE;
   3313                residue_buffers[ch] = NULL;
   3314             } else {
   3315                do_not_decode[ch] = FALSE;
   3316                residue_buffers[ch] = f->channel_buffers[j];
   3317             }
   3318             ++ch;
   3319          }
   3320       }
   3321       r = map->submap_residue[i];
   3322       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
   3323    }
   3324 
   3325    if (f->alloc.alloc_buffer)
   3326       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   3327    CHECK(f);
   3328 
   3329 // INVERSE COUPLING
   3330    for (i = map->coupling_steps-1; i >= 0; --i) {
   3331       int n2 = n >> 1;
   3332       float *m = f->channel_buffers[map->chan[i].magnitude];
   3333       float *a = f->channel_buffers[map->chan[i].angle    ];
   3334       for (j=0; j < n2; ++j) {
   3335          float a2,m2;
   3336          if (m[j] > 0)
   3337             if (a[j] > 0)
   3338                m2 = m[j], a2 = m[j] - a[j];
   3339             else
   3340                a2 = m[j], m2 = m[j] + a[j];
   3341          else
   3342             if (a[j] > 0)
   3343                m2 = m[j], a2 = m[j] + a[j];
   3344             else
   3345                a2 = m[j], m2 = m[j] - a[j];
   3346          m[j] = m2;
   3347          a[j] = a2;
   3348       }
   3349    }
   3350    CHECK(f);
   3351 
   3352    // finish decoding the floors
   3353 #ifndef STB_VORBIS_NO_DEFER_FLOOR
   3354    for (i=0; i < f->channels; ++i) {
   3355       if (really_zero_channel[i]) {
   3356          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
   3357       } else {
   3358          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
   3359       }
   3360    }
   3361 #else
   3362    for (i=0; i < f->channels; ++i) {
   3363       if (really_zero_channel[i]) {
   3364          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
   3365       } else {
   3366          for (j=0; j < n2; ++j)
   3367             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
   3368       }
   3369    }
   3370 #endif
   3371 
   3372 // INVERSE MDCT
   3373    CHECK(f);
   3374    for (i=0; i < f->channels; ++i)
   3375       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
   3376    CHECK(f);
   3377 
   3378    // this shouldn't be necessary, unless we exited on an error
   3379    // and want to flush to get to the next packet
   3380    flush_packet(f);
   3381 
   3382    if (f->first_decode) {
   3383       // assume we start so first non-discarded sample is sample 0
   3384       // this isn't to spec, but spec would require us to read ahead
   3385       // and decode the size of all current frames--could be done,
   3386       // but presumably it's not a commonly used feature
   3387       f->current_loc = 0u - n2; // start of first frame is positioned for discard (NB this is an intentional unsigned overflow/wrap-around)
   3388       // we might have to discard samples "from" the next frame too,
   3389       // if we're lapping a large block then a small at the start?
   3390       f->discard_samples_deferred = n - right_end;
   3391       f->current_loc_valid = TRUE;
   3392       f->first_decode = FALSE;
   3393    } else if (f->discard_samples_deferred) {
   3394       if (f->discard_samples_deferred >= right_start - left_start) {
   3395          f->discard_samples_deferred -= (right_start - left_start);
   3396          left_start = right_start;
   3397          *p_left = left_start;
   3398       } else {
   3399          left_start += f->discard_samples_deferred;
   3400          *p_left = left_start;
   3401          f->discard_samples_deferred = 0;
   3402       }
   3403    } else if (f->previous_length == 0 && f->current_loc_valid) {
   3404       // we're recovering from a seek... that means we're going to discard
   3405       // the samples from this packet even though we know our position from
   3406       // the last page header, so we need to update the position based on
   3407       // the discarded samples here
   3408       // but wait, the code below is going to add this in itself even
   3409       // on a discard, so we don't need to do it here...
   3410    }
   3411 
   3412    // check if we have ogg information about the sample # for this packet
   3413    if (f->last_seg_which == f->end_seg_with_known_loc) {
   3414       // if we have a valid current loc, and this is final:
   3415       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
   3416          uint32 current_end = f->known_loc_for_packet;
   3417          // then let's infer the size of the (probably) short final frame
   3418          if (current_end < f->current_loc + (right_end-left_start)) {
   3419             if (current_end < f->current_loc) {
   3420                // negative truncation, that's impossible!
   3421                *len = 0;
   3422             } else {
   3423                *len = current_end - f->current_loc;
   3424             }
   3425             *len += left_start; // this doesn't seem right, but has no ill effect on my test files
   3426             if (*len > right_end) *len = right_end; // this should never happen
   3427             f->current_loc += *len;
   3428             return TRUE;
   3429          }
   3430       }
   3431       // otherwise, just set our sample loc
   3432       // guess that the ogg granule pos refers to the _middle_ of the
   3433       // last frame?
   3434       // set f->current_loc to the position of left_start
   3435       f->current_loc = f->known_loc_for_packet - (n2-left_start);
   3436       f->current_loc_valid = TRUE;
   3437    }
   3438    if (f->current_loc_valid)
   3439       f->current_loc += (right_start - left_start);
   3440 
   3441    if (f->alloc.alloc_buffer)
   3442       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
   3443    *len = right_end;  // ignore samples after the window goes to 0
   3444    CHECK(f);
   3445 
   3446    return TRUE;
   3447 }
   3448 
   3449 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
   3450 {
   3451    int mode, left_end, right_end;
   3452    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
   3453    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
   3454 }
   3455 
   3456 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
   3457 {
   3458    int prev,i,j;
   3459    // we use right&left (the start of the right- and left-window sin()-regions)
   3460    // to determine how much to return, rather than inferring from the rules
   3461    // (same result, clearer code); 'left' indicates where our sin() window
   3462    // starts, therefore where the previous window's right edge starts, and
   3463    // therefore where to start mixing from the previous buffer. 'right'
   3464    // indicates where our sin() ending-window starts, therefore that's where
   3465    // we start saving, and where our returned-data ends.
   3466 
   3467    // mixin from previous window
   3468    if (f->previous_length) {
   3469       int i,j, n = f->previous_length;
   3470       float *w = get_window(f, n);
   3471       if (w == NULL) return 0;
   3472       for (i=0; i < f->channels; ++i) {
   3473          for (j=0; j < n; ++j)
   3474             f->channel_buffers[i][left+j] =
   3475                f->channel_buffers[i][left+j]*w[    j] +
   3476                f->previous_window[i][     j]*w[n-1-j];
   3477       }
   3478    }
   3479 
   3480    prev = f->previous_length;
   3481 
   3482    // last half of this data becomes previous window
   3483    f->previous_length = len - right;
   3484 
   3485    // @OPTIMIZE: could avoid this copy by double-buffering the
   3486    // output (flipping previous_window with channel_buffers), but
   3487    // then previous_window would have to be 2x as large, and
   3488    // channel_buffers couldn't be temp mem (although they're NOT
   3489    // currently temp mem, they could be (unless we want to level
   3490    // performance by spreading out the computation))
   3491    for (i=0; i < f->channels; ++i)
   3492       for (j=0; right+j < len; ++j)
   3493          f->previous_window[i][j] = f->channel_buffers[i][right+j];
   3494 
   3495    if (!prev)
   3496       // there was no previous packet, so this data isn't valid...
   3497       // this isn't entirely true, only the would-have-overlapped data
   3498       // isn't valid, but this seems to be what the spec requires
   3499       return 0;
   3500 
   3501    // truncate a short frame
   3502    if (len < right) right = len;
   3503 
   3504    f->samples_output += right-left;
   3505 
   3506    return right - left;
   3507 }
   3508 
   3509 static int vorbis_pump_first_frame(stb_vorbis *f)
   3510 {
   3511    int len, right, left, res;
   3512    res = vorbis_decode_packet(f, &len, &left, &right);
   3513    if (res)
   3514       vorbis_finish_frame(f, len, left, right);
   3515    return res;
   3516 }
   3517 
   3518 #ifndef STB_VORBIS_NO_PUSHDATA_API
   3519 static int is_whole_packet_present(stb_vorbis *f)
   3520 {
   3521    // make sure that we have the packet available before continuing...
   3522    // this requires a full ogg parse, but we know we can fetch from f->stream
   3523 
   3524    // instead of coding this out explicitly, we could save the current read state,
   3525    // read the next packet with get8() until end-of-packet, check f->eof, then
   3526    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
   3527    // of state to restore (primarily the page segment table)
   3528 
   3529    int s = f->next_seg, first = TRUE;
   3530    uint8 *p = f->stream;
   3531 
   3532    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
   3533       for (; s < f->segment_count; ++s) {
   3534          p += f->segments[s];
   3535          if (f->segments[s] < 255)               // stop at first short segment
   3536             break;
   3537       }
   3538       // either this continues, or it ends it...
   3539       if (s == f->segment_count)
   3540          s = -1; // set 'crosses page' flag
   3541       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
   3542       first = FALSE;
   3543    }
   3544    for (; s == -1;) {
   3545       uint8 *q;
   3546       int n;
   3547 
   3548       // check that we have the page header ready
   3549       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
   3550       // validate the page
   3551       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
   3552       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
   3553       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
   3554          if (f->previous_length)
   3555             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
   3556          // if no previous length, we're resynching, so we can come in on a continued-packet,
   3557          // which we'll just drop
   3558       } else {
   3559          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
   3560       }
   3561       n = p[26]; // segment counts
   3562       q = p+27;  // q points to segment table
   3563       p = q + n; // advance past header
   3564       // make sure we've read the segment table
   3565       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
   3566       for (s=0; s < n; ++s) {
   3567          p += q[s];
   3568          if (q[s] < 255)
   3569             break;
   3570       }
   3571       if (s == n)
   3572          s = -1; // set 'crosses page' flag
   3573       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
   3574       first = FALSE;
   3575    }
   3576    return TRUE;
   3577 }
   3578 #endif // !STB_VORBIS_NO_PUSHDATA_API
   3579 
   3580 static int start_decoder(vorb *f)
   3581 {
   3582    uint8 header[6], x,y;
   3583    int len,i,j,k, max_submaps = 0;
   3584    int longest_floorlist=0;
   3585 
   3586    // first page, first packet
   3587    f->first_decode = TRUE;
   3588 
   3589    if (!start_page(f))                              return FALSE;
   3590    // validate page flag
   3591    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
   3592    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
   3593    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
   3594    // check for expected packet length
   3595    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
   3596    if (f->segments[0] != 30) {
   3597       // check for the Ogg skeleton fishead identifying header to refine our error
   3598       if (f->segments[0] == 64 &&
   3599           getn(f, header, 6) &&
   3600           header[0] == 'f' &&
   3601           header[1] == 'i' &&
   3602           header[2] == 's' &&
   3603           header[3] == 'h' &&
   3604           header[4] == 'e' &&
   3605           header[5] == 'a' &&
   3606           get8(f)   == 'd' &&
   3607           get8(f)   == '\0')                        return error(f, VORBIS_ogg_skeleton_not_supported);
   3608       else
   3609                                                     return error(f, VORBIS_invalid_first_page);
   3610    }
   3611 
   3612    // read packet
   3613    // check packet header
   3614    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
   3615    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
   3616    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
   3617    // vorbis_version
   3618    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
   3619    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
   3620    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
   3621    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
   3622    get32(f); // bitrate_maximum
   3623    get32(f); // bitrate_nominal
   3624    get32(f); // bitrate_minimum
   3625    x = get8(f);
   3626    {
   3627       int log0,log1;
   3628       log0 = x & 15;
   3629       log1 = x >> 4;
   3630       f->blocksize_0 = 1 << log0;
   3631       f->blocksize_1 = 1 << log1;
   3632       if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
   3633       if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
   3634       if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
   3635    }
   3636 
   3637    // framing_flag
   3638    x = get8(f);
   3639    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
   3640 
   3641    // second packet!
   3642    if (!start_page(f))                              return FALSE;
   3643 
   3644    if (!start_packet(f))                            return FALSE;
   3645 
   3646    if (!next_segment(f))                            return FALSE;
   3647 
   3648    if (get8_packet(f) != VORBIS_packet_comment)            return error(f, VORBIS_invalid_setup);
   3649    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
   3650    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
   3651    //file vendor
   3652    len = get32_packet(f);
   3653    f->vendor = (char*)setup_malloc(f, sizeof(char) * (len+1));
   3654    if (f->vendor == NULL)                           return error(f, VORBIS_outofmem);
   3655    for(i=0; i < len; ++i) {
   3656       f->vendor[i] = get8_packet(f);
   3657    }
   3658    f->vendor[len] = (char)'\0';
   3659    //user comments
   3660    f->comment_list_length = get32_packet(f);
   3661    f->comment_list = NULL;
   3662    if (f->comment_list_length > 0)
   3663    {
   3664       f->comment_list = (char**) setup_malloc(f, sizeof(char*) * (f->comment_list_length));
   3665       if (f->comment_list == NULL)                  return error(f, VORBIS_outofmem);
   3666    }
   3667 
   3668    for(i=0; i < f->comment_list_length; ++i) {
   3669       len = get32_packet(f);
   3670       f->comment_list[i] = (char*)setup_malloc(f, sizeof(char) * (len+1));
   3671       if (f->comment_list[i] == NULL)               return error(f, VORBIS_outofmem);
   3672 
   3673       for(j=0; j < len; ++j) {
   3674          f->comment_list[i][j] = get8_packet(f);
   3675       }
   3676       f->comment_list[i][len] = (char)'\0';
   3677    }
   3678 
   3679    // framing_flag
   3680    x = get8_packet(f);
   3681    if (!(x & 1))                                    return error(f, VORBIS_invalid_setup);
   3682 
   3683 
   3684    skip(f, f->bytes_in_seg);
   3685    f->bytes_in_seg = 0;
   3686 
   3687    do {
   3688       len = next_segment(f);
   3689       skip(f, len);
   3690       f->bytes_in_seg = 0;
   3691    } while (len);
   3692 
   3693    // third packet!
   3694    if (!start_packet(f))                            return FALSE;
   3695 
   3696    #ifndef STB_VORBIS_NO_PUSHDATA_API
   3697    if (IS_PUSH_MODE(f)) {
   3698       if (!is_whole_packet_present(f)) {
   3699          // convert error in ogg header to write type
   3700          if (f->error == VORBIS_invalid_stream)
   3701             f->error = VORBIS_invalid_setup;
   3702          return FALSE;
   3703       }
   3704    }
   3705    #endif
   3706 
   3707    crc32_init(); // always init it, to avoid multithread race conditions
   3708 
   3709    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
   3710    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
   3711    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
   3712 
   3713    // codebooks
   3714 
   3715    f->codebook_count = get_bits(f,8) + 1;
   3716    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
   3717    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
   3718    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
   3719    for (i=0; i < f->codebook_count; ++i) {
   3720       uint32 *values;
   3721       int ordered, sorted_count;
   3722       int total=0;
   3723       uint8 *lengths;
   3724       Codebook *c = f->codebooks+i;
   3725       CHECK(f);
   3726       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
   3727       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
   3728       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
   3729       x = get_bits(f, 8);
   3730       c->dimensions = (get_bits(f, 8)<<8) + x;
   3731       x = get_bits(f, 8);
   3732       y = get_bits(f, 8);
   3733       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
   3734       ordered = get_bits(f,1);
   3735       c->sparse = ordered ? 0 : get_bits(f,1);
   3736 
   3737       if (c->dimensions == 0 && c->entries != 0)    return error(f, VORBIS_invalid_setup);
   3738 
   3739       if (c->sparse)
   3740          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
   3741       else
   3742          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
   3743 
   3744       if (!lengths) return error(f, VORBIS_outofmem);
   3745 
   3746       if (ordered) {
   3747          int current_entry = 0;
   3748          int current_length = get_bits(f,5) + 1;
   3749          while (current_entry < c->entries) {
   3750             int limit = c->entries - current_entry;
   3751             int n = get_bits(f, ilog(limit));
   3752             if (current_length >= 32) return error(f, VORBIS_invalid_setup);
   3753             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
   3754             memset(lengths + current_entry, current_length, n);
   3755             current_entry += n;
   3756             ++current_length;
   3757          }
   3758       } else {
   3759          for (j=0; j < c->entries; ++j) {
   3760             int present = c->sparse ? get_bits(f,1) : 1;
   3761             if (present) {
   3762                lengths[j] = get_bits(f, 5) + 1;
   3763                ++total;
   3764                if (lengths[j] == 32)
   3765                   return error(f, VORBIS_invalid_setup);
   3766             } else {
   3767                lengths[j] = NO_CODE;
   3768             }
   3769          }
   3770       }
   3771 
   3772       if (c->sparse && total >= c->entries >> 2) {
   3773          // convert sparse items to non-sparse!
   3774          if (c->entries > (int) f->setup_temp_memory_required)
   3775             f->setup_temp_memory_required = c->entries;
   3776 
   3777          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
   3778          if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem);
   3779          memcpy(c->codeword_lengths, lengths, c->entries);
   3780          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
   3781          lengths = c->codeword_lengths;
   3782          c->sparse = 0;
   3783       }
   3784 
   3785       // compute the size of the sorted tables
   3786       if (c->sparse) {
   3787          sorted_count = total;
   3788       } else {
   3789          sorted_count = 0;
   3790          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
   3791          for (j=0; j < c->entries; ++j)
   3792             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
   3793                ++sorted_count;
   3794          #endif
   3795       }
   3796 
   3797       c->sorted_entries = sorted_count;
   3798       values = NULL;
   3799 
   3800       CHECK(f);
   3801       if (!c->sparse) {
   3802          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
   3803          if (!c->codewords)                  return error(f, VORBIS_outofmem);
   3804       } else {
   3805          unsigned int size;
   3806          if (c->sorted_entries) {
   3807             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
   3808             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
   3809             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
   3810             if (!c->codewords)                  return error(f, VORBIS_outofmem);
   3811             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
   3812             if (!values)                        return error(f, VORBIS_outofmem);
   3813          }
   3814          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
   3815          if (size > f->setup_temp_memory_required)
   3816             f->setup_temp_memory_required = size;
   3817       }
   3818 
   3819       if (!compute_codewords(c, lengths, c->entries, values)) {
   3820          if (c->sparse) setup_temp_free(f, values, 0);
   3821          return error(f, VORBIS_invalid_setup);
   3822       }
   3823 
   3824       if (c->sorted_entries) {
   3825          // allocate an extra slot for sentinels
   3826          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
   3827          if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem);
   3828          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
   3829          // so that we can catch that case without an extra if
   3830          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
   3831          if (c->sorted_values == NULL) return error(f, VORBIS_outofmem);
   3832          ++c->sorted_values;
   3833          c->sorted_values[-1] = -1;
   3834          compute_sorted_huffman(c, lengths, values);
   3835       }
   3836 
   3837       if (c->sparse) {
   3838          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
   3839          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
   3840          setup_temp_free(f, lengths, c->entries);
   3841          c->codewords = NULL;
   3842       }
   3843 
   3844       compute_accelerated_huffman(c);
   3845 
   3846       CHECK(f);
   3847       c->lookup_type = get_bits(f, 4);
   3848       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
   3849       if (c->lookup_type > 0) {
   3850          uint16 *mults;
   3851          c->minimum_value = float32_unpack(get_bits(f, 32));
   3852          c->delta_value = float32_unpack(get_bits(f, 32));
   3853          c->value_bits = get_bits(f, 4)+1;
   3854          c->sequence_p = get_bits(f,1);
   3855          if (c->lookup_type == 1) {
   3856             int values = lookup1_values(c->entries, c->dimensions);
   3857             if (values < 0) return error(f, VORBIS_invalid_setup);
   3858             c->lookup_values = (uint32) values;
   3859          } else {
   3860             c->lookup_values = c->entries * c->dimensions;
   3861          }
   3862          if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup);
   3863          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
   3864          if (mults == NULL) return error(f, VORBIS_outofmem);
   3865          for (j=0; j < (int) c->lookup_values; ++j) {
   3866             int q = get_bits(f, c->value_bits);
   3867             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
   3868             mults[j] = q;
   3869          }
   3870 
   3871 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
   3872          if (c->lookup_type == 1) {
   3873             int len, sparse = c->sparse;
   3874             float last=0;
   3875             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
   3876             if (sparse) {
   3877                if (c->sorted_entries == 0) goto skip;
   3878                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
   3879             } else
   3880                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
   3881             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
   3882             len = sparse ? c->sorted_entries : c->entries;
   3883             for (j=0; j < len; ++j) {
   3884                unsigned int z = sparse ? c->sorted_values[j] : j;
   3885                unsigned int div=1;
   3886                for (k=0; k < c->dimensions; ++k) {
   3887                   int off = (z / div) % c->lookup_values;
   3888                   float val = mults[off]*c->delta_value + c->minimum_value + last;
   3889                   c->multiplicands[j*c->dimensions + k] = val;
   3890                   if (c->sequence_p)
   3891                      last = val;
   3892                   if (k+1 < c->dimensions) {
   3893                      if (div > UINT_MAX / (unsigned int) c->lookup_values) {
   3894                         setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
   3895                         return error(f, VORBIS_invalid_setup);
   3896                      }
   3897                      div *= c->lookup_values;
   3898                   }
   3899                }
   3900             }
   3901             c->lookup_type = 2;
   3902          }
   3903          else
   3904 #endif
   3905          {
   3906             float last=0;
   3907             CHECK(f);
   3908             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
   3909             if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
   3910             for (j=0; j < (int) c->lookup_values; ++j) {
   3911                float val = mults[j] * c->delta_value + c->minimum_value + last;
   3912                c->multiplicands[j] = val;
   3913                if (c->sequence_p)
   3914                   last = val;
   3915             }
   3916          }
   3917 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
   3918         skip:;
   3919 #endif
   3920          setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values);
   3921 
   3922          CHECK(f);
   3923       }
   3924       CHECK(f);
   3925    }
   3926 
   3927    // time domain transfers (notused)
   3928 
   3929    x = get_bits(f, 6) + 1;
   3930    for (i=0; i < x; ++i) {
   3931       uint32 z = get_bits(f, 16);
   3932       if (z != 0) return error(f, VORBIS_invalid_setup);
   3933    }
   3934 
   3935    // Floors
   3936    f->floor_count = get_bits(f, 6)+1;
   3937    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
   3938    if (f->floor_config == NULL) return error(f, VORBIS_outofmem);
   3939    for (i=0; i < f->floor_count; ++i) {
   3940       f->floor_types[i] = get_bits(f, 16);
   3941       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
   3942       if (f->floor_types[i] == 0) {
   3943          Floor0 *g = &f->floor_config[i].floor0;
   3944          g->order = get_bits(f,8);
   3945          g->rate = get_bits(f,16);
   3946          g->bark_map_size = get_bits(f,16);
   3947          g->amplitude_bits = get_bits(f,6);
   3948          g->amplitude_offset = get_bits(f,8);
   3949          g->number_of_books = get_bits(f,4) + 1;
   3950          for (j=0; j < g->number_of_books; ++j)
   3951             g->book_list[j] = get_bits(f,8);
   3952          return error(f, VORBIS_feature_not_supported);
   3953       } else {
   3954          stbv__floor_ordering p[31*8+2];
   3955          Floor1 *g = &f->floor_config[i].floor1;
   3956          int max_class = -1;
   3957          g->partitions = get_bits(f, 5);
   3958          for (j=0; j < g->partitions; ++j) {
   3959             g->partition_class_list[j] = get_bits(f, 4);
   3960             if (g->partition_class_list[j] > max_class)
   3961                max_class = g->partition_class_list[j];
   3962          }
   3963          for (j=0; j <= max_class; ++j) {
   3964             g->class_dimensions[j] = get_bits(f, 3)+1;
   3965             g->class_subclasses[j] = get_bits(f, 2);
   3966             if (g->class_subclasses[j]) {
   3967                g->class_masterbooks[j] = get_bits(f, 8);
   3968                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
   3969             }
   3970             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
   3971                g->subclass_books[j][k] = (int16)get_bits(f,8)-1;
   3972                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
   3973             }
   3974          }
   3975          g->floor1_multiplier = get_bits(f,2)+1;
   3976          g->rangebits = get_bits(f,4);
   3977          g->Xlist[0] = 0;
   3978          g->Xlist[1] = 1 << g->rangebits;
   3979          g->values = 2;
   3980          for (j=0; j < g->partitions; ++j) {
   3981             int c = g->partition_class_list[j];
   3982             for (k=0; k < g->class_dimensions[c]; ++k) {
   3983                g->Xlist[g->values] = get_bits(f, g->rangebits);
   3984                ++g->values;
   3985             }
   3986          }
   3987          // precompute the sorting
   3988          for (j=0; j < g->values; ++j) {
   3989             p[j].x = g->Xlist[j];
   3990             p[j].id = j;
   3991          }
   3992          qsort(p, g->values, sizeof(p[0]), point_compare);
   3993          for (j=0; j < g->values-1; ++j)
   3994             if (p[j].x == p[j+1].x)
   3995                return error(f, VORBIS_invalid_setup);
   3996          for (j=0; j < g->values; ++j)
   3997             g->sorted_order[j] = (uint8) p[j].id;
   3998          // precompute the neighbors
   3999          for (j=2; j < g->values; ++j) {
   4000             int low = 0,hi = 0;
   4001             neighbors(g->Xlist, j, &low,&hi);
   4002             g->neighbors[j][0] = low;
   4003             g->neighbors[j][1] = hi;
   4004          }
   4005 
   4006          if (g->values > longest_floorlist)
   4007             longest_floorlist = g->values;
   4008       }
   4009    }
   4010 
   4011    // Residue
   4012    f->residue_count = get_bits(f, 6)+1;
   4013    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0]));
   4014    if (f->residue_config == NULL) return error(f, VORBIS_outofmem);
   4015    memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0]));
   4016    for (i=0; i < f->residue_count; ++i) {
   4017       uint8 residue_cascade[64];
   4018       Residue *r = f->residue_config+i;
   4019       f->residue_types[i] = get_bits(f, 16);
   4020       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
   4021       r->begin = get_bits(f, 24);
   4022       r->end = get_bits(f, 24);
   4023       if (r->end < r->begin) return error(f, VORBIS_invalid_setup);
   4024       r->part_size = get_bits(f,24)+1;
   4025       r->classifications = get_bits(f,6)+1;
   4026       r->classbook = get_bits(f,8);
   4027       if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup);
   4028       for (j=0; j < r->classifications; ++j) {
   4029          uint8 high_bits=0;
   4030          uint8 low_bits=get_bits(f,3);
   4031          if (get_bits(f,1))
   4032             high_bits = get_bits(f,5);
   4033          residue_cascade[j] = high_bits*8 + low_bits;
   4034       }
   4035       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
   4036       if (r->residue_books == NULL) return error(f, VORBIS_outofmem);
   4037       for (j=0; j < r->classifications; ++j) {
   4038          for (k=0; k < 8; ++k) {
   4039             if (residue_cascade[j] & (1 << k)) {
   4040                r->residue_books[j][k] = get_bits(f, 8);
   4041                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
   4042             } else {
   4043                r->residue_books[j][k] = -1;
   4044             }
   4045          }
   4046       }
   4047       // precompute the classifications[] array to avoid inner-loop mod/divide
   4048       // call it 'classdata' since we already have r->classifications
   4049       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
   4050       if (!r->classdata) return error(f, VORBIS_outofmem);
   4051       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
   4052       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
   4053          int classwords = f->codebooks[r->classbook].dimensions;
   4054          int temp = j;
   4055          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
   4056          if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem);
   4057          for (k=classwords-1; k >= 0; --k) {
   4058             r->classdata[j][k] = temp % r->classifications;
   4059             temp /= r->classifications;
   4060          }
   4061       }
   4062    }
   4063 
   4064    f->mapping_count = get_bits(f,6)+1;
   4065    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
   4066    if (f->mapping == NULL) return error(f, VORBIS_outofmem);
   4067    memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping));
   4068    for (i=0; i < f->mapping_count; ++i) {
   4069       Mapping *m = f->mapping + i;
   4070       int mapping_type = get_bits(f,16);
   4071       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
   4072       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
   4073       if (m->chan == NULL) return error(f, VORBIS_outofmem);
   4074       if (get_bits(f,1))
   4075          m->submaps = get_bits(f,4)+1;
   4076       else
   4077          m->submaps = 1;
   4078       if (m->submaps > max_submaps)
   4079          max_submaps = m->submaps;
   4080       if (get_bits(f,1)) {
   4081          m->coupling_steps = get_bits(f,8)+1;
   4082          if (m->coupling_steps > f->channels) return error(f, VORBIS_invalid_setup);
   4083          for (k=0; k < m->coupling_steps; ++k) {
   4084             m->chan[k].magnitude = get_bits(f, ilog(f->channels-1));
   4085             m->chan[k].angle = get_bits(f, ilog(f->channels-1));
   4086             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
   4087             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
   4088             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
   4089          }
   4090       } else
   4091          m->coupling_steps = 0;
   4092 
   4093       // reserved field
   4094       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
   4095       if (m->submaps > 1) {
   4096          for (j=0; j < f->channels; ++j) {
   4097             m->chan[j].mux = get_bits(f, 4);
   4098             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
   4099          }
   4100       } else
   4101          // @SPECIFICATION: this case is missing from the spec
   4102          for (j=0; j < f->channels; ++j)
   4103             m->chan[j].mux = 0;
   4104 
   4105       for (j=0; j < m->submaps; ++j) {
   4106          get_bits(f,8); // discard
   4107          m->submap_floor[j] = get_bits(f,8);
   4108          m->submap_residue[j] = get_bits(f,8);
   4109          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
   4110          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
   4111       }
   4112    }
   4113 
   4114    // Modes
   4115    f->mode_count = get_bits(f, 6)+1;
   4116    for (i=0; i < f->mode_count; ++i) {
   4117       Mode *m = f->mode_config+i;
   4118       m->blockflag = get_bits(f,1);
   4119       m->windowtype = get_bits(f,16);
   4120       m->transformtype = get_bits(f,16);
   4121       m->mapping = get_bits(f,8);
   4122       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
   4123       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
   4124       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
   4125    }
   4126 
   4127    flush_packet(f);
   4128 
   4129    f->previous_length = 0;
   4130 
   4131    for (i=0; i < f->channels; ++i) {
   4132       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
   4133       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
   4134       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
   4135       if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem);
   4136       memset(f->channel_buffers[i], 0, sizeof(float) * f->blocksize_1);
   4137       #ifdef STB_VORBIS_NO_DEFER_FLOOR
   4138       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
   4139       if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem);
   4140       #endif
   4141    }
   4142 
   4143    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
   4144    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
   4145    f->blocksize[0] = f->blocksize_0;
   4146    f->blocksize[1] = f->blocksize_1;
   4147 
   4148 #ifdef STB_VORBIS_DIVIDE_TABLE
   4149    if (integer_divide_table[1][1]==0)
   4150       for (i=0; i < DIVTAB_NUMER; ++i)
   4151          for (j=1; j < DIVTAB_DENOM; ++j)
   4152             integer_divide_table[i][j] = i / j;
   4153 #endif
   4154 
   4155    // compute how much temporary memory is needed
   4156 
   4157    // 1.
   4158    {
   4159       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
   4160       uint32 classify_mem;
   4161       int i,max_part_read=0;
   4162       for (i=0; i < f->residue_count; ++i) {
   4163          Residue *r = f->residue_config + i;
   4164          unsigned int actual_size = f->blocksize_1 / 2;
   4165          unsigned int limit_r_begin = r->begin < actual_size ? r->begin : actual_size;
   4166          unsigned int limit_r_end   = r->end   < actual_size ? r->end   : actual_size;
   4167          int n_read = limit_r_end - limit_r_begin;
   4168          int part_read = n_read / r->part_size;
   4169          if (part_read > max_part_read)
   4170             max_part_read = part_read;
   4171       }
   4172       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
   4173       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
   4174       #else
   4175       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
   4176       #endif
   4177 
   4178       // maximum reasonable partition size is f->blocksize_1
   4179 
   4180       f->temp_memory_required = classify_mem;
   4181       if (imdct_mem > f->temp_memory_required)
   4182          f->temp_memory_required = imdct_mem;
   4183    }
   4184 
   4185 
   4186    if (f->alloc.alloc_buffer) {
   4187       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
   4188       // check if there's enough temp memory so we don't error later
   4189       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
   4190          return error(f, VORBIS_outofmem);
   4191    }
   4192 
   4193    // @TODO: stb_vorbis_seek_start expects first_audio_page_offset to point to a page
   4194    // without PAGEFLAG_continued_packet, so this either points to the first page, or
   4195    // the page after the end of the headers. It might be cleaner to point to a page
   4196    // in the middle of the headers, when that's the page where the first audio packet
   4197    // starts, but we'd have to also correctly skip the end of any continued packet in
   4198    // stb_vorbis_seek_start.
   4199    if (f->next_seg == -1) {
   4200       f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
   4201    } else {
   4202       f->first_audio_page_offset = 0;
   4203    }
   4204 
   4205    return TRUE;
   4206 }
   4207 
   4208 static void vorbis_deinit(stb_vorbis *p)
   4209 {
   4210    int i,j;
   4211 
   4212    setup_free(p, p->vendor);
   4213    for (i=0; i < p->comment_list_length; ++i) {
   4214       setup_free(p, p->comment_list[i]);
   4215    }
   4216    setup_free(p, p->comment_list);
   4217 
   4218    if (p->residue_config) {
   4219       for (i=0; i < p->residue_count; ++i) {
   4220          Residue *r = p->residue_config+i;
   4221          if (r->classdata) {
   4222             for (j=0; j < p->codebooks[r->classbook].entries; ++j)
   4223                setup_free(p, r->classdata[j]);
   4224             setup_free(p, r->classdata);
   4225          }
   4226          setup_free(p, r->residue_books);
   4227       }
   4228    }
   4229 
   4230    if (p->codebooks) {
   4231       CHECK(p);
   4232       for (i=0; i < p->codebook_count; ++i) {
   4233          Codebook *c = p->codebooks + i;
   4234          setup_free(p, c->codeword_lengths);
   4235          setup_free(p, c->multiplicands);
   4236          setup_free(p, c->codewords);
   4237          setup_free(p, c->sorted_codewords);
   4238          // c->sorted_values[-1] is the first entry in the array
   4239          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
   4240       }
   4241       setup_free(p, p->codebooks);
   4242    }
   4243    setup_free(p, p->floor_config);
   4244    setup_free(p, p->residue_config);
   4245    if (p->mapping) {
   4246       for (i=0; i < p->mapping_count; ++i)
   4247          setup_free(p, p->mapping[i].chan);
   4248       setup_free(p, p->mapping);
   4249    }
   4250    CHECK(p);
   4251    for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) {
   4252       setup_free(p, p->channel_buffers[i]);
   4253       setup_free(p, p->previous_window[i]);
   4254       #ifdef STB_VORBIS_NO_DEFER_FLOOR
   4255       setup_free(p, p->floor_buffers[i]);
   4256       #endif
   4257       setup_free(p, p->finalY[i]);
   4258    }
   4259    for (i=0; i < 2; ++i) {
   4260       setup_free(p, p->A[i]);
   4261       setup_free(p, p->B[i]);
   4262       setup_free(p, p->C[i]);
   4263       setup_free(p, p->window[i]);
   4264       setup_free(p, p->bit_reverse[i]);
   4265    }
   4266    #ifndef STB_VORBIS_NO_STDIO
   4267    if (p->close_on_free) fclose(p->f);
   4268    #endif
   4269 }
   4270 
   4271 void stb_vorbis_close(stb_vorbis *p)
   4272 {
   4273    if (p == NULL) return;
   4274    vorbis_deinit(p);
   4275    setup_free(p,p);
   4276 }
   4277 
   4278 static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z)
   4279 {
   4280    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
   4281    if (z) {
   4282       p->alloc = *z;
   4283       p->alloc.alloc_buffer_length_in_bytes &= ~7;
   4284       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
   4285    }
   4286    p->eof = 0;
   4287    p->error = VORBIS__no_error;
   4288    p->stream = NULL;
   4289    p->codebooks = NULL;
   4290    p->page_crc_tests = -1;
   4291    #ifndef STB_VORBIS_NO_STDIO
   4292    p->close_on_free = FALSE;
   4293    p->f = NULL;
   4294    #endif
   4295 }
   4296 
   4297 int stb_vorbis_get_sample_offset(stb_vorbis *f)
   4298 {
   4299    if (f->current_loc_valid)
   4300       return f->current_loc;
   4301    else
   4302       return -1;
   4303 }
   4304 
   4305 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
   4306 {
   4307    stb_vorbis_info d;
   4308    d.channels = f->channels;
   4309    d.sample_rate = f->sample_rate;
   4310    d.setup_memory_required = f->setup_memory_required;
   4311    d.setup_temp_memory_required = f->setup_temp_memory_required;
   4312    d.temp_memory_required = f->temp_memory_required;
   4313    d.max_frame_size = f->blocksize_1 >> 1;
   4314    return d;
   4315 }
   4316 
   4317 stb_vorbis_comment stb_vorbis_get_comment(stb_vorbis *f)
   4318 {
   4319    stb_vorbis_comment d;
   4320    d.vendor = f->vendor;
   4321    d.comment_list_length = f->comment_list_length;
   4322    d.comment_list = f->comment_list;
   4323    return d;
   4324 }
   4325 
   4326 int stb_vorbis_get_error(stb_vorbis *f)
   4327 {
   4328    int e = f->error;
   4329    f->error = VORBIS__no_error;
   4330    return e;
   4331 }
   4332 
   4333 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
   4334 {
   4335    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
   4336    return p;
   4337 }
   4338 
   4339 #ifndef STB_VORBIS_NO_PUSHDATA_API
   4340 
   4341 void stb_vorbis_flush_pushdata(stb_vorbis *f)
   4342 {
   4343    f->previous_length = 0;
   4344    f->page_crc_tests  = 0;
   4345    f->discard_samples_deferred = 0;
   4346    f->current_loc_valid = FALSE;
   4347    f->first_decode = FALSE;
   4348    f->samples_output = 0;
   4349    f->channel_buffer_start = 0;
   4350    f->channel_buffer_end = 0;
   4351 }
   4352 
   4353 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
   4354 {
   4355    int i,n;
   4356    for (i=0; i < f->page_crc_tests; ++i)
   4357       f->scan[i].bytes_done = 0;
   4358 
   4359    // if we have room for more scans, search for them first, because
   4360    // they may cause us to stop early if their header is incomplete
   4361    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
   4362       if (data_len < 4) return 0;
   4363       data_len -= 3; // need to look for 4-byte sequence, so don't miss
   4364                      // one that straddles a boundary
   4365       for (i=0; i < data_len; ++i) {
   4366          if (data[i] == 0x4f) {
   4367             if (0==memcmp(data+i, ogg_page_header, 4)) {
   4368                int j,len;
   4369                uint32 crc;
   4370                // make sure we have the whole page header
   4371                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
   4372                   // only read up to this page start, so hopefully we'll
   4373                   // have the whole page header start next time
   4374                   data_len = i;
   4375                   break;
   4376                }
   4377                // ok, we have it all; compute the length of the page
   4378                len = 27 + data[i+26];
   4379                for (j=0; j < data[i+26]; ++j)
   4380                   len += data[i+27+j];
   4381                // scan everything up to the embedded crc (which we must 0)
   4382                crc = 0;
   4383                for (j=0; j < 22; ++j)
   4384                   crc = crc32_update(crc, data[i+j]);
   4385                // now process 4 0-bytes
   4386                for (   ; j < 26; ++j)
   4387                   crc = crc32_update(crc, 0);
   4388                // len is the total number of bytes we need to scan
   4389                n = f->page_crc_tests++;
   4390                f->scan[n].bytes_left = len-j;
   4391                f->scan[n].crc_so_far = crc;
   4392                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
   4393                // if the last frame on a page is continued to the next, then
   4394                // we can't recover the sample_loc immediately
   4395                if (data[i+27+data[i+26]-1] == 255)
   4396                   f->scan[n].sample_loc = ~0;
   4397                else
   4398                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
   4399                f->scan[n].bytes_done = i+j;
   4400                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
   4401                   break;
   4402                // keep going if we still have room for more
   4403             }
   4404          }
   4405       }
   4406    }
   4407 
   4408    for (i=0; i < f->page_crc_tests;) {
   4409       uint32 crc;
   4410       int j;
   4411       int n = f->scan[i].bytes_done;
   4412       int m = f->scan[i].bytes_left;
   4413       if (m > data_len - n) m = data_len - n;
   4414       // m is the bytes to scan in the current chunk
   4415       crc = f->scan[i].crc_so_far;
   4416       for (j=0; j < m; ++j)
   4417          crc = crc32_update(crc, data[n+j]);
   4418       f->scan[i].bytes_left -= m;
   4419       f->scan[i].crc_so_far = crc;
   4420       if (f->scan[i].bytes_left == 0) {
   4421          // does it match?
   4422          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
   4423             // Houston, we have page
   4424             data_len = n+m; // consumption amount is wherever that scan ended
   4425             f->page_crc_tests = -1; // drop out of page scan mode
   4426             f->previous_length = 0; // decode-but-don't-output one frame
   4427             f->next_seg = -1;       // start a new page
   4428             f->current_loc = f->scan[i].sample_loc; // set the current sample location
   4429                                     // to the amount we'd have decoded had we decoded this page
   4430             f->current_loc_valid = f->current_loc != ~0U;
   4431             return data_len;
   4432          }
   4433          // delete entry
   4434          f->scan[i] = f->scan[--f->page_crc_tests];
   4435       } else {
   4436          ++i;
   4437       }
   4438    }
   4439 
   4440    return data_len;
   4441 }
   4442 
   4443 // return value: number of bytes we used
   4444 int stb_vorbis_decode_frame_pushdata(
   4445          stb_vorbis *f,                   // the file we're decoding
   4446          const uint8 *data, int data_len, // the memory available for decoding
   4447          int *channels,                   // place to write number of float * buffers
   4448          float ***output,                 // place to write float ** array of float * buffers
   4449          int *samples                     // place to write number of output samples
   4450      )
   4451 {
   4452    int i;
   4453    int len,right,left;
   4454 
   4455    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
   4456 
   4457    if (f->page_crc_tests >= 0) {
   4458       *samples = 0;
   4459       return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len);
   4460    }
   4461 
   4462    f->stream     = (uint8 *) data;
   4463    f->stream_end = (uint8 *) data + data_len;
   4464    f->error      = VORBIS__no_error;
   4465 
   4466    // check that we have the entire packet in memory
   4467    if (!is_whole_packet_present(f)) {
   4468       *samples = 0;
   4469       return 0;
   4470    }
   4471 
   4472    if (!vorbis_decode_packet(f, &len, &left, &right)) {
   4473       // save the actual error we encountered
   4474       enum STBVorbisError error = f->error;
   4475       if (error == VORBIS_bad_packet_type) {
   4476          // flush and resynch
   4477          f->error = VORBIS__no_error;
   4478          while (get8_packet(f) != EOP)
   4479             if (f->eof) break;
   4480          *samples = 0;
   4481          return (int) (f->stream - data);
   4482       }
   4483       if (error == VORBIS_continued_packet_flag_invalid) {
   4484          if (f->previous_length == 0) {
   4485             // we may be resynching, in which case it's ok to hit one
   4486             // of these; just discard the packet
   4487             f->error = VORBIS__no_error;
   4488             while (get8_packet(f) != EOP)
   4489                if (f->eof) break;
   4490             *samples = 0;
   4491             return (int) (f->stream - data);
   4492          }
   4493       }
   4494       // if we get an error while parsing, what to do?
   4495       // well, it DEFINITELY won't work to continue from where we are!
   4496       stb_vorbis_flush_pushdata(f);
   4497       // restore the error that actually made us bail
   4498       f->error = error;
   4499       *samples = 0;
   4500       return 1;
   4501    }
   4502 
   4503    // success!
   4504    len = vorbis_finish_frame(f, len, left, right);
   4505    for (i=0; i < f->channels; ++i)
   4506       f->outputs[i] = f->channel_buffers[i] + left;
   4507 
   4508    if (channels) *channels = f->channels;
   4509    *samples = len;
   4510    *output = f->outputs;
   4511    return (int) (f->stream - data);
   4512 }
   4513 
   4514 stb_vorbis *stb_vorbis_open_pushdata(
   4515          const unsigned char *data, int data_len, // the memory available for decoding
   4516          int *data_used,              // only defined if result is not NULL
   4517          int *error, const stb_vorbis_alloc *alloc)
   4518 {
   4519    stb_vorbis *f, p;
   4520    vorbis_init(&p, alloc);
   4521    p.stream     = (uint8 *) data;
   4522    p.stream_end = (uint8 *) data + data_len;
   4523    p.push_mode  = TRUE;
   4524    if (!start_decoder(&p)) {
   4525       if (p.eof)
   4526          *error = VORBIS_need_more_data;
   4527       else
   4528          *error = p.error;
   4529       vorbis_deinit(&p);
   4530       return NULL;
   4531    }
   4532    f = vorbis_alloc(&p);
   4533    if (f) {
   4534       *f = p;
   4535       *data_used = (int) (f->stream - data);
   4536       *error = 0;
   4537       return f;
   4538    } else {
   4539       vorbis_deinit(&p);
   4540       return NULL;
   4541    }
   4542 }
   4543 #endif // STB_VORBIS_NO_PUSHDATA_API
   4544 
   4545 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
   4546 {
   4547    #ifndef STB_VORBIS_NO_PUSHDATA_API
   4548    if (f->push_mode) return 0;
   4549    #endif
   4550    if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start);
   4551    #ifndef STB_VORBIS_NO_STDIO
   4552    return (unsigned int) (ftell(f->f) - f->f_start);
   4553    #endif
   4554 }
   4555 
   4556 #ifndef STB_VORBIS_NO_PULLDATA_API
   4557 //
   4558 // DATA-PULLING API
   4559 //
   4560 
   4561 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
   4562 {
   4563    for(;;) {
   4564       int n;
   4565       if (f->eof) return 0;
   4566       n = get8(f);
   4567       if (n == 0x4f) { // page header candidate
   4568          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
   4569          int i;
   4570          // check if we're off the end of a file_section stream
   4571          if (retry_loc - 25 > f->stream_len)
   4572             return 0;
   4573          // check the rest of the header
   4574          for (i=1; i < 4; ++i)
   4575             if (get8(f) != ogg_page_header[i])
   4576                break;
   4577          if (f->eof) return 0;
   4578          if (i == 4) {
   4579             uint8 header[27];
   4580             uint32 i, crc, goal, len;
   4581             for (i=0; i < 4; ++i)
   4582                header[i] = ogg_page_header[i];
   4583             for (; i < 27; ++i)
   4584                header[i] = get8(f);
   4585             if (f->eof) return 0;
   4586             if (header[4] != 0) goto invalid;
   4587             goal = header[22] + (header[23] << 8) + (header[24]<<16) + ((uint32)header[25]<<24);
   4588             for (i=22; i < 26; ++i)
   4589                header[i] = 0;
   4590             crc = 0;
   4591             for (i=0; i < 27; ++i)
   4592                crc = crc32_update(crc, header[i]);
   4593             len = 0;
   4594             for (i=0; i < header[26]; ++i) {
   4595                int s = get8(f);
   4596                crc = crc32_update(crc, s);
   4597                len += s;
   4598             }
   4599             if (len && f->eof) return 0;
   4600             for (i=0; i < len; ++i)
   4601                crc = crc32_update(crc, get8(f));
   4602             // finished parsing probable page
   4603             if (crc == goal) {
   4604                // we could now check that it's either got the last
   4605                // page flag set, OR it's followed by the capture
   4606                // pattern, but I guess TECHNICALLY you could have
   4607                // a file with garbage between each ogg page and recover
   4608                // from it automatically? So even though that paranoia
   4609                // might decrease the chance of an invalid decode by
   4610                // another 2^32, not worth it since it would hose those
   4611                // invalid-but-useful files?
   4612                if (end)
   4613                   *end = stb_vorbis_get_file_offset(f);
   4614                if (last) {
   4615                   if (header[5] & 0x04)
   4616                      *last = 1;
   4617                   else
   4618                      *last = 0;
   4619                }
   4620                set_file_offset(f, retry_loc-1);
   4621                return 1;
   4622             }
   4623          }
   4624         invalid:
   4625          // not a valid page, so rewind and look for next one
   4626          set_file_offset(f, retry_loc);
   4627       }
   4628    }
   4629 }
   4630 
   4631 
   4632 #define SAMPLE_unknown  0xffffffff
   4633 
   4634 // seeking is implemented with a binary search, which narrows down the range to
   4635 // 64K, before using a linear search (because finding the synchronization
   4636 // pattern can be expensive, and the chance we'd find the end page again is
   4637 // relatively high for small ranges)
   4638 //
   4639 // two initial interpolation-style probes are used at the start of the search
   4640 // to try to bound either side of the binary search sensibly, while still
   4641 // working in O(log n) time if they fail.
   4642 
   4643 static int get_seek_page_info(stb_vorbis *f, ProbedPage *z)
   4644 {
   4645    uint8 header[27], lacing[255];
   4646    int i,len;
   4647 
   4648    // record where the page starts
   4649    z->page_start = stb_vorbis_get_file_offset(f);
   4650 
   4651    // parse the header
   4652    getn(f, header, 27);
   4653    if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S')
   4654       return 0;
   4655    getn(f, lacing, header[26]);
   4656 
   4657    // determine the length of the payload
   4658    len = 0;
   4659    for (i=0; i < header[26]; ++i)
   4660       len += lacing[i];
   4661 
   4662    // this implies where the page ends
   4663    z->page_end = z->page_start + 27 + header[26] + len;
   4664 
   4665    // read the last-decoded sample out of the data
   4666    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24);
   4667 
   4668    // restore file state to where we were
   4669    set_file_offset(f, z->page_start);
   4670    return 1;
   4671 }
   4672 
   4673 // rarely used function to seek back to the preceding page while finding the
   4674 // start of a packet
   4675 static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset)
   4676 {
   4677    unsigned int previous_safe, end;
   4678 
   4679    // now we want to seek back 64K from the limit
   4680    if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset)
   4681       previous_safe = limit_offset - 65536;
   4682    else
   4683       previous_safe = f->first_audio_page_offset;
   4684 
   4685    set_file_offset(f, previous_safe);
   4686 
   4687    while (vorbis_find_page(f, &end, NULL)) {
   4688       if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset)
   4689          return 1;
   4690       set_file_offset(f, end);
   4691    }
   4692 
   4693    return 0;
   4694 }
   4695 
   4696 // implements the search logic for finding a page and starting decoding. if
   4697 // the function succeeds, current_loc_valid will be true and current_loc will
   4698 // be less than or equal to the provided sample number (the closer the
   4699 // better).
   4700 static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number)
   4701 {
   4702    ProbedPage left, right, mid;
   4703    int i, start_seg_with_known_loc, end_pos, page_start;
   4704    uint32 delta, stream_length, padding, last_sample_limit;
   4705    double offset = 0.0, bytes_per_sample = 0.0;
   4706    int probe = 0;
   4707 
   4708    // find the last page and validate the target sample
   4709    stream_length = stb_vorbis_stream_length_in_samples(f);
   4710    if (stream_length == 0)            return error(f, VORBIS_seek_without_length);
   4711    if (sample_number > stream_length) return error(f, VORBIS_seek_invalid);
   4712 
   4713    // this is the maximum difference between the window-center (which is the
   4714    // actual granule position value), and the right-start (which the spec
   4715    // indicates should be the granule position (give or take one)).
   4716    padding = ((f->blocksize_1 - f->blocksize_0) >> 2);
   4717    if (sample_number < padding)
   4718       last_sample_limit = 0;
   4719    else
   4720       last_sample_limit = sample_number - padding;
   4721 
   4722    left = f->p_first;
   4723    while (left.last_decoded_sample == ~0U) {
   4724       // (untested) the first page does not have a 'last_decoded_sample'
   4725       set_file_offset(f, left.page_end);
   4726       if (!get_seek_page_info(f, &left)) goto error;
   4727    }
   4728 
   4729    right = f->p_last;
   4730    assert(right.last_decoded_sample != ~0U);
   4731 
   4732    // starting from the start is handled differently
   4733    if (last_sample_limit <= left.last_decoded_sample) {
   4734       if (stb_vorbis_seek_start(f)) {
   4735          if (f->current_loc > sample_number)
   4736             return error(f, VORBIS_seek_failed);
   4737          return 1;
   4738       }
   4739       return 0;
   4740    }
   4741 
   4742    while (left.page_end != right.page_start) {
   4743       assert(left.page_end < right.page_start);
   4744       // search range in bytes
   4745       delta = right.page_start - left.page_end;
   4746       if (delta <= 65536) {
   4747          // there's only 64K left to search - handle it linearly
   4748          set_file_offset(f, left.page_end);
   4749       } else {
   4750          if (probe < 2) {
   4751             if (probe == 0) {
   4752                // first probe (interpolate)
   4753                double data_bytes = right.page_end - left.page_start;
   4754                bytes_per_sample = data_bytes / right.last_decoded_sample;
   4755                offset = left.page_start + bytes_per_sample * (last_sample_limit - left.last_decoded_sample);
   4756             } else {
   4757                // second probe (try to bound the other side)
   4758                double error = ((double) last_sample_limit - mid.last_decoded_sample) * bytes_per_sample;
   4759                if (error >= 0 && error <  8000) error =  8000;
   4760                if (error <  0 && error > -8000) error = -8000;
   4761                offset += error * 2;
   4762             }
   4763 
   4764             // ensure the offset is valid
   4765             if (offset < left.page_end)
   4766                offset = left.page_end;
   4767             if (offset > right.page_start - 65536)
   4768                offset = right.page_start - 65536;
   4769 
   4770             set_file_offset(f, (unsigned int) offset);
   4771          } else {
   4772             // binary search for large ranges (offset by 32K to ensure
   4773             // we don't hit the right page)
   4774             set_file_offset(f, left.page_end + (delta / 2) - 32768);
   4775          }
   4776 
   4777          if (!vorbis_find_page(f, NULL, NULL)) goto error;
   4778       }
   4779 
   4780       for (;;) {
   4781          if (!get_seek_page_info(f, &mid)) goto error;
   4782          if (mid.last_decoded_sample != ~0U) break;
   4783          // (untested) no frames end on this page
   4784          set_file_offset(f, mid.page_end);
   4785          assert(mid.page_start < right.page_start);
   4786       }
   4787 
   4788       // if we've just found the last page again then we're in a tricky file,
   4789       // and we're close enough (if it wasn't an interpolation probe).
   4790       if (mid.page_start == right.page_start) {
   4791          if (probe >= 2 || delta <= 65536)
   4792             break;
   4793       } else {
   4794          if (last_sample_limit < mid.last_decoded_sample)
   4795             right = mid;
   4796          else
   4797             left = mid;
   4798       }
   4799 
   4800       ++probe;
   4801    }
   4802 
   4803    // seek back to start of the last packet
   4804    page_start = left.page_start;
   4805    set_file_offset(f, page_start);
   4806    if (!start_page(f)) return error(f, VORBIS_seek_failed);
   4807    end_pos = f->end_seg_with_known_loc;
   4808    assert(end_pos >= 0);
   4809 
   4810    for (;;) {
   4811       for (i = end_pos; i > 0; --i)
   4812          if (f->segments[i-1] != 255)
   4813             break;
   4814 
   4815       start_seg_with_known_loc = i;
   4816 
   4817       if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet))
   4818          break;
   4819 
   4820       // (untested) the final packet begins on an earlier page
   4821       if (!go_to_page_before(f, page_start))
   4822          goto error;
   4823 
   4824       page_start = stb_vorbis_get_file_offset(f);
   4825       if (!start_page(f)) goto error;
   4826       end_pos = f->segment_count - 1;
   4827    }
   4828 
   4829    // prepare to start decoding
   4830    f->current_loc_valid = FALSE;
   4831    f->last_seg = FALSE;
   4832    f->valid_bits = 0;
   4833    f->packet_bytes = 0;
   4834    f->bytes_in_seg = 0;
   4835    f->previous_length = 0;
   4836    f->next_seg = start_seg_with_known_loc;
   4837 
   4838    for (i = 0; i < start_seg_with_known_loc; i++)
   4839       skip(f, f->segments[i]);
   4840 
   4841    // start decoding (optimizable - this frame is generally discarded)
   4842    if (!vorbis_pump_first_frame(f))
   4843       return 0;
   4844    if (f->current_loc > sample_number)
   4845       return error(f, VORBIS_seek_failed);
   4846    return 1;
   4847 
   4848 error:
   4849    // try to restore the file to a valid state
   4850    stb_vorbis_seek_start(f);
   4851    return error(f, VORBIS_seek_failed);
   4852 }
   4853 
   4854 // the same as vorbis_decode_initial, but without advancing
   4855 static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
   4856 {
   4857    int bits_read, bytes_read;
   4858 
   4859    if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode))
   4860       return 0;
   4861 
   4862    // either 1 or 2 bytes were read, figure out which so we can rewind
   4863    bits_read = 1 + ilog(f->mode_count-1);
   4864    if (f->mode_config[*mode].blockflag)
   4865       bits_read += 2;
   4866    bytes_read = (bits_read + 7) / 8;
   4867 
   4868    f->bytes_in_seg += bytes_read;
   4869    f->packet_bytes -= bytes_read;
   4870    skip(f, -bytes_read);
   4871    if (f->next_seg == -1)
   4872       f->next_seg = f->segment_count - 1;
   4873    else
   4874       f->next_seg--;
   4875    f->valid_bits = 0;
   4876 
   4877    return 1;
   4878 }
   4879 
   4880 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
   4881 {
   4882    uint32 max_frame_samples;
   4883 
   4884    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
   4885 
   4886    // fast page-level search
   4887    if (!seek_to_sample_coarse(f, sample_number))
   4888       return 0;
   4889 
   4890    assert(f->current_loc_valid);
   4891    assert(f->current_loc <= sample_number);
   4892 
   4893    // linear search for the relevant packet
   4894    max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2;
   4895    while (f->current_loc < sample_number) {
   4896       int left_start, left_end, right_start, right_end, mode, frame_samples;
   4897       if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
   4898          return error(f, VORBIS_seek_failed);
   4899       // calculate the number of samples returned by the next frame
   4900       frame_samples = right_start - left_start;
   4901       if (f->current_loc + frame_samples > sample_number) {
   4902          return 1; // the next frame will contain the sample
   4903       } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) {
   4904          // there's a chance the frame after this could contain the sample
   4905          vorbis_pump_first_frame(f);
   4906       } else {
   4907          // this frame is too early to be relevant
   4908          f->current_loc += frame_samples;
   4909          f->previous_length = 0;
   4910          maybe_start_packet(f);
   4911          flush_packet(f);
   4912       }
   4913    }
   4914    // the next frame should start with the sample
   4915    if (f->current_loc != sample_number) return error(f, VORBIS_seek_failed);
   4916    return 1;
   4917 }
   4918 
   4919 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
   4920 {
   4921    if (!stb_vorbis_seek_frame(f, sample_number))
   4922       return 0;
   4923 
   4924    if (sample_number != f->current_loc) {
   4925       int n;
   4926       uint32 frame_start = f->current_loc;
   4927       stb_vorbis_get_frame_float(f, &n, NULL);
   4928       assert(sample_number > frame_start);
   4929       assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end);
   4930       f->channel_buffer_start += (sample_number - frame_start);
   4931    }
   4932 
   4933    return 1;
   4934 }
   4935 
   4936 int stb_vorbis_seek_start(stb_vorbis *f)
   4937 {
   4938    if (IS_PUSH_MODE(f)) { return error(f, VORBIS_invalid_api_mixing); }
   4939    set_file_offset(f, f->first_audio_page_offset);
   4940    f->previous_length = 0;
   4941    f->first_decode = TRUE;
   4942    f->next_seg = -1;
   4943    return vorbis_pump_first_frame(f);
   4944 }
   4945 
   4946 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
   4947 {
   4948    unsigned int restore_offset, previous_safe;
   4949    unsigned int end, last_page_loc;
   4950 
   4951    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
   4952    if (!f->total_samples) {
   4953       unsigned int last;
   4954       uint32 lo,hi;
   4955       char header[6];
   4956 
   4957       // first, store the current decode position so we can restore it
   4958       restore_offset = stb_vorbis_get_file_offset(f);
   4959 
   4960       // now we want to seek back 64K from the end (the last page must
   4961       // be at most a little less than 64K, but let's allow a little slop)
   4962       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
   4963          previous_safe = f->stream_len - 65536;
   4964       else
   4965          previous_safe = f->first_audio_page_offset;
   4966 
   4967       set_file_offset(f, previous_safe);
   4968       // previous_safe is now our candidate 'earliest known place that seeking
   4969       // to will lead to the final page'
   4970 
   4971       if (!vorbis_find_page(f, &end, &last)) {
   4972          // if we can't find a page, we're hosed!
   4973          f->error = VORBIS_cant_find_last_page;
   4974          f->total_samples = 0xffffffff;
   4975          goto done;
   4976       }
   4977 
   4978       // check if there are more pages
   4979       last_page_loc = stb_vorbis_get_file_offset(f);
   4980 
   4981       // stop when the last_page flag is set, not when we reach eof;
   4982       // this allows us to stop short of a 'file_section' end without
   4983       // explicitly checking the length of the section
   4984       while (!last) {
   4985          set_file_offset(f, end);
   4986          if (!vorbis_find_page(f, &end, &last)) {
   4987             // the last page we found didn't have the 'last page' flag
   4988             // set. whoops!
   4989             break;
   4990          }
   4991          //previous_safe = last_page_loc+1; // NOTE: not used after this point, but note for debugging
   4992          last_page_loc = stb_vorbis_get_file_offset(f);
   4993       }
   4994 
   4995       set_file_offset(f, last_page_loc);
   4996 
   4997       // parse the header
   4998       getn(f, (unsigned char *)header, 6);
   4999       // extract the absolute granule position
   5000       lo = get32(f);
   5001       hi = get32(f);
   5002       if (lo == 0xffffffff && hi == 0xffffffff) {
   5003          f->error = VORBIS_cant_find_last_page;
   5004          f->total_samples = SAMPLE_unknown;
   5005          goto done;
   5006       }
   5007       if (hi)
   5008          lo = 0xfffffffe; // saturate
   5009       f->total_samples = lo;
   5010 
   5011       f->p_last.page_start = last_page_loc;
   5012       f->p_last.page_end   = end;
   5013       f->p_last.last_decoded_sample = lo;
   5014 
   5015      done:
   5016       set_file_offset(f, restore_offset);
   5017    }
   5018    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
   5019 }
   5020 
   5021 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
   5022 {
   5023    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
   5024 }
   5025 
   5026 
   5027 
   5028 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
   5029 {
   5030    int len, right,left,i;
   5031    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
   5032 
   5033    if (!vorbis_decode_packet(f, &len, &left, &right)) {
   5034       f->channel_buffer_start = f->channel_buffer_end = 0;
   5035       return 0;
   5036    }
   5037 
   5038    len = vorbis_finish_frame(f, len, left, right);
   5039    for (i=0; i < f->channels; ++i)
   5040       f->outputs[i] = f->channel_buffers[i] + left;
   5041 
   5042    f->channel_buffer_start = left;
   5043    f->channel_buffer_end   = left+len;
   5044 
   5045    if (channels) *channels = f->channels;
   5046    if (output)   *output = f->outputs;
   5047    return len;
   5048 }
   5049 
   5050 #ifndef STB_VORBIS_NO_STDIO
   5051 
   5052 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length)
   5053 {
   5054    stb_vorbis *f, p;
   5055    vorbis_init(&p, alloc);
   5056    p.f = file;
   5057    p.f_start = (uint32) ftell(file);
   5058    p.stream_len   = length;
   5059    p.close_on_free = close_on_free;
   5060    if (start_decoder(&p)) {
   5061       f = vorbis_alloc(&p);
   5062       if (f) {
   5063          *f = p;
   5064          vorbis_pump_first_frame(f);
   5065          return f;
   5066       }
   5067    }
   5068    if (error) *error = p.error;
   5069    vorbis_deinit(&p);
   5070    return NULL;
   5071 }
   5072 
   5073 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc)
   5074 {
   5075    unsigned int len, start;
   5076    start = (unsigned int) ftell(file);
   5077    fseek(file, 0, SEEK_END);
   5078    len = (unsigned int) (ftell(file) - start);
   5079    fseek(file, start, SEEK_SET);
   5080    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
   5081 }
   5082 
   5083 stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc)
   5084 {
   5085    FILE *f;
   5086 #if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__)
   5087    if (0 != fopen_s(&f, filename, "rb"))
   5088       f = NULL;
   5089 #else
   5090    f = fopen(filename, "rb");
   5091 #endif
   5092    if (f)
   5093       return stb_vorbis_open_file(f, TRUE, error, alloc);
   5094    if (error) *error = VORBIS_file_open_failure;
   5095    return NULL;
   5096 }
   5097 #endif // STB_VORBIS_NO_STDIO
   5098 
   5099 stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc)
   5100 {
   5101    stb_vorbis *f, p;
   5102    if (!data) {
   5103       if (error) *error = VORBIS_unexpected_eof;
   5104       return NULL;
   5105    }
   5106    vorbis_init(&p, alloc);
   5107    p.stream = (uint8 *) data;
   5108    p.stream_end = (uint8 *) data + len;
   5109    p.stream_start = (uint8 *) p.stream;
   5110    p.stream_len = len;
   5111    p.push_mode = FALSE;
   5112    if (start_decoder(&p)) {
   5113       f = vorbis_alloc(&p);
   5114       if (f) {
   5115          *f = p;
   5116          vorbis_pump_first_frame(f);
   5117          if (error) *error = VORBIS__no_error;
   5118          return f;
   5119       }
   5120    }
   5121    if (error) *error = p.error;
   5122    vorbis_deinit(&p);
   5123    return NULL;
   5124 }
   5125 
   5126 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
   5127 #define PLAYBACK_MONO     1
   5128 #define PLAYBACK_LEFT     2
   5129 #define PLAYBACK_RIGHT    4
   5130 
   5131 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
   5132 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
   5133 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
   5134 
   5135 static int8 channel_position[7][6] =
   5136 {
   5137    { 0 },
   5138    { C },
   5139    { L, R },
   5140    { L, C, R },
   5141    { L, R, L, R },
   5142    { L, C, R, L, R },
   5143    { L, C, R, L, R, C },
   5144 };
   5145 
   5146 
   5147 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
   5148    typedef union {
   5149       float f;
   5150       int i;
   5151    } float_conv;
   5152    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
   5153    #define FASTDEF(x) float_conv x
   5154    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
   5155    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
   5156    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
   5157    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
   5158    #define check_endianness()
   5159 #else
   5160    #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
   5161    #define check_endianness()
   5162    #define FASTDEF(x)
   5163 #endif
   5164 
   5165 static void copy_samples(short *dest, float *src, int len)
   5166 {
   5167    int i;
   5168    check_endianness();
   5169    for (i=0; i < len; ++i) {
   5170       FASTDEF(temp);
   5171       int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15);
   5172       if ((unsigned int) (v + 32768) > 65535)
   5173          v = v < 0 ? -32768 : 32767;
   5174       dest[i] = v;
   5175    }
   5176 }
   5177 
   5178 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
   5179 {
   5180    #define STB_BUFFER_SIZE  32
   5181    float buffer[STB_BUFFER_SIZE];
   5182    int i,j,o,n = STB_BUFFER_SIZE;
   5183    check_endianness();
   5184    for (o = 0; o < len; o += STB_BUFFER_SIZE) {
   5185       memset(buffer, 0, sizeof(buffer));
   5186       if (o + n > len) n = len - o;
   5187       for (j=0; j < num_c; ++j) {
   5188          if (channel_position[num_c][j] & mask) {
   5189             for (i=0; i < n; ++i)
   5190                buffer[i] += data[j][d_offset+o+i];
   5191          }
   5192       }
   5193       for (i=0; i < n; ++i) {
   5194          FASTDEF(temp);
   5195          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
   5196          if ((unsigned int) (v + 32768) > 65535)
   5197             v = v < 0 ? -32768 : 32767;
   5198          output[o+i] = v;
   5199       }
   5200    }
   5201    #undef STB_BUFFER_SIZE
   5202 }
   5203 
   5204 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
   5205 {
   5206    #define STB_BUFFER_SIZE  32
   5207    float buffer[STB_BUFFER_SIZE];
   5208    int i,j,o,n = STB_BUFFER_SIZE >> 1;
   5209    // o is the offset in the source data
   5210    check_endianness();
   5211    for (o = 0; o < len; o += STB_BUFFER_SIZE >> 1) {
   5212       // o2 is the offset in the output data
   5213       int o2 = o << 1;
   5214       memset(buffer, 0, sizeof(buffer));
   5215       if (o + n > len) n = len - o;
   5216       for (j=0; j < num_c; ++j) {
   5217          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
   5218          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
   5219             for (i=0; i < n; ++i) {
   5220                buffer[i*2+0] += data[j][d_offset+o+i];
   5221                buffer[i*2+1] += data[j][d_offset+o+i];
   5222             }
   5223          } else if (m == PLAYBACK_LEFT) {
   5224             for (i=0; i < n; ++i) {
   5225                buffer[i*2+0] += data[j][d_offset+o+i];
   5226             }
   5227          } else if (m == PLAYBACK_RIGHT) {
   5228             for (i=0; i < n; ++i) {
   5229                buffer[i*2+1] += data[j][d_offset+o+i];
   5230             }
   5231          }
   5232       }
   5233       for (i=0; i < (n<<1); ++i) {
   5234          FASTDEF(temp);
   5235          int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15);
   5236          if ((unsigned int) (v + 32768) > 65535)
   5237             v = v < 0 ? -32768 : 32767;
   5238          output[o2+i] = v;
   5239       }
   5240    }
   5241    #undef STB_BUFFER_SIZE
   5242 }
   5243 
   5244 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
   5245 {
   5246    int i;
   5247    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
   5248       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
   5249       for (i=0; i < buf_c; ++i)
   5250          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
   5251    } else {
   5252       int limit = buf_c < data_c ? buf_c : data_c;
   5253       for (i=0; i < limit; ++i)
   5254          copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
   5255       for (   ; i < buf_c; ++i)
   5256          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
   5257    }
   5258 }
   5259 
   5260 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
   5261 {
   5262    float **output = NULL;
   5263    int len = stb_vorbis_get_frame_float(f, NULL, &output);
   5264    if (len > num_samples) len = num_samples;
   5265    if (len)
   5266       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
   5267    return len;
   5268 }
   5269 
   5270 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
   5271 {
   5272    int i;
   5273    check_endianness();
   5274    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
   5275       assert(buf_c == 2);
   5276       for (i=0; i < buf_c; ++i)
   5277          compute_stereo_samples(buffer, data_c, data, d_offset, len);
   5278    } else {
   5279       int limit = buf_c < data_c ? buf_c : data_c;
   5280       int j;
   5281       for (j=0; j < len; ++j) {
   5282          for (i=0; i < limit; ++i) {
   5283             FASTDEF(temp);
   5284             float f = data[i][d_offset+j];
   5285             int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15);
   5286             if ((unsigned int) (v + 32768) > 65535)
   5287                v = v < 0 ? -32768 : 32767;
   5288             *buffer++ = v;
   5289          }
   5290          for (   ; i < buf_c; ++i)
   5291             *buffer++ = 0;
   5292       }
   5293    }
   5294 }
   5295 
   5296 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
   5297 {
   5298    float **output;
   5299    int len;
   5300    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
   5301    len = stb_vorbis_get_frame_float(f, NULL, &output);
   5302    if (len) {
   5303       if (len*num_c > num_shorts) len = num_shorts / num_c;
   5304       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
   5305    }
   5306    return len;
   5307 }
   5308 
   5309 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
   5310 {
   5311    float **outputs;
   5312    int len = num_shorts / channels;
   5313    int n=0;
   5314    while (n < len) {
   5315       int k = f->channel_buffer_end - f->channel_buffer_start;
   5316       if (n+k >= len) k = len - n;
   5317       if (k)
   5318          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
   5319       buffer += k*channels;
   5320       n += k;
   5321       f->channel_buffer_start += k;
   5322       if (n == len) break;
   5323       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
   5324    }
   5325    return n;
   5326 }
   5327 
   5328 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
   5329 {
   5330    float **outputs;
   5331    int n=0;
   5332    while (n < len) {
   5333       int k = f->channel_buffer_end - f->channel_buffer_start;
   5334       if (n+k >= len) k = len - n;
   5335       if (k)
   5336          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
   5337       n += k;
   5338       f->channel_buffer_start += k;
   5339       if (n == len) break;
   5340       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
   5341    }
   5342    return n;
   5343 }
   5344 
   5345 #ifndef STB_VORBIS_NO_STDIO
   5346 int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output)
   5347 {
   5348    int data_len, offset, total, limit, error;
   5349    short *data;
   5350    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
   5351    if (v == NULL) return -1;
   5352    limit = v->channels * 4096;
   5353    *channels = v->channels;
   5354    if (sample_rate)
   5355       *sample_rate = v->sample_rate;
   5356    offset = data_len = 0;
   5357    total = limit;
   5358    data = (short *) malloc(total * sizeof(*data));
   5359    if (data == NULL) {
   5360       stb_vorbis_close(v);
   5361       return -2;
   5362    }
   5363    for (;;) {
   5364       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
   5365       if (n == 0) break;
   5366       data_len += n;
   5367       offset += n * v->channels;
   5368       if (offset + limit > total) {
   5369          short *data2;
   5370          total *= 2;
   5371          data2 = (short *) realloc(data, total * sizeof(*data));
   5372          if (data2 == NULL) {
   5373             free(data);
   5374             stb_vorbis_close(v);
   5375             return -2;
   5376          }
   5377          data = data2;
   5378       }
   5379    }
   5380    *output = data;
   5381    stb_vorbis_close(v);
   5382    return data_len;
   5383 }
   5384 #endif // NO_STDIO
   5385 
   5386 int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output)
   5387 {
   5388    int data_len, offset, total, limit, error;
   5389    short *data;
   5390    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
   5391    if (v == NULL) return -1;
   5392    limit = v->channels * 4096;
   5393    *channels = v->channels;
   5394    if (sample_rate)
   5395       *sample_rate = v->sample_rate;
   5396    offset = data_len = 0;
   5397    total = limit;
   5398    data = (short *) malloc(total * sizeof(*data));
   5399    if (data == NULL) {
   5400       stb_vorbis_close(v);
   5401       return -2;
   5402    }
   5403    for (;;) {
   5404       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
   5405       if (n == 0) break;
   5406       data_len += n;
   5407       offset += n * v->channels;
   5408       if (offset + limit > total) {
   5409          short *data2;
   5410          total *= 2;
   5411          data2 = (short *) realloc(data, total * sizeof(*data));
   5412          if (data2 == NULL) {
   5413             free(data);
   5414             stb_vorbis_close(v);
   5415             return -2;
   5416          }
   5417          data = data2;
   5418       }
   5419    }
   5420    *output = data;
   5421    stb_vorbis_close(v);
   5422    return data_len;
   5423 }
   5424 #endif // STB_VORBIS_NO_INTEGER_CONVERSION
   5425 
   5426 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
   5427 {
   5428    float **outputs;
   5429    int len = num_floats / channels;
   5430    int n=0;
   5431    int z = f->channels;
   5432    if (z > channels) z = channels;
   5433    while (n < len) {
   5434       int i,j;
   5435       int k = f->channel_buffer_end - f->channel_buffer_start;
   5436       if (n+k >= len) k = len - n;
   5437       for (j=0; j < k; ++j) {
   5438          for (i=0; i < z; ++i)
   5439             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
   5440          for (   ; i < channels; ++i)
   5441             *buffer++ = 0;
   5442       }
   5443       n += k;
   5444       f->channel_buffer_start += k;
   5445       if (n == len)
   5446          break;
   5447       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
   5448          break;
   5449    }
   5450    return n;
   5451 }
   5452 
   5453 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
   5454 {
   5455    float **outputs;
   5456    int n=0;
   5457    int z = f->channels;
   5458    if (z > channels) z = channels;
   5459    while (n < num_samples) {
   5460       int i;
   5461       int k = f->channel_buffer_end - f->channel_buffer_start;
   5462       if (n+k >= num_samples) k = num_samples - n;
   5463       if (k) {
   5464          for (i=0; i < z; ++i)
   5465             memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k);
   5466          for (   ; i < channels; ++i)
   5467             memset(buffer[i]+n, 0, sizeof(float) * k);
   5468       }
   5469       n += k;
   5470       f->channel_buffer_start += k;
   5471       if (n == num_samples)
   5472          break;
   5473       if (!stb_vorbis_get_frame_float(f, NULL, &outputs))
   5474          break;
   5475    }
   5476    return n;
   5477 }
   5478 #endif // STB_VORBIS_NO_PULLDATA_API
   5479 
   5480 /* Version history
   5481     1.17    - 2019-07-08 - fix CVE-2019-13217, -13218, -13219, -13220, -13221, -13222, -13223
   5482                            found with Mayhem by ForAllSecure
   5483     1.16    - 2019-03-04 - fix warnings
   5484     1.15    - 2019-02-07 - explicit failure if Ogg Skeleton data is found
   5485     1.14    - 2018-02-11 - delete bogus dealloca usage
   5486     1.13    - 2018-01-29 - fix truncation of last frame (hopefully)
   5487     1.12    - 2017-11-21 - limit residue begin/end to blocksize/2 to avoid large temp allocs in bad/corrupt files
   5488     1.11    - 2017-07-23 - fix MinGW compilation
   5489     1.10    - 2017-03-03 - more robust seeking; fix negative ilog(); clear error in open_memory
   5490     1.09    - 2016-04-04 - back out 'avoid discarding last frame' fix from previous version
   5491     1.08    - 2016-04-02 - fixed multiple warnings; fix setup memory leaks;
   5492                            avoid discarding last frame of audio data
   5493     1.07    - 2015-01-16 - fixed some warnings, fix mingw, const-correct API
   5494                            some more crash fixes when out of memory or with corrupt files
   5495     1.06    - 2015-08-31 - full, correct support for seeking API (Dougall Johnson)
   5496                            some crash fixes when out of memory or with corrupt files
   5497     1.05    - 2015-04-19 - don't define __forceinline if it's redundant
   5498     1.04    - 2014-08-27 - fix missing const-correct case in API
   5499     1.03    - 2014-08-07 - Warning fixes
   5500     1.02    - 2014-07-09 - Declare qsort compare function _cdecl on windows
   5501     1.01    - 2014-06-18 - fix stb_vorbis_get_samples_float
   5502     1.0     - 2014-05-26 - fix memory leaks; fix warnings; fix bugs in multichannel
   5503                            (API change) report sample rate for decode-full-file funcs
   5504     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
   5505     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
   5506     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
   5507     0.99993 - remove assert that fired on legal files with empty tables
   5508     0.99992 - rewind-to-start
   5509     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
   5510     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
   5511     0.9998 - add a full-decode function with a memory source
   5512     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
   5513     0.9996 - query length of vorbis stream in samples/seconds
   5514     0.9995 - bugfix to another optimization that only happened in certain files
   5515     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
   5516     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
   5517     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
   5518     0.9991 - performance improvement of IMDCT
   5519     0.999 - (should have been 0.9990) performance improvement of IMDCT
   5520     0.998 - no-CRT support from Casey Muratori
   5521     0.997 - bugfixes for bugs found by Terje Mathisen
   5522     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
   5523     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
   5524     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
   5525     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
   5526     0.992 - fixes for MinGW warning
   5527     0.991 - turn fast-float-conversion on by default
   5528     0.990 - fix push-mode seek recovery if you seek into the headers
   5529     0.98b - fix to bad release of 0.98
   5530     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
   5531     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
   5532     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
   5533     0.95 - clamping code for 16-bit functions
   5534     0.94 - not publically released
   5535     0.93 - fixed all-zero-floor case (was decoding garbage)
   5536     0.92 - fixed a memory leak
   5537     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
   5538     0.90 - first public release
   5539 */
   5540 
   5541 #endif // STB_VORBIS_HEADER_ONLY
   5542 
   5543 
   5544 /*
   5545 ------------------------------------------------------------------------------
   5546 This software is available under 2 licenses -- choose whichever you prefer.
   5547 ------------------------------------------------------------------------------
   5548 ALTERNATIVE A - MIT License
   5549 Copyright (c) 2017 Sean Barrett
   5550 Permission is hereby granted, free of charge, to any person obtaining a copy of
   5551 this software and associated documentation files (the "Software"), to deal in
   5552 the Software without restriction, including without limitation the rights to
   5553 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   5554 of the Software, and to permit persons to whom the Software is furnished to do
   5555 so, subject to the following conditions:
   5556 The above copyright notice and this permission notice shall be included in all
   5557 copies or substantial portions of the Software.
   5558 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   5559 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   5560 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   5561 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   5562 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   5563 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   5564 SOFTWARE.
   5565 ------------------------------------------------------------------------------
   5566 ALTERNATIVE B - Public Domain (www.unlicense.org)
   5567 This is free and unencumbered software released into the public domain.
   5568 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
   5569 software, either in source code form or as a compiled binary, for any purpose,
   5570 commercial or non-commercial, and by any means.
   5571 In jurisdictions that recognize copyright laws, the author or authors of this
   5572 software dedicate any and all copyright interest in the software to the public
   5573 domain. We make this dedication for the benefit of the public at large and to
   5574 the detriment of our heirs and successors. We intend this dedication to be an
   5575 overt act of relinquishment in perpetuity of all present and future rights to
   5576 this software under copyright law.
   5577 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   5578 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   5579 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   5580 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   5581 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
   5582 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   5583 ------------------------------------------------------------------------------
   5584 */