Back to index

libsfml  1.6+dfsg2
stb_vorbis.c
Go to the documentation of this file.
00001 // Ogg Vorbis I audio decoder  -- version 0.99994
00002 //
00003 // Written in April 2007 by Sean Barrett, sponsored by RAD Game Tools.
00004 //
00005 // Placed in the public domain April 2007 by the author: no copyright is
00006 // claimed, and you may use it for any purpose you like.
00007 //
00008 // No warranty for any purpose is expressed or implied by the author (nor
00009 // by RAD Game Tools). Report bugs and send enhancements to the author.
00010 //
00011 // Get the latest version and other information at:
00012 //     http://nothings.org/stb_vorbis/
00013 
00014 
00015 // Todo:
00016 //
00017 //   - seeking (note you can seek yourself using the pushdata API)
00018 //
00019 // Limitations:
00020 //
00021 //   - floor 0 not supported (used in old ogg vorbis files)
00022 //   - lossless sample-truncation at beginning ignored
00023 //   - cannot concatenate multiple vorbis streams
00024 //   - sample positions are 32-bit, limiting seekable 192Khz
00025 //       files to around 6 hours (Ogg supports 64-bit)
00026 // 
00027 // All of these limitations may be removed in future versions.
00028 
00029 #include "stb_vorbis.h"
00030 
00031 #ifndef STB_VORBIS_HEADER_ONLY
00032 
00033 // global configuration settings (e.g. set these in the project/makefile),
00034 // or just set them in this file at the top (although ideally the first few
00035 // should be visible when the header file is compiled too, although it's not
00036 // crucial)
00037 
00038 // STB_VORBIS_NO_PUSHDATA_API
00039 //     does not compile the code for the various stb_vorbis_*_pushdata()
00040 //     functions
00041 // #define STB_VORBIS_NO_PUSHDATA_API
00042 
00043 // STB_VORBIS_NO_PULLDATA_API
00044 //     does not compile the code for the non-pushdata APIs
00045 // #define STB_VORBIS_NO_PULLDATA_API
00046 
00047 // STB_VORBIS_NO_STDIO
00048 //     does not compile the code for the APIs that use FILE *s internally
00049 //     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
00050 // #define STB_VORBIS_NO_STDIO
00051 
00052 // STB_VORBIS_NO_INTEGER_CONVERSION
00053 //     does not compile the code for converting audio sample data from
00054 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
00055 // #define STB_VORBIS_NO_INTEGER_CONVERSION
00056 
00057 // STB_VORBIS_NO_FAST_SCALED_FLOAT
00058 //      does not use a fast float-to-int trick to accelerate float-to-int on
00059 //      most platforms which requires endianness be defined correctly.
00060 #define STB_VORBIS_NO_FAST_SCALED_FLOAT
00061 
00062 
00063 // STB_VORBIS_MAX_CHANNELS [number]
00064 //     globally define this to the maximum number of channels you need.
00065 //     The spec does not put a restriction on channels except that
00066 //     the count is stored in a byte, so 255 is the hard limit.
00067 //     Reducing this saves about 16 bytes per value, so using 16 saves
00068 //     (255-16)*16 or around 4KB. Plus anything other memory usage
00069 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
00070 //     6 (5.1 audio), or 2 (stereo only).
00071 #ifndef STB_VORBIS_MAX_CHANNELS
00072 #define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
00073 #endif
00074 
00075 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
00076 //     after a flush_pushdata(), stb_vorbis begins scanning for the
00077 //     next valid page, without backtracking. when it finds something
00078 //     that looks like a page, it streams through it and verifies its
00079 //     CRC32. Should that validation fail, it keeps scanning. But it's
00080 //     possible that _while_ streaming through to check the CRC32 of
00081 //     one candidate page, it sees another candidate page. This #define
00082 //     determines how many "overlapping" candidate pages it can search
00083 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
00084 //     garbage pages could be as big as 64KB, but probably average ~16KB.
00085 //     So don't hose ourselves by scanning an apparent 64KB page and
00086 //     missing a ton of real ones in the interim; so minimum of 2
00087 #ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
00088 #define STB_VORBIS_PUSHDATA_CRC_COUNT  4
00089 #endif
00090 
00091 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
00092 //     sets the log size of the huffman-acceleration table.  Maximum
00093 //     supported value is 24. with larger numbers, more decodings are O(1),
00094 //     but the table size is larger so worse cache missing, so you'll have
00095 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
00096 #ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
00097 #define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
00098 #endif
00099 
00100 // STB_VORBIS_FAST_BINARY_LENGTH [number]
00101 //     sets the log size of the binary-search acceleration table. this
00102 //     is used in similar fashion to the fast-huffman size to set initial
00103 //     parameters for the binary search
00104 
00105 // STB_VORBIS_FAST_HUFFMAN_INT
00106 //     The fast huffman tables are much more efficient if they can be
00107 //     stored as 16-bit results instead of 32-bit results. This restricts
00108 //     the codebooks to having only 65535 possible outcomes, though.
00109 //     (At least, accelerated by the huffman table.)
00110 #ifndef STB_VORBIS_FAST_HUFFMAN_INT
00111 #define STB_VORBIS_FAST_HUFFMAN_SHORT
00112 #endif
00113 
00114 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
00115 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
00116 //     back on binary searching for the correct one. This requires storing
00117 //     extra tables with the huffman codes in sorted order. Defining this
00118 //     symbol trades off space for speed by forcing a linear search in the
00119 //     non-fast case, except for "sparse" codebooks.
00120 // #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
00121 
00122 // STB_VORBIS_DIVIDES_IN_RESIDUE
00123 //     stb_vorbis precomputes the result of the scalar residue decoding
00124 //     that would otherwise require a divide per chunk. you can trade off
00125 //     space for time by defining this symbol.
00126 // #define STB_VORBIS_DIVIDES_IN_RESIDUE
00127 
00128 // STB_VORBIS_DIVIDES_IN_CODEBOOK
00129 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
00130 //     stored, or with all elements being chosen from a small range of values,
00131 //     and all values possible in all elements. By default, stb_vorbis expands
00132 //     this latter kind out to look like the former kind for ease of decoding,
00133 //     because otherwise an integer divide-per-vector-element is required to
00134 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
00135 //     trade off storage for speed.
00136 //#define STB_VORBIS_DIVIDES_IN_CODEBOOK
00137 
00138 // STB_VORBIS_CODEBOOK_SHORTS
00139 //     The vorbis file format encodes VQ codebook floats as ax+b where a and
00140 //     b are floating point per-codebook constants, and x is a 16-bit int.
00141 //     Normally, stb_vorbis decodes them to floats rather than leaving them
00142 //     as 16-bit ints and computing ax+b while decoding. This is a speed/space
00143 //     tradeoff; you can save space by defining this flag.
00144 #ifndef STB_VORBIS_CODEBOOK_SHORTS
00145 #define STB_VORBIS_CODEBOOK_FLOATS
00146 #endif
00147 
00148 // STB_VORBIS_DIVIDE_TABLE
00149 //     this replaces small integer divides in the floor decode loop with
00150 //     table lookups. made less than 1% difference, so disabled by default.
00151 
00152 // STB_VORBIS_NO_INLINE_DECODE
00153 //     disables the inlining of the scalar codebook fast-huffman decode.
00154 //     might save a little codespace; useful for debugging
00155 // #define STB_VORBIS_NO_INLINE_DECODE
00156 
00157 // STB_VORBIS_NO_DEFER_FLOOR
00158 //     Normally we only decode the floor without synthesizing the actual
00159 //     full curve. We can instead synthesize the curve immediately. This
00160 //     requires more memory and is very likely slower, so I don't think
00161 //     you'd ever want to do it except for debugging.
00162 // #define STB_VORBIS_NO_DEFER_FLOOR
00163 
00164 
00165 
00166 
00168 
00169 #ifdef STB_VORBIS_NO_PULLDATA_API
00170    #define STB_VORBIS_NO_INTEGER_CONVERSION
00171    #define STB_VORBIS_NO_STDIO
00172 #endif
00173 
00174 #if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
00175    #define STB_VORBIS_NO_STDIO 1
00176 #endif
00177 
00178 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
00179 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
00180 
00181    // only need endianness for fast-float-to-int, which we don't
00182    // use for pushdata
00183 
00184    #ifndef STB_VORBIS_BIG_ENDIAN
00185      #define STB_VORBIS_ENDIAN  0
00186    #else
00187      #define STB_VORBIS_ENDIAN  1
00188    #endif
00189 
00190 #endif
00191 #endif
00192 
00193 
00194 #ifndef STB_VORBIS_NO_STDIO
00195 #include <stdio.h>
00196 #endif
00197 
00198 #ifndef STB_VORBIS_NO_CRT
00199 #include <stdlib.h>
00200 #include <string.h>
00201 #include <assert.h>
00202 #include <math.h>
00203 
00204 #if !defined(__APPLE__) && !defined(MACOSX) && !defined(macintosh) && !defined(Macintosh) &&!defined(__FreeBSD__)
00205 #include <malloc.h>
00206 #endif
00207 
00208 #else
00209 #define NULL 0
00210 #endif
00211 
00212 #ifndef _MSC_VER
00213    #if __GNUC__
00214       #define __forceinline inline
00215    #else
00216       #define __forceinline
00217    #endif
00218 #endif
00219 
00220 #if STB_VORBIS_MAX_CHANNELS > 256
00221 #error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
00222 #endif
00223 
00224 #if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
00225 #error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
00226 #endif
00227 
00228 
00229 #define MAX_BLOCKSIZE_LOG  13   // from specification
00230 #define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
00231 
00232 
00233 typedef unsigned char  uint8;
00234 typedef   signed char   int8;
00235 typedef unsigned short uint16;
00236 typedef   signed short  int16;
00237 typedef unsigned int   uint32;
00238 typedef   signed int    int32;
00239 
00240 #ifndef TRUE
00241 #define TRUE 1
00242 #define FALSE 0
00243 #endif
00244 
00245 #ifdef STB_VORBIS_CODEBOOK_FLOATS
00246 typedef float codetype;
00247 #else
00248 typedef uint16 codetype;
00249 #endif
00250 
00251 // @NOTE
00252 //
00253 // Some arrays below are tagged "//varies", which means it's actually
00254 // a variable-sized piece of data, but rather than malloc I assume it's
00255 // small enough it's better to just allocate it all together with the
00256 // main thing
00257 //
00258 // Most of the variables are specified with the smallest size I could pack
00259 // them into. It might give better performance to make them all full-sized
00260 // integers. It should be safe to freely rearrange the structures or change
00261 // the sizes larger--nothing relies on silently truncating etc., nor the
00262 // order of variables.
00263 
00264 #define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
00265 #define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
00266 
00267 typedef struct
00268 {
00269    int dimensions, entries;
00270    uint8 *codeword_lengths;
00271    float  minimum_value;
00272    float  delta_value;
00273    uint8  value_bits;
00274    uint8  lookup_type;
00275    uint8  sequence_p;
00276    uint8  sparse;
00277    uint32 lookup_values;
00278    codetype *multiplicands;
00279    uint32 *codewords;
00280    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
00281     int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
00282    #else
00283     int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
00284    #endif
00285    uint32 *sorted_codewords;
00286    int    *sorted_values;
00287    int     sorted_entries;
00288 } Codebook;
00289 
00290 typedef struct
00291 {
00292    uint8 order;
00293    uint16 rate;
00294    uint16 bark_map_size;
00295    uint8 amplitude_bits;
00296    uint8 amplitude_offset;
00297    uint8 number_of_books;
00298    uint8 book_list[16]; // varies
00299 } Floor0;
00300 
00301 typedef struct
00302 {
00303    uint8 partitions;
00304    uint8 partition_class_list[32]; // varies
00305    uint8 class_dimensions[16]; // varies
00306    uint8 class_subclasses[16]; // varies
00307    uint8 class_masterbooks[16]; // varies
00308    int16 subclass_books[16][8]; // varies
00309    uint16 Xlist[31*8+2]; // varies
00310    uint8 sorted_order[31*8+2];
00311    uint8 neighbors[31*8+2][2];
00312    uint8 floor1_multiplier;
00313    uint8 rangebits;
00314    int values;
00315 } Floor1;
00316 
00317 typedef union
00318 {
00319    Floor0 floor0;
00320    Floor1 floor1;
00321 } Floor;
00322 
00323 typedef struct
00324 {
00325    uint32 begin, end;
00326    uint32 part_size;
00327    uint8 classifications;
00328    uint8 classbook;
00329    uint8 **classdata;
00330    int16 (*residue_books)[8];
00331 } Residue;
00332 
00333 typedef struct
00334 {
00335    uint8 magnitude;
00336    uint8 angle;
00337    uint8 mux;
00338 } MappingChannel;
00339 
00340 typedef struct
00341 {
00342    uint16 coupling_steps;
00343    MappingChannel *chan;
00344    uint8  submaps;
00345    uint8  submap_floor[15]; // varies
00346    uint8  submap_residue[15]; // varies
00347 } Mapping;
00348 
00349 typedef struct
00350 {
00351    uint8 blockflag;
00352    uint8 mapping;
00353    uint16 windowtype;
00354    uint16 transformtype;
00355 } Mode;
00356 
00357 typedef struct
00358 {
00359    uint32  goal_crc;    // expected crc if match
00360    int     bytes_left;  // bytes left in packet
00361    uint32  crc_so_far;  // running crc
00362    int     bytes_done;  // bytes processed in _current_ chunk
00363    uint32  sample_loc;  // granule pos encoded in page
00364 } CRCscan;
00365 
00366 typedef struct
00367 {
00368    uint32 page_start, page_end;
00369    uint32 after_previous_page_start;
00370    uint32 first_decoded_sample;
00371    uint32 last_decoded_sample;
00372 } ProbedPage;
00373 
00374 struct stb_vorbis
00375 {
00376   // user-accessible info
00377    unsigned int sample_rate;
00378    int channels;
00379 
00380    unsigned int setup_memory_required;
00381    unsigned int temp_memory_required;
00382    unsigned int setup_temp_memory_required;
00383 
00384   // input config
00385 #ifndef STB_VORBIS_NO_STDIO
00386    FILE *f;
00387    uint32 f_start;
00388    int close_on_free;
00389 #endif
00390 
00391    uint8 *stream;
00392    uint8 *stream_start;
00393    uint8 *stream_end;
00394 
00395    uint32 stream_len;
00396 
00397    uint8  push_mode;
00398 
00399    uint32 first_audio_page_offset;
00400 
00401    ProbedPage p_first, p_last;
00402 
00403   // memory management
00404    stb_vorbis_alloc alloc;
00405    int setup_offset;
00406    int temp_offset;
00407 
00408   // run-time results
00409    int eof;
00410    enum STBVorbisError error;
00411 
00412   // user-useful data
00413 
00414   // header info
00415    int blocksize[2];
00416    int blocksize_0, blocksize_1;
00417    int codebook_count;
00418    Codebook *codebooks;
00419    int floor_count;
00420    uint16 floor_types[64]; // varies
00421    Floor *floor_config;
00422    int residue_count;
00423    uint16 residue_types[64]; // varies
00424    Residue *residue_config;
00425    int mapping_count;
00426    Mapping *mapping;
00427    int mode_count;
00428    Mode mode_config[64];  // varies
00429 
00430    uint32 total_samples;
00431 
00432   // decode buffer
00433    float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
00434    float *outputs        [STB_VORBIS_MAX_CHANNELS];
00435 
00436    float *previous_window[STB_VORBIS_MAX_CHANNELS];
00437    int previous_length;
00438 
00439    #ifndef STB_VORBIS_NO_DEFER_FLOOR
00440    int16 *finalY[STB_VORBIS_MAX_CHANNELS];
00441    #else
00442    float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
00443    #endif
00444 
00445    uint32 current_loc; // sample location of next frame to decode
00446    int    current_loc_valid;
00447 
00448   // per-blocksize precomputed data
00449    
00450    // twiddle factors
00451    float *A[2],*B[2],*C[2];
00452    float *window[2];
00453    uint16 *bit_reverse[2];
00454 
00455   // current page/packet/segment streaming info
00456    uint32 serial; // stream serial number for verification
00457    int last_page;
00458    int segment_count;
00459    uint8 segments[255];
00460    uint8 page_flag;
00461    uint8 bytes_in_seg;
00462    uint8 first_decode;
00463    int next_seg;
00464    int last_seg;  // flag that we're on the last segment
00465    int last_seg_which; // what was the segment number of the last seg?
00466    uint32 acc;
00467    int valid_bits;
00468    int packet_bytes;
00469    int end_seg_with_known_loc;
00470    uint32 known_loc_for_packet;
00471    int discard_samples_deferred;
00472    uint32 samples_output;
00473 
00474   // push mode scanning
00475    int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
00476 #ifndef STB_VORBIS_NO_PUSHDATA_API
00477    CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
00478 #endif
00479 
00480   // sample-access
00481    int channel_buffer_start;
00482    int channel_buffer_end;
00483 };
00484 
00485 extern int my_prof(int slot);
00486 //#define stb_prof my_prof
00487 
00488 #ifndef stb_prof
00489 #define stb_prof(x)  0
00490 #endif
00491 
00492 #if defined(STB_VORBIS_NO_PUSHDATA_API)
00493    #define IS_PUSH_MODE(f)   FALSE
00494 #elif defined(STB_VORBIS_NO_PULLDATA_API)
00495    #define IS_PUSH_MODE(f)   TRUE
00496 #else
00497    #define IS_PUSH_MODE(f)   ((f)->push_mode)
00498 #endif
00499 
00500 typedef struct stb_vorbis vorb;
00501 
00502 static int error(vorb *f, enum STBVorbisError e)
00503 {
00504    f->error = e;
00505    if (!f->eof && e != VORBIS_need_more_data) {
00506       f->error=e; // breakpoint for debugging
00507    }
00508    return 0;
00509 }
00510 
00511 
00512 // these functions are used for allocating temporary memory
00513 // while decoding. if you can afford the stack space, use
00514 // alloca(); otherwise, provide a temp buffer and it will
00515 // allocate out of those.
00516 
00517 #define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
00518 
00519 #define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
00520 #ifdef dealloca
00521 #define temp_free(f,p)                  (f->alloc.alloc_buffer ? 0 : dealloca(size))
00522 #else
00523 #define temp_free(f,p)                  0
00524 #endif
00525 #define temp_alloc_save(f)              ((f)->temp_offset)
00526 #define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
00527 
00528 #define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
00529 
00530 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
00531 static void *make_block_array(void *mem, int count, int size)
00532 {
00533    int i;
00534    void ** p = (void **) mem;
00535    char *q = (char *) (p + count);
00536    for (i=0; i < count; ++i) {
00537       p[i] = q;
00538       q += size;
00539    }
00540    return p;
00541 }
00542 
00543 static void *setup_malloc(vorb *f, int sz)
00544 {
00545    sz = (sz+3) & ~3;
00546    f->setup_memory_required += sz;
00547    if (f->alloc.alloc_buffer) {
00548       void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
00549       if (f->setup_offset + sz > f->temp_offset) return NULL;
00550       f->setup_offset += sz;
00551       return p;
00552    }
00553    return sz ? malloc(sz) : NULL;
00554 }
00555 
00556 static void setup_free(vorb *f, void *p)
00557 {
00558    if (f->alloc.alloc_buffer) return; // do nothing; setup mem is not a stack
00559    free(p);
00560 }
00561 
00562 static void *setup_temp_malloc(vorb *f, int sz)
00563 {
00564    sz = (sz+3) & ~3;
00565    if (f->alloc.alloc_buffer) {
00566       if (f->temp_offset - sz < f->setup_offset) return NULL;
00567       f->temp_offset -= sz;
00568       return (char *) f->alloc.alloc_buffer + f->temp_offset;
00569    }
00570    return malloc(sz);
00571 }
00572 
00573 static void setup_temp_free(vorb *f, void *p, size_t sz)
00574 {
00575    if (f->alloc.alloc_buffer) {
00576       f->temp_offset += (sz+3)&~3;
00577       return;
00578    }
00579    free(p);
00580 }
00581 
00582 #define CRC32_POLY    0x04c11db7   // from spec
00583 
00584 static uint32 crc_table[256];
00585 static void crc32_init(void)
00586 {
00587    int i,j;
00588    uint32 s;
00589    for(i=0; i < 256; i++) {
00590       for (s=i<<24, j=0; j < 8; ++j)
00591          s = (s << 1) ^ (s >= (1<<31) ? CRC32_POLY : 0);
00592       crc_table[i] = s;
00593    }
00594 }
00595 
00596 static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
00597 {
00598    return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
00599 }
00600 
00601 
00602 // used in setup, and for huffman that doesn't go fast path
00603 static unsigned int bit_reverse(unsigned int n)
00604 {
00605   n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
00606   n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
00607   n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
00608   n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
00609   return (n >> 16) | (n << 16);
00610 }
00611 
00612 static float square(float x)
00613 {
00614    return x*x;
00615 }
00616 
00617 // this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
00618 // as required by the specification. fast(?) implementation from stb.h
00619 // @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
00620 static int ilog(int32 n)
00621 {
00622    static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
00623 
00624    // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
00625    if (n < (1U << 14))
00626         if (n < (1U <<  4))        return     0 + log2_4[n      ];
00627         else if (n < (1U <<  9))      return  5 + log2_4[n >>  5];
00628              else                     return 10 + log2_4[n >> 10];
00629    else if (n < (1U << 24))
00630              if (n < (1U << 19))      return 15 + log2_4[n >> 15];
00631              else                     return 20 + log2_4[n >> 20];
00632         else if (n < (1U << 29))      return 25 + log2_4[n >> 25];
00633              else if (n < (1U << 31)) return 30 + log2_4[n >> 30];
00634                   else                return 0; // signed n returns 0
00635 }
00636 
00637 #ifndef M_PI
00638   #define M_PI  3.14159265358979323846264f  // from CRC
00639 #endif
00640 
00641 // code length assigned to a value with no huffman encoding
00642 #define NO_CODE   255
00643 
00645 //
00646 // these functions are only called at setup, and only a few times
00647 // per file
00648 
00649 static float float32_unpack(uint32 x)
00650 {
00651    // from the specification
00652    uint32 mantissa = x & 0x1fffff;
00653    uint32 sign = x & 0x80000000;
00654    uint32 exp = (x & 0x7fe00000) >> 21;
00655    double res = sign ? -(double)mantissa : (double)mantissa;
00656    return (float) ldexp((float)res, exp-788);
00657 }
00658 
00659 
00660 // zlib & jpeg huffman tables assume that the output symbols
00661 // can either be arbitrarily arranged, or have monotonically
00662 // increasing frequencies--they rely on the lengths being sorted;
00663 // this makes for a very simple generation algorithm.
00664 // vorbis allows a huffman table with non-sorted lengths. This
00665 // requires a more sophisticated construction, since symbols in
00666 // order do not map to huffman codes "in order".
00667 static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
00668 {
00669    if (!c->sparse) {
00670       c->codewords      [symbol] = huff_code;
00671    } else {
00672       c->codewords       [count] = huff_code;
00673       c->codeword_lengths[count] = len;
00674       values             [count] = symbol;
00675    }
00676 }
00677 
00678 static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
00679 {
00680    int i,k,m=0;
00681    uint32 available[32];
00682 
00683    memset(available, 0, sizeof(available));
00684    // find the first entry
00685    for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
00686    if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
00687    // add to the list
00688    add_entry(c, 0, k, m++, len[k], values);
00689    // add all available leaves
00690    for (i=1; i <= len[k]; ++i)
00691       available[i] = 1 << (32-i);
00692    // note that the above code treats the first case specially,
00693    // but it's really the same as the following code, so they
00694    // could probably be combined (except the initial code is 0,
00695    // and I use 0 in available[] to mean 'empty')
00696    for (i=k+1; i < n; ++i) {
00697       uint32 res;
00698       int z = len[i], y;
00699       if (z == NO_CODE) continue;
00700       // find lowest available leaf (should always be earliest,
00701       // which is what the specification calls for)
00702       // note that this property, and the fact we can never have
00703       // more than one free leaf at a given level, isn't totally
00704       // trivial to prove, but it seems true and the assert never
00705       // fires, so!
00706       while (z > 0 && !available[z]) --z;
00707       if (z == 0) { assert(0); return FALSE; }
00708       res = available[z];
00709       available[z] = 0;
00710       add_entry(c, bit_reverse(res), i, m++, len[i], values);
00711       // propogate availability up the tree
00712       if (z != len[i]) {
00713          for (y=len[i]; y > z; --y) {
00714             assert(available[y] == 0);
00715             available[y] = res + (1 << (32-y));
00716          }
00717       }
00718    }
00719    return TRUE;
00720 }
00721 
00722 // accelerated huffman table allows fast O(1) match of all symbols
00723 // of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
00724 static void compute_accelerated_huffman(Codebook *c)
00725 {
00726    int i, len;
00727    for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
00728       c->fast_huffman[i] = -1;
00729 
00730    len = c->sparse ? c->sorted_entries : c->entries;
00731    #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
00732    if (len > 32767) len = 32767; // largest possible value we can encode!
00733    #endif
00734    for (i=0; i < len; ++i) {
00735       if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
00736          uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
00737          // set table entries for all bit combinations in the higher bits
00738          while (z < FAST_HUFFMAN_TABLE_SIZE) {
00739              c->fast_huffman[z] = i;
00740              z += 1 << c->codeword_lengths[i];
00741          }
00742       }
00743    }
00744 }
00745 
00746 static int uint32_compare(const void *p, const void *q)
00747 {
00748    uint32 x = * (uint32 *) p;
00749    uint32 y = * (uint32 *) q;
00750    return x < y ? -1 : x > y;
00751 }
00752 
00753 static int include_in_sort(Codebook *c, uint8 len)
00754 {
00755    if (c->sparse) { assert(len != NO_CODE); return TRUE; }
00756    if (len == NO_CODE) return FALSE;
00757    if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
00758    return FALSE;
00759 }
00760 
00761 // if the fast table above doesn't work, we want to binary
00762 // search them... need to reverse the bits
00763 static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
00764 {
00765    int i, len;
00766    // build a list of all the entries
00767    // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
00768    // this is kind of a frivolous optimization--I don't see any performance improvement,
00769    // but it's like 4 extra lines of code, so.
00770    if (!c->sparse) {
00771       int k = 0;
00772       for (i=0; i < c->entries; ++i)
00773          if (include_in_sort(c, lengths[i])) 
00774             c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
00775       assert(k == c->sorted_entries);
00776    } else {
00777       for (i=0; i < c->sorted_entries; ++i)
00778          c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
00779    }
00780 
00781    qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
00782    c->sorted_codewords[c->sorted_entries] = 0xffffffff;
00783 
00784    len = c->sparse ? c->sorted_entries : c->entries;
00785    // now we need to indicate how they correspond; we could either
00786    //   #1: sort a different data structure that says who they correspond to
00787    //   #2: for each sorted entry, search the original list to find who corresponds
00788    //   #3: for each original entry, find the sorted entry
00789    // #1 requires extra storage, #2 is slow, #3 can use binary search!
00790    for (i=0; i < len; ++i) {
00791       int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
00792       if (include_in_sort(c,huff_len)) {
00793          uint32 code = bit_reverse(c->codewords[i]);
00794          int x=0, n=c->sorted_entries;
00795          while (n > 1) {
00796             // invariant: sc[x] <= code < sc[x+n]
00797             int m = x + (n >> 1);
00798             if (c->sorted_codewords[m] <= code) {
00799                x = m;
00800                n -= (n>>1);
00801             } else {
00802                n >>= 1;
00803             }
00804          }
00805          assert(c->sorted_codewords[x] == code);
00806          if (c->sparse) {
00807             c->sorted_values[x] = values[i];
00808             c->codeword_lengths[x] = huff_len;
00809          } else {
00810             c->sorted_values[x] = i;
00811          }
00812       }
00813    }
00814 }
00815 
00816 // only run while parsing the header (3 times)
00817 static int vorbis_validate(uint8 *data)
00818 {
00819    static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
00820    return memcmp(data, vorbis, 6) == 0;
00821 }
00822 
00823 // called from setup only, once per code book
00824 // (formula implied by specification)
00825 static int lookup1_values(int entries, int dim)
00826 {
00827    int r = (int) floor(exp((float) log((float) entries) / dim));
00828    if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
00829       ++r;                                              // floor() to avoid _ftol() when non-CRT
00830    assert(pow((float) r+1, dim) > entries);
00831    assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
00832    return r;
00833 }
00834 
00835 // called twice per file
00836 static void compute_twiddle_factors(int n, float *A, float *B, float *C)
00837 {
00838    int n4 = n >> 2, n8 = n >> 3;
00839    int k,k2;
00840 
00841    for (k=k2=0; k < n4; ++k,k2+=2) {
00842       A[k2  ] = (float)  cos(4*k*M_PI/n);
00843       A[k2+1] = (float) -sin(4*k*M_PI/n);
00844       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
00845       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
00846    }
00847    for (k=k2=0; k < n8; ++k,k2+=2) {
00848       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
00849       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
00850    }
00851 }
00852 
00853 static void compute_window(int n, float *window)
00854 {
00855    int n2 = n >> 1, i;
00856    for (i=0; i < n2; ++i)
00857       window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
00858 }
00859 
00860 static void compute_bitreverse(int n, uint16 *rev)
00861 {
00862    int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
00863    int i, n8 = n >> 3;
00864    for (i=0; i < n8; ++i)
00865       rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
00866 }
00867 
00868 static int init_blocksize(vorb *f, int b, int n)
00869 {
00870    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
00871    f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
00872    f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
00873    f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
00874    if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
00875    compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
00876    f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
00877    if (!f->window[b]) return error(f, VORBIS_outofmem);
00878    compute_window(n, f->window[b]);
00879    f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
00880    if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
00881    compute_bitreverse(n, f->bit_reverse[b]);
00882    return TRUE;
00883 }
00884 
00885 static void neighbors(uint16 *x, int n, int *plow, int *phigh)
00886 {
00887    int low = -1;
00888    int high = 65536;
00889    int i;
00890    for (i=0; i < n; ++i) {
00891       if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
00892       if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
00893    }
00894 }
00895 
00896 // this has been repurposed so y is now the original index instead of y
00897 typedef struct
00898 {
00899    uint16 x,y;
00900 } Point;
00901 
00902 int point_compare(const void *p, const void *q)
00903 {
00904    Point *a = (Point *) p;
00905    Point *b = (Point *) q;
00906    return a->x < b->x ? -1 : a->x > b->x;
00907 }
00908 
00909 //
00911 
00912 
00913 #if defined(STB_VORBIS_NO_STDIO)
00914    #define USE_MEMORY(z)    TRUE
00915 #else
00916    #define USE_MEMORY(z)    ((z)->stream)
00917 #endif
00918 
00919 static uint8 get8(vorb *z)
00920 {
00921    if (USE_MEMORY(z)) {
00922       if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
00923       return *z->stream++;
00924    }
00925 
00926    #ifndef STB_VORBIS_NO_STDIO
00927    {
00928    int c = fgetc(z->f);
00929    if (c == EOF) { z->eof = TRUE; return 0; }
00930    return c;
00931    }
00932    #endif
00933 }
00934 
00935 static uint32 get32(vorb *f)
00936 {
00937    uint32 x;
00938    x = get8(f);
00939    x += get8(f) << 8;
00940    x += get8(f) << 16;
00941    x += get8(f) << 24;
00942    return x;
00943 }
00944 
00945 static int getn(vorb *z, uint8 *data, int n)
00946 {
00947    if (USE_MEMORY(z)) {
00948       if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
00949       memcpy(data, z->stream, n);
00950       z->stream += n;
00951       return 1;
00952    }
00953 
00954    #ifndef STB_VORBIS_NO_STDIO   
00955    if (fread(data, n, 1, z->f) == 1)
00956       return 1;
00957    else {
00958       z->eof = 1;
00959       return 0;
00960    }
00961    #endif
00962 }
00963 
00964 static void skip(vorb *z, int n)
00965 {
00966    if (USE_MEMORY(z)) {
00967       z->stream += n;
00968       if (z->stream >= z->stream_end) z->eof = 1;
00969       return;
00970    }
00971    #ifndef STB_VORBIS_NO_STDIO
00972    {
00973       long x = ftell(z->f);
00974       fseek(z->f, x+n, SEEK_SET);
00975    }
00976    #endif
00977 }
00978 
00979 static int set_file_offset(stb_vorbis *f, unsigned int loc)
00980 {
00981    #ifndef STB_VORBIS_NO_PUSHDATA_API
00982    if (f->push_mode) return 0;
00983    #endif
00984    f->eof = 0;
00985    if (USE_MEMORY(f)) {
00986       if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
00987          f->stream = f->stream_end;
00988          f->eof = 1;
00989          return 0;
00990       } else {
00991          f->stream = f->stream_start + loc;
00992          return 1;
00993       }
00994    }
00995    #ifndef STB_VORBIS_NO_STDIO
00996    if (loc + f->f_start < loc || loc >= 0x80000000) {
00997       loc = 0x7fffffff;
00998       f->eof = 1;
00999    } else {
01000       loc += f->f_start;
01001    }
01002    if (!fseek(f->f, loc, SEEK_SET))
01003       return 1;
01004    f->eof = 1;
01005    fseek(f->f, f->f_start, SEEK_END);
01006    return 0;
01007    #endif
01008 }
01009 
01010 
01011 static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
01012 
01013 static int capture_pattern(vorb *f)
01014 {
01015    if (0x4f != get8(f)) return FALSE;
01016    if (0x67 != get8(f)) return FALSE;
01017    if (0x67 != get8(f)) return FALSE;
01018    if (0x53 != get8(f)) return FALSE;
01019    return TRUE;
01020 }
01021 
01022 #define PAGEFLAG_continued_packet   1
01023 #define PAGEFLAG_first_page         2
01024 #define PAGEFLAG_last_page          4
01025 
01026 static int start_page_no_capturepattern(vorb *f)
01027 {
01028    uint32 loc0,loc1,n,i;
01029    // stream structure version
01030    if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
01031    // header flag
01032    f->page_flag = get8(f);
01033    // absolute granule position
01034    loc0 = get32(f); 
01035    loc1 = get32(f);
01036    // @TODO: validate loc0,loc1 as valid positions?
01037    // stream serial number -- vorbis doesn't interleave, so discard
01038    get32(f);
01039    //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
01040    // page sequence number
01041    n = get32(f);
01042    f->last_page = n;
01043    // CRC32
01044    get32(f);
01045    // page_segments
01046    f->segment_count = get8(f);
01047    if (!getn(f, f->segments, f->segment_count))
01048       return error(f, VORBIS_unexpected_eof);
01049    // assume we _don't_ know any the sample position of any segments
01050    f->end_seg_with_known_loc = -2;
01051    if (loc0 != ~0 || loc1 != ~0) {
01052       // determine which packet is the last one that will complete
01053       for (i=f->segment_count-1; i >= 0; --i)
01054          if (f->segments[i] < 255)
01055             break;
01056       // 'i' is now the index of the _last_ segment of a packet that ends
01057       if (i >= 0) {
01058          f->end_seg_with_known_loc = i;
01059          f->known_loc_for_packet   = loc0;
01060       }
01061    }
01062    if (f->first_decode) {
01063       int i,len;
01064       ProbedPage p;
01065       len = 0;
01066       for (i=0; i < f->segment_count; ++i)
01067          len += f->segments[i];
01068       len += 27 + f->segment_count;
01069       p.page_start = f->first_audio_page_offset;
01070       p.page_end = p.page_start + len;
01071       p.after_previous_page_start = p.page_start;
01072       p.first_decoded_sample = 0;
01073       p.last_decoded_sample = loc0;
01074       f->p_first = p;
01075    }
01076    f->next_seg = 0;
01077    return TRUE;
01078 }
01079 
01080 static int start_page(vorb *f)
01081 {
01082    if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
01083    return start_page_no_capturepattern(f);
01084 }
01085 
01086 static int start_packet(vorb *f)
01087 {
01088    while (f->next_seg == -1) {
01089       if (!start_page(f)) return FALSE;
01090       if (f->page_flag & PAGEFLAG_continued_packet)
01091          return error(f, VORBIS_continued_packet_flag_invalid);
01092    }
01093    f->last_seg = FALSE;
01094    f->valid_bits = 0;
01095    f->packet_bytes = 0;
01096    f->bytes_in_seg = 0;
01097    // f->next_seg is now valid
01098    return TRUE;
01099 }
01100 
01101 static int maybe_start_packet(vorb *f)
01102 {
01103    if (f->next_seg == -1) {
01104       int x = get8(f);
01105       if (f->eof) return FALSE; // EOF at page boundary is not an error!
01106       if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
01107       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
01108       if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
01109       if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
01110       if (!start_page_no_capturepattern(f)) return FALSE;
01111       if (f->page_flag & PAGEFLAG_continued_packet) {
01112          // set up enough state that we can read this packet if we want,
01113          // e.g. during recovery
01114          f->last_seg = FALSE;
01115          f->bytes_in_seg = 0;
01116          return error(f, VORBIS_continued_packet_flag_invalid);
01117       }
01118    }
01119    return start_packet(f);
01120 }
01121 
01122 static int next_segment(vorb *f)
01123 {
01124    int len;
01125    if (f->last_seg) return 0;
01126    if (f->next_seg == -1) {
01127       f->last_seg_which = f->segment_count-1; // in case start_page fails
01128       if (!start_page(f)) { f->last_seg = 1; return 0; }
01129       if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
01130    }
01131    len = f->segments[f->next_seg++];
01132    if (len < 255) {
01133       f->last_seg = TRUE;
01134       f->last_seg_which = f->next_seg-1;
01135    }
01136    if (f->next_seg >= f->segment_count)
01137       f->next_seg = -1;
01138    assert(f->bytes_in_seg == 0);
01139    f->bytes_in_seg = len;
01140    return len;
01141 }
01142 
01143 #define EOP    (-1)
01144 #define INVALID_BITS  (-1)
01145 
01146 static int get8_packet_raw(vorb *f)
01147 {
01148    if (!f->bytes_in_seg)
01149       if (f->last_seg) return EOP;
01150       else if (!next_segment(f)) return EOP;
01151    assert(f->bytes_in_seg > 0);
01152    --f->bytes_in_seg;
01153    ++f->packet_bytes;
01154    return get8(f);
01155 }
01156 
01157 static int get8_packet(vorb *f)
01158 {
01159    int x = get8_packet_raw(f);
01160    f->valid_bits = 0;
01161    return x;
01162 }
01163 
01164 static void flush_packet(vorb *f)
01165 {
01166    while (get8_packet_raw(f) != EOP);
01167 }
01168 
01169 // @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
01170 // as the huffman decoder?
01171 static uint32 get_bits(vorb *f, int n)
01172 {
01173    uint32 z;
01174 
01175    if (f->valid_bits < 0) return 0;
01176    if (f->valid_bits < n) {
01177       if (n > 24) {
01178          // the accumulator technique below would not work correctly in this case
01179          z = get_bits(f, 24);
01180          z += get_bits(f, n-24) << 24;
01181          return z;
01182       }
01183       if (f->valid_bits == 0) f->acc = 0;
01184       while (f->valid_bits < n) {
01185          int z = get8_packet_raw(f);
01186          if (z == EOP) {
01187             f->valid_bits = INVALID_BITS;
01188             return 0;
01189          }
01190          f->acc += z << f->valid_bits;
01191          f->valid_bits += 8;
01192       }
01193    }
01194    if (f->valid_bits < 0) return 0;
01195    z = f->acc & ((1 << n)-1);
01196    f->acc >>= n;
01197    f->valid_bits -= n;
01198    return z;
01199 }
01200 
01201 static int32 get_bits_signed(vorb *f, int n)
01202 {
01203    uint32 z = get_bits(f, n);
01204    if (z & (1 << (n-1)))
01205       z += ~((1 << n) - 1);
01206    return (int32) z;
01207 }
01208 
01209 // @OPTIMIZE: primary accumulator for huffman
01210 // expand the buffer to as many bits as possible without reading off end of packet
01211 // it might be nice to allow f->valid_bits and f->acc to be stored in registers,
01212 // e.g. cache them locally and decode locally
01213 static __forceinline void prep_huffman(vorb *f)
01214 {
01215    if (f->valid_bits <= 24) {
01216       if (f->valid_bits == 0) f->acc = 0;
01217       do {
01218          int z;
01219          if (f->last_seg && !f->bytes_in_seg) return;
01220          z = get8_packet_raw(f);
01221          if (z == EOP) return;
01222          f->acc += z << f->valid_bits;
01223          f->valid_bits += 8;
01224       } while (f->valid_bits <= 24);
01225    }
01226 }
01227 
01228 enum
01229 {
01230    VORBIS_packet_id = 1,
01231    VORBIS_packet_comment = 3,
01232    VORBIS_packet_setup = 5,
01233 };
01234 
01235 static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
01236 {
01237    int i;
01238    prep_huffman(f);
01239 
01240    assert(c->sorted_codewords || c->codewords);
01241    // cases to use binary search: sorted_codewords && !c->codewords
01242    //                             sorted_codewords && c->entries > 8
01243    if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
01244       // binary search
01245       uint32 code = bit_reverse(f->acc);
01246       int x=0, n=c->sorted_entries, len;
01247 
01248       while (n > 1) {
01249          // invariant: sc[x] <= code < sc[x+n]
01250          int m = x + (n >> 1);
01251          if (c->sorted_codewords[m] <= code) {
01252             x = m;
01253             n -= (n>>1);
01254          } else {
01255             n >>= 1;
01256          }
01257       }
01258       // x is now the sorted index
01259       if (!c->sparse) x = c->sorted_values[x];
01260       // x is now sorted index if sparse, or symbol otherwise
01261       len = c->codeword_lengths[x];
01262       if (f->valid_bits >= len) {
01263          f->acc >>= len;
01264          f->valid_bits -= len;
01265          return x;
01266       }
01267 
01268       f->valid_bits = 0;
01269       return -1;
01270    }
01271 
01272    // if small, linear search
01273    assert(!c->sparse);
01274    for (i=0; i < c->entries; ++i) {
01275       if (c->codeword_lengths[i] == NO_CODE) continue;
01276       if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
01277          if (f->valid_bits >= c->codeword_lengths[i]) {
01278             f->acc >>= c->codeword_lengths[i];
01279             f->valid_bits -= c->codeword_lengths[i];
01280             return i;
01281          }
01282          f->valid_bits = 0;
01283          return -1;
01284       }
01285    }
01286 
01287    error(f, VORBIS_invalid_stream);
01288    f->valid_bits = 0;
01289    return -1;
01290 }
01291 
01292 static int codebook_decode_scalar(vorb *f, Codebook *c)
01293 {
01294    int i;
01295    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
01296       prep_huffman(f);
01297    // fast huffman table lookup
01298    i = f->acc & FAST_HUFFMAN_TABLE_MASK;
01299    i = c->fast_huffman[i];
01300    if (i >= 0) {
01301       f->acc >>= c->codeword_lengths[i];
01302       f->valid_bits -= c->codeword_lengths[i];
01303       if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
01304       return i;
01305    }
01306    return codebook_decode_scalar_raw(f,c);
01307 }
01308 
01309 #ifndef STB_VORBIS_NO_INLINE_DECODE
01310 
01311 #define DECODE_RAW(var, f,c)                                  \
01312    if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
01313       prep_huffman(f);                                        \
01314    var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
01315    var = c->fast_huffman[var];                                \
01316    if (var >= 0) {                                            \
01317       int n = c->codeword_lengths[var];                       \
01318       f->acc >>= n;                                           \
01319       f->valid_bits -= n;                                     \
01320       if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
01321    } else {                                                   \
01322       var = codebook_decode_scalar_raw(f,c);                  \
01323    }
01324 
01325 #else
01326 
01327 #define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);
01328 
01329 #endif
01330 
01331 #define DECODE(var,f,c)                                       \
01332    DECODE_RAW(var,f,c)                                        \
01333    if (c->sparse) var = c->sorted_values[var];
01334 
01335 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
01336   #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
01337 #else
01338   #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
01339 #endif
01340 
01341 
01342 
01343 
01344 
01345 
01346 // CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
01347 // where we avoid one addition
01348 #ifndef STB_VORBIS_CODEBOOK_FLOATS
01349    #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off] * c->delta_value + c->minimum_value)
01350    #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off] * c->delta_value)
01351    #define CODEBOOK_ELEMENT_BASE(c)         (c->minimum_value)
01352 #else
01353    #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
01354    #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
01355    #define CODEBOOK_ELEMENT_BASE(c)         (0)
01356 #endif
01357 
01358 static int codebook_decode_start(vorb *f, Codebook *c, int len)
01359 {
01360    int z = -1;
01361 
01362    // type 0 is only legal in a scalar context
01363    if (c->lookup_type == 0)
01364       error(f, VORBIS_invalid_stream);
01365    else {
01366       DECODE_VQ(z,f,c);
01367       if (c->sparse) assert(z < c->sorted_entries);
01368       if (z < 0) {  // check for EOP
01369          if (!f->bytes_in_seg)
01370             if (f->last_seg)
01371                return z;
01372          error(f, VORBIS_invalid_stream);
01373       }
01374    }
01375    return z;
01376 }
01377 
01378 static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
01379 {
01380    int i,z = codebook_decode_start(f,c,len);
01381    if (z < 0) return FALSE;
01382    if (len > c->dimensions) len = c->dimensions;
01383 
01384 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
01385    if (c->lookup_type == 1) {
01386       float last = CODEBOOK_ELEMENT_BASE(c);
01387       int div = 1;
01388       for (i=0; i < len; ++i) {
01389          int off = (z / div) % c->lookup_values;
01390          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
01391          output[i] += val;
01392          if (c->sequence_p) last = val + c->minimum_value;
01393          div *= c->lookup_values;
01394       }
01395       return TRUE;
01396    }
01397 #endif
01398 
01399    z *= c->dimensions;
01400    if (c->sequence_p) {
01401       float last = CODEBOOK_ELEMENT_BASE(c);
01402       for (i=0; i < len; ++i) {
01403          float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01404          output[i] += val;
01405          last = val + c->minimum_value;
01406       }
01407    } else {
01408       float last = CODEBOOK_ELEMENT_BASE(c);
01409       for (i=0; i < len; ++i) {
01410          output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01411       }
01412    }
01413 
01414    return TRUE;
01415 }
01416 
01417 static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
01418 {
01419    int i,z = codebook_decode_start(f,c,len);
01420    float last = CODEBOOK_ELEMENT_BASE(c);
01421    if (z < 0) return FALSE;
01422    if (len > c->dimensions) len = c->dimensions;
01423 
01424 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
01425    if (c->lookup_type == 1) {
01426       int div = 1;
01427       for (i=0; i < len; ++i) {
01428          int off = (z / div) % c->lookup_values;
01429          float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
01430          output[i*step] += val;
01431          if (c->sequence_p) last = val;
01432          div *= c->lookup_values;
01433       }
01434       return TRUE;
01435    }
01436 #endif
01437 
01438    z *= c->dimensions;
01439    for (i=0; i < len; ++i) {
01440       float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01441       output[i*step] += val;
01442       if (c->sequence_p) last = val;
01443    }
01444 
01445    return TRUE;
01446 }
01447 
01448 static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
01449 {
01450    int c_inter = *c_inter_p;
01451    int p_inter = *p_inter_p;
01452    int i,z, effective = c->dimensions;
01453 
01454    // type 0 is only legal in a scalar context
01455    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
01456 
01457    while (total_decode > 0) {
01458       float last = CODEBOOK_ELEMENT_BASE(c);
01459       DECODE_VQ(z,f,c);
01460       #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
01461       assert(!c->sparse || z < c->sorted_entries);
01462       #endif
01463       if (z < 0) {
01464          if (!f->bytes_in_seg)
01465             if (f->last_seg) return FALSE;
01466          return error(f, VORBIS_invalid_stream);
01467       }
01468 
01469       // if this will take us off the end of the buffers, stop short!
01470       // we check by computing the length of the virtual interleaved
01471       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
01472       // and the length we'll be using (effective)
01473       if (c_inter + p_inter*ch + effective > len * ch) {
01474          effective = len*ch - (p_inter*ch - c_inter);
01475       }
01476 
01477    #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
01478       if (c->lookup_type == 1) {
01479          int div = 1;
01480          for (i=0; i < effective; ++i) {
01481             int off = (z / div) % c->lookup_values;
01482             float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
01483             outputs[c_inter][p_inter] += val;
01484             if (++c_inter == ch) { c_inter = 0; ++p_inter; }
01485             if (c->sequence_p) last = val;
01486             div *= c->lookup_values;
01487          }
01488       } else
01489    #endif
01490       {
01491          z *= c->dimensions;
01492          if (c->sequence_p) {
01493             for (i=0; i < effective; ++i) {
01494                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01495                outputs[c_inter][p_inter] += val;
01496                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
01497                last = val;
01498             }
01499          } else {
01500             for (i=0; i < effective; ++i) {
01501                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01502                outputs[c_inter][p_inter] += val;
01503                if (++c_inter == ch) { c_inter = 0; ++p_inter; }
01504             }
01505          }
01506       }
01507 
01508       total_decode -= effective;
01509    }
01510    *c_inter_p = c_inter;
01511    *p_inter_p = p_inter;
01512    return TRUE;
01513 }
01514 
01515 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
01516 static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode)
01517 {
01518    int c_inter = *c_inter_p;
01519    int p_inter = *p_inter_p;
01520    int i,z, effective = c->dimensions;
01521 
01522    // type 0 is only legal in a scalar context
01523    if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
01524 
01525    while (total_decode > 0) {
01526       float last = CODEBOOK_ELEMENT_BASE(c);
01527       DECODE_VQ(z,f,c);
01528 
01529       if (z < 0) {
01530          if (!f->bytes_in_seg)
01531             if (f->last_seg) return FALSE;
01532          return error(f, VORBIS_invalid_stream);
01533       }
01534 
01535       // if this will take us off the end of the buffers, stop short!
01536       // we check by computing the length of the virtual interleaved
01537       // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
01538       // and the length we'll be using (effective)
01539       if (c_inter + p_inter*2 + effective > len * 2) {
01540          effective = len*2 - (p_inter*2 - c_inter);
01541       }
01542 
01543       {
01544          z *= c->dimensions;
01545          stb_prof(11);
01546          if (c->sequence_p) {
01547             // haven't optimized this case because I don't have any examples
01548             for (i=0; i < effective; ++i) {
01549                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01550                outputs[c_inter][p_inter] += val;
01551                if (++c_inter == 2) { c_inter = 0; ++p_inter; }
01552                last = val;
01553             }
01554          } else {
01555             i=0;
01556             if (c_inter == 1) {
01557                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01558                outputs[c_inter][p_inter] += val;
01559                c_inter = 0; ++p_inter;
01560                ++i;
01561             }
01562             {
01563                float *z0 = outputs[0];
01564                float *z1 = outputs[1];
01565                for (; i+1 < effective;) {
01566                   z0[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01567                   z1[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i+1) + last;
01568                   ++p_inter;
01569                   i += 2;
01570                }
01571             }
01572             if (i < effective) {
01573                float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
01574                outputs[c_inter][p_inter] += val;
01575                if (++c_inter == 2) { c_inter = 0; ++p_inter; }
01576             }
01577          }
01578       }
01579 
01580       total_decode -= effective;
01581    }
01582    *c_inter_p = c_inter;
01583    *p_inter_p = p_inter;
01584    return TRUE;
01585 }
01586 #endif
01587 
01588 static int predict_point(int x, int x0, int x1, int y0, int y1)
01589 {
01590    int dy = y1 - y0;
01591    int adx = x1 - x0;
01592    // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
01593    int err = abs(dy) * (x - x0);
01594    int off = err / adx;
01595    return dy < 0 ? y0 - off : y0 + off;
01596 }
01597 
01598 // the following table is block-copied from the specification
01599 static float inverse_db_table[256] =
01600 {
01601   1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 
01602   1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 
01603   1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 
01604   2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 
01605   2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 
01606   3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 
01607   4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 
01608   6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 
01609   7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 
01610   1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 
01611   1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 
01612   1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 
01613   2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 
01614   2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 
01615   3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 
01616   4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 
01617   5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 
01618   7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 
01619   9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 
01620   1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 
01621   1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 
01622   2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 
01623   2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 
01624   3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 
01625   4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 
01626   5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 
01627   7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 
01628   9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 
01629   0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 
01630   0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 
01631   0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 
01632   0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 
01633   0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 
01634   0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 
01635   0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 
01636   0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 
01637   0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f, 
01638   0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f, 
01639   0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f, 
01640   0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f, 
01641   0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f, 
01642   0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f, 
01643   0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f, 
01644   0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f, 
01645   0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f, 
01646   0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f, 
01647   0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f, 
01648   0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f, 
01649   0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f, 
01650   0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f, 
01651   0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f, 
01652   0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f, 
01653   0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f, 
01654   0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f, 
01655   0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f, 
01656   0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f, 
01657   0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f, 
01658   0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f, 
01659   0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f, 
01660   0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f, 
01661   0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f, 
01662   0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f, 
01663   0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f, 
01664   0.82788260f,    0.88168307f,    0.9389798f,     1.0f
01665 };
01666 
01667 
01668 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
01669 // note that you must produce bit-identical output to decode correctly;
01670 // this specific sequence of operations is specified in the spec (it's
01671 // drawing integer-quantized frequency-space lines that the encoder
01672 // expects to be exactly the same)
01673 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
01674 // have to divide in the setup? sigh.
01675 #ifndef STB_VORBIS_NO_DEFER_FLOOR
01676 #define LINE_OP(a,b)   a *= b
01677 #else
01678 #define LINE_OP(a,b)   a = b
01679 #endif
01680 
01681 #ifdef STB_VORBIS_DIVIDE_TABLE
01682 #define DIVTAB_NUMER   32
01683 #define DIVTAB_DENOM   64
01684 int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
01685 #endif
01686 
01687 static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
01688 {
01689    int dy = y1 - y0;
01690    int adx = x1 - x0;
01691    int ady = abs(dy);
01692    int base;
01693    int x=x0,y=y0;
01694    int err = 0;
01695    int sy;
01696 
01697 #ifdef STB_VORBIS_DIVIDE_TABLE
01698    if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
01699       if (dy < 0) {
01700          base = -integer_divide_table[ady][adx];
01701          sy = base-1;
01702       } else {
01703          base =  integer_divide_table[ady][adx];
01704          sy = base+1;
01705       }
01706    } else {
01707       base = dy / adx;
01708       if (dy < 0)
01709          sy = base - 1;
01710       else
01711          sy = base+1;
01712    }
01713 #else
01714    base = dy / adx;
01715    if (dy < 0)
01716       sy = base - 1;
01717    else
01718       sy = base+1;
01719 #endif
01720    ady -= abs(base) * adx;
01721    if (x1 > n) x1 = n;
01722    LINE_OP(output[x], inverse_db_table[y]);
01723    for (++x; x < x1; ++x) {
01724       err += ady;
01725       if (err >= adx) {
01726          err -= adx;
01727          y += sy;
01728       } else
01729          y += base;
01730       LINE_OP(output[x], inverse_db_table[y]);
01731    }
01732 }
01733 
01734 static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
01735 {
01736    int k;
01737    if (rtype == 0) {
01738       int step = n / book->dimensions;
01739       for (k=0; k < step; ++k)
01740          if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
01741             return FALSE;
01742    } else {
01743       for (k=0; k < n; ) {
01744          if (!codebook_decode(f, book, target+offset, n-k))
01745             return FALSE;
01746          k += book->dimensions;
01747          offset += book->dimensions;
01748       }
01749    }
01750    return TRUE;
01751 }
01752 
01753 static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
01754 {
01755    int i,j,pass;
01756    Residue *r = f->residue_config + rn;
01757    int rtype = f->residue_types[rn];
01758    int c = r->classbook;
01759    int classwords = f->codebooks[c].dimensions;
01760    int n_read = r->end - r->begin;
01761    int part_read = n_read / r->part_size;
01762    int temp_alloc_point = temp_alloc_save(f);
01763    #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01764    uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
01765    #else
01766    int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
01767    #endif
01768 
01769    stb_prof(2);
01770    for (i=0; i < ch; ++i)
01771       if (!do_not_decode[i])
01772          memset(residue_buffers[i], 0, sizeof(float) * n);
01773 
01774    if (rtype == 2 && ch != 1) {
01775       int len = ch * n;
01776       for (j=0; j < ch; ++j)
01777          if (!do_not_decode[j])
01778             break;
01779       if (j == ch)
01780          goto done;
01781 
01782       stb_prof(3);
01783       for (pass=0; pass < 8; ++pass) {
01784          int pcount = 0, class_set = 0;
01785          if (ch == 2) {
01786             stb_prof(13);
01787             while (pcount < part_read) {
01788                int z = r->begin + pcount*r->part_size;
01789                int c_inter = (z & 1), p_inter = z>>1;
01790                if (pass == 0) {
01791                   Codebook *c = f->codebooks+r->classbook;
01792                   int q;
01793                   DECODE(q,f,c);
01794                   if (q == EOP) goto done;
01795                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01796                   part_classdata[0][class_set] = r->classdata[q];
01797                   #else
01798                   for (i=classwords-1; i >= 0; --i) {
01799                      classifications[0][i+pcount] = q % r->classifications;
01800                      q /= r->classifications;
01801                   }
01802                   #endif
01803                }
01804                stb_prof(5);
01805                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
01806                   int z = r->begin + pcount*r->part_size;
01807                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01808                   int c = part_classdata[0][class_set][i];
01809                   #else
01810                   int c = classifications[0][pcount];
01811                   #endif
01812                   int b = r->residue_books[c][pass];
01813                   if (b >= 0) {
01814                      Codebook *book = f->codebooks + b;
01815                      stb_prof(20);  // accounts for X time
01816                      #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
01817                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
01818                         goto done;
01819                      #else
01820                      // saves 1%
01821                      if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r->part_size))
01822                         goto done;
01823                      #endif
01824                      stb_prof(7);
01825                   } else {
01826                      z += r->part_size;
01827                      c_inter = z & 1;
01828                      p_inter = z >> 1;
01829                   }
01830                }
01831                stb_prof(8);
01832                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01833                ++class_set;
01834                #endif
01835             }
01836          } else if (ch == 1) {
01837             while (pcount < part_read) {
01838                int z = r->begin + pcount*r->part_size;
01839                int c_inter = 0, p_inter = z;
01840                if (pass == 0) {
01841                   Codebook *c = f->codebooks+r->classbook;
01842                   int q;
01843                   DECODE(q,f,c);
01844                   if (q == EOP) goto done;
01845                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01846                   part_classdata[0][class_set] = r->classdata[q];
01847                   #else
01848                   for (i=classwords-1; i >= 0; --i) {
01849                      classifications[0][i+pcount] = q % r->classifications;
01850                      q /= r->classifications;
01851                   }
01852                   #endif
01853                }
01854                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
01855                   int z = r->begin + pcount*r->part_size;
01856                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01857                   int c = part_classdata[0][class_set][i];
01858                   #else
01859                   int c = classifications[0][pcount];
01860                   #endif
01861                   int b = r->residue_books[c][pass];
01862                   if (b >= 0) {
01863                      Codebook *book = f->codebooks + b;
01864                      stb_prof(22);
01865                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
01866                         goto done;
01867                      stb_prof(3);
01868                   } else {
01869                      z += r->part_size;
01870                      c_inter = 0;
01871                      p_inter = z;
01872                   }
01873                }
01874                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01875                ++class_set;
01876                #endif
01877             }
01878          } else {
01879             while (pcount < part_read) {
01880                int z = r->begin + pcount*r->part_size;
01881                int c_inter = z % ch, p_inter = z/ch;
01882                if (pass == 0) {
01883                   Codebook *c = f->codebooks+r->classbook;
01884                   int q;
01885                   DECODE(q,f,c);
01886                   if (q == EOP) goto done;
01887                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01888                   part_classdata[0][class_set] = r->classdata[q];
01889                   #else
01890                   for (i=classwords-1; i >= 0; --i) {
01891                      classifications[0][i+pcount] = q % r->classifications;
01892                      q /= r->classifications;
01893                   }
01894                   #endif
01895                }
01896                for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
01897                   int z = r->begin + pcount*r->part_size;
01898                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01899                   int c = part_classdata[0][class_set][i];
01900                   #else
01901                   int c = classifications[0][pcount];
01902                   #endif
01903                   int b = r->residue_books[c][pass];
01904                   if (b >= 0) {
01905                      Codebook *book = f->codebooks + b;
01906                      stb_prof(22);
01907                      if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
01908                         goto done;
01909                      stb_prof(3);
01910                   } else {
01911                      z += r->part_size;
01912                      c_inter = z % ch;
01913                      p_inter = z / ch;
01914                   }
01915                }
01916                #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01917                ++class_set;
01918                #endif
01919             }
01920          }
01921       }
01922       goto done;
01923    }
01924    stb_prof(9);
01925 
01926    for (pass=0; pass < 8; ++pass) {
01927       int pcount = 0, class_set=0;
01928       while (pcount < part_read) {
01929          if (pass == 0) {
01930             for (j=0; j < ch; ++j) {
01931                if (!do_not_decode[j]) {
01932                   Codebook *c = f->codebooks+r->classbook;
01933                   int temp;
01934                   DECODE(temp,f,c);
01935                   if (temp == EOP) goto done;
01936                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01937                   part_classdata[j][class_set] = r->classdata[temp];
01938                   #else
01939                   for (i=classwords-1; i >= 0; --i) {
01940                      classifications[j][i+pcount] = temp % r->classifications;
01941                      temp /= r->classifications;
01942                   }
01943                   #endif
01944                }
01945             }
01946          }
01947          for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
01948             for (j=0; j < ch; ++j) {
01949                if (!do_not_decode[j]) {
01950                   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01951                   int c = part_classdata[j][class_set][i];
01952                   #else
01953                   int c = classifications[j][pcount];
01954                   #endif
01955                   int b = r->residue_books[c][pass];
01956                   if (b >= 0) {
01957                      float *target = residue_buffers[j];
01958                      int offset = r->begin + pcount * r->part_size;
01959                      int n = r->part_size;
01960                      Codebook *book = f->codebooks + b;
01961                      if (!residue_decode(f, book, target, offset, n, rtype))
01962                         goto done;
01963                   }
01964                }
01965             }
01966          }
01967          #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
01968          ++class_set;
01969          #endif
01970       }
01971    }
01972   done:
01973    stb_prof(0);
01974    temp_alloc_restore(f,temp_alloc_point);
01975 }
01976 
01977 
01978 #if 0
01979 // slow way for debugging
01980 void inverse_mdct_slow(float *buffer, int n)
01981 {
01982    int i,j;
01983    int n2 = n >> 1;
01984    float *x = (float *) malloc(sizeof(*x) * n2);
01985    memcpy(x, buffer, sizeof(*x) * n2);
01986    for (i=0; i < n; ++i) {
01987       float acc = 0;
01988       for (j=0; j < n2; ++j)
01989          // formula from paper:
01990          //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
01991          // formula from wikipedia
01992          //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
01993          // these are equivalent, except the formula from the paper inverts the multiplier!
01994          // however, what actually works is NO MULTIPLIER!?!
01995          //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
01996          acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
01997       buffer[i] = acc;
01998    }
01999    free(x);
02000 }
02001 #elif 0
02002 // same as above, but just barely able to run in real time on modern machines
02003 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
02004 {
02005    float mcos[16384];
02006    int i,j;
02007    int n2 = n >> 1, nmask = (n << 2) -1;
02008    float *x = (float *) malloc(sizeof(*x) * n2);
02009    memcpy(x, buffer, sizeof(*x) * n2);
02010    for (i=0; i < 4*n; ++i)
02011       mcos[i] = (float) cos(M_PI / 2 * i / n);
02012 
02013    for (i=0; i < n; ++i) {
02014       float acc = 0;
02015       for (j=0; j < n2; ++j)
02016          acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
02017       buffer[i] = acc;
02018    }
02019    free(x);
02020 }
02021 #else
02022 // transform to use a slow dct-iv; this is STILL basically trivial,
02023 // but only requires half as many ops
02024 void dct_iv_slow(float *buffer, int n)
02025 {
02026    float mcos[16384];
02027    float x[2048];
02028    int i,j;
02029    int n2 = n >> 1, nmask = (n << 3) - 1;
02030    memcpy(x, buffer, sizeof(*x) * n);
02031    for (i=0; i < 8*n; ++i)
02032       mcos[i] = (float) cos(M_PI / 4 * i / n);
02033    for (i=0; i < n; ++i) {
02034       float acc = 0;
02035       for (j=0; j < n; ++j)
02036          acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
02037          //acc += x[j] * cos(M_PI / n * (i + 0.5) * (j + 0.5));
02038       buffer[i] = acc;
02039    }
02040 }
02041 
02042 void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
02043 {
02044    int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
02045    float temp[4096];
02046 
02047    memcpy(temp, buffer, n2 * sizeof(float));
02048    dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
02049 
02050    for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
02051    for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
02052    for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
02053 }
02054 #endif
02055 
02056 #ifndef LIBVORBIS_MDCT
02057 #define LIBVORBIS_MDCT 0
02058 #endif
02059 
02060 #if LIBVORBIS_MDCT
02061 // directly call the vorbis MDCT using an interface documented
02062 // by Jeff Roberts... useful for performance comparison
02063 typedef struct 
02064 {
02065   int n;
02066   int log2n;
02067   
02068   float *trig;
02069   int   *bitrev;
02070 
02071   float scale;
02072 } mdct_lookup;
02073 
02074 extern void mdct_init(mdct_lookup *lookup, int n);
02075 extern void mdct_clear(mdct_lookup *l);
02076 extern void mdct_backward(mdct_lookup *init, float *in, float *out);
02077 
02078 mdct_lookup M1,M2;
02079 
02080 void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
02081 {
02082    mdct_lookup *M;
02083    if (M1.n == n) M = &M1;
02084    else if (M2.n == n) M = &M2;
02085    else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
02086    else { 
02087       if (M2.n) __asm int 3;
02088       mdct_init(&M2, n);
02089       M = &M2;
02090    }
02091 
02092    mdct_backward(M, buffer, buffer);
02093 }
02094 #endif
02095 
02096 
02097 // the following were split out into separate functions while optimizing;
02098 // they could be pushed back up but eh. __forceinline showed no change;
02099 // they're probably already being inlined.
02100 static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
02101 {
02102    float *ee0 = e + i_off;
02103    float *ee2 = ee0 + k_off;
02104    int i;
02105 
02106    assert((n & 3) == 0);
02107    for (i=(n>>2); i > 0; --i) {
02108       float k00_20, k01_21;
02109       k00_20  = ee0[ 0] - ee2[ 0];
02110       k01_21  = ee0[-1] - ee2[-1];
02111       ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0];
02112       ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1];
02113       ee2[ 0] = k00_20 * A[0] - k01_21 * A[1];
02114       ee2[-1] = k01_21 * A[0] + k00_20 * A[1];
02115       A += 8;
02116 
02117       k00_20  = ee0[-2] - ee2[-2];
02118       k01_21  = ee0[-3] - ee2[-3];
02119       ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2];
02120       ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3];
02121       ee2[-2] = k00_20 * A[0] - k01_21 * A[1];
02122       ee2[-3] = k01_21 * A[0] + k00_20 * A[1];
02123       A += 8;
02124 
02125       k00_20  = ee0[-4] - ee2[-4];
02126       k01_21  = ee0[-5] - ee2[-5];
02127       ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4];
02128       ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5];
02129       ee2[-4] = k00_20 * A[0] - k01_21 * A[1];
02130       ee2[-5] = k01_21 * A[0] + k00_20 * A[1];
02131       A += 8;
02132 
02133       k00_20  = ee0[-6] - ee2[-6];
02134       k01_21  = ee0[-7] - ee2[-7];
02135       ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6];
02136       ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7];
02137       ee2[-6] = k00_20 * A[0] - k01_21 * A[1];
02138       ee2[-7] = k01_21 * A[0] + k00_20 * A[1];
02139       A += 8;
02140       ee0 -= 8;
02141       ee2 -= 8;
02142    }
02143 }
02144 
02145 static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
02146 {
02147    int i;
02148    float k00_20, k01_21;
02149 
02150    float *e0 = e + d0;
02151    float *e2 = e0 + k_off;
02152 
02153    for (i=lim >> 2; i > 0; --i) {
02154       k00_20 = e0[-0] - e2[-0];
02155       k01_21 = e0[-1] - e2[-1];
02156       e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0];
02157       e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1];
02158       e2[-0] = (k00_20)*A[0] - (k01_21) * A[1];
02159       e2[-1] = (k01_21)*A[0] + (k00_20) * A[1];
02160 
02161       A += k1;
02162 
02163       k00_20 = e0[-2] - e2[-2];
02164       k01_21 = e0[-3] - e2[-3];
02165       e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2];
02166       e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3];
02167       e2[-2] = (k00_20)*A[0] - (k01_21) * A[1];
02168       e2[-3] = (k01_21)*A[0] + (k00_20) * A[1];
02169 
02170       A += k1;
02171 
02172       k00_20 = e0[-4] - e2[-4];
02173       k01_21 = e0[-5] - e2[-5];
02174       e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4];
02175       e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5];
02176       e2[-4] = (k00_20)*A[0] - (k01_21) * A[1];
02177       e2[-5] = (k01_21)*A[0] + (k00_20) * A[1];
02178 
02179       A += k1;
02180 
02181       k00_20 = e0[-6] - e2[-6];
02182       k01_21 = e0[-7] - e2[-7];
02183       e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6];
02184       e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7];
02185       e2[-6] = (k00_20)*A[0] - (k01_21) * A[1];
02186       e2[-7] = (k01_21)*A[0] + (k00_20) * A[1];
02187 
02188       e0 -= 8;
02189       e2 -= 8;
02190 
02191       A += k1;
02192    }
02193 }
02194 
02195 static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
02196 {
02197    int i;
02198    float A0 = A[0];
02199    float A1 = A[0+1];
02200    float A2 = A[0+a_off];
02201    float A3 = A[0+a_off+1];
02202    float A4 = A[0+a_off*2+0];
02203    float A5 = A[0+a_off*2+1];
02204    float A6 = A[0+a_off*3+0];
02205    float A7 = A[0+a_off*3+1];
02206 
02207    float k00,k11;
02208 
02209    float *ee0 = e  +i_off;
02210    float *ee2 = ee0+k_off;
02211 
02212    for (i=n; i > 0; --i) {
02213       k00     = ee0[ 0] - ee2[ 0];
02214       k11     = ee0[-1] - ee2[-1];
02215       ee0[ 0] =  ee0[ 0] + ee2[ 0];
02216       ee0[-1] =  ee0[-1] + ee2[-1];
02217       ee2[ 0] = (k00) * A0 - (k11) * A1;
02218       ee2[-1] = (k11) * A0 + (k00) * A1;
02219 
02220       k00     = ee0[-2] - ee2[-2];
02221       k11     = ee0[-3] - ee2[-3];
02222       ee0[-2] =  ee0[-2] + ee2[-2];
02223       ee0[-3] =  ee0[-3] + ee2[-3];
02224       ee2[-2] = (k00) * A2 - (k11) * A3;
02225       ee2[-3] = (k11) * A2 + (k00) * A3;
02226 
02227       k00     = ee0[-4] - ee2[-4];
02228       k11     = ee0[-5] - ee2[-5];
02229       ee0[-4] =  ee0[-4] + ee2[-4];
02230       ee0[-5] =  ee0[-5] + ee2[-5];
02231       ee2[-4] = (k00) * A4 - (k11) * A5;
02232       ee2[-5] = (k11) * A4 + (k00) * A5;
02233 
02234       k00     = ee0[-6] - ee2[-6];
02235       k11     = ee0[-7] - ee2[-7];
02236       ee0[-6] =  ee0[-6] + ee2[-6];
02237       ee0[-7] =  ee0[-7] + ee2[-7];
02238       ee2[-6] = (k00) * A6 - (k11) * A7;
02239       ee2[-7] = (k11) * A6 + (k00) * A7;
02240 
02241       ee0 -= k0;
02242       ee2 -= k0;
02243    }
02244 }
02245 
02246 static __forceinline void iter_54(float *z)
02247 {
02248    float k00,k11,k22,k33;
02249    float y0,y1,y2,y3;
02250 
02251    k00  = z[ 0] - z[-4];
02252    y0   = z[ 0] + z[-4];
02253    y2   = z[-2] + z[-6];
02254    k22  = z[-2] - z[-6];
02255 
02256    z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
02257    z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
02258 
02259    // done with y0,y2
02260 
02261    k33  = z[-3] - z[-7];
02262 
02263    z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
02264    z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
02265 
02266    // done with k33
02267 
02268    k11  = z[-1] - z[-5];
02269    y1   = z[-1] + z[-5];
02270    y3   = z[-3] + z[-7];
02271 
02272    z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
02273    z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
02274    z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
02275    z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
02276 }
02277 
02278 static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
02279 {
02280    int k_off = -8;
02281    int a_off = base_n >> 3;
02282    float A2 = A[0+a_off];
02283    float *z = e + i_off;
02284    float *base = z - 16 * n;
02285 
02286    while (z > base) {
02287       float k00,k11;
02288 
02289       k00   = z[-0] - z[-8];
02290       k11   = z[-1] - z[-9];
02291       z[-0] = z[-0] + z[-8];
02292       z[-1] = z[-1] + z[-9];
02293       z[-8] =  k00;
02294       z[-9] =  k11 ;
02295 
02296       k00    = z[ -2] - z[-10];
02297       k11    = z[ -3] - z[-11];
02298       z[ -2] = z[ -2] + z[-10];
02299       z[ -3] = z[ -3] + z[-11];
02300       z[-10] = (k00+k11) * A2;
02301       z[-11] = (k11-k00) * A2;
02302 
02303       k00    = z[-12] - z[ -4];  // reverse to avoid a unary negation
02304       k11    = z[ -5] - z[-13];
02305       z[ -4] = z[ -4] + z[-12];
02306       z[ -5] = z[ -5] + z[-13];
02307       z[-12] = k11;
02308       z[-13] = k00;
02309 
02310       k00    = z[-14] - z[ -6];  // reverse to avoid a unary negation
02311       k11    = z[ -7] - z[-15];
02312       z[ -6] = z[ -6] + z[-14];
02313       z[ -7] = z[ -7] + z[-15];
02314       z[-14] = (k00+k11) * A2;
02315       z[-15] = (k00-k11) * A2;
02316 
02317       iter_54(z);
02318       iter_54(z-8);
02319       z -= 16;
02320    }
02321 }
02322 
02323 static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
02324 {
02325    int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
02326    int n3_4 = n - n4, ld;
02327    // @OPTIMIZE: reduce register pressure by using fewer variables?
02328    int save_point = temp_alloc_save(f);
02329    float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
02330    float *u=NULL,*v=NULL;
02331    // twiddle factors
02332    float *A = f->A[blocktype];
02333 
02334    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
02335    // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
02336 
02337    // kernel from paper
02338 
02339 
02340    // merged:
02341    //   copy and reflect spectral data
02342    //   step 0
02343 
02344    // note that it turns out that the items added together during
02345    // this step are, in fact, being added to themselves (as reflected
02346    // by step 0). inexplicable inefficiency! this became obvious
02347    // once I combined the passes.
02348 
02349    // so there's a missing 'times 2' here (for adding X to itself).
02350    // this propogates through linearly to the end, where the numbers
02351    // are 1/2 too small, and need to be compensated for.
02352 
02353    {
02354       float *d,*e, *AA, *e_stop;
02355       d = &buf2[n2-2];
02356       AA = A;
02357       e = &buffer[0];
02358       e_stop = &buffer[n2];
02359       while (e != e_stop) {
02360          d[1] = (e[0] * AA[0] - e[2]*AA[1]);
02361          d[0] = (e[0] * AA[1] + e[2]*AA[0]);
02362          d -= 2;
02363          AA += 2;
02364          e += 4;
02365       }
02366 
02367       e = &buffer[n2-3];
02368       while (d >= buf2) {
02369          d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
02370          d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
02371          d -= 2;
02372          AA += 2;
02373          e -= 4;
02374       }
02375    }
02376 
02377    // now we use symbolic names for these, so that we can
02378    // possibly swap their meaning as we change which operations
02379    // are in place
02380 
02381    u = buffer;
02382    v = buf2;
02383 
02384    // step 2    (paper output is w, now u)
02385    // this could be in place, but the data ends up in the wrong
02386    // place... _somebody_'s got to swap it, so this is nominated
02387    {
02388       float *AA = &A[n2-8];
02389       float *d0,*d1, *e0, *e1;
02390 
02391       e0 = &v[n4];
02392       e1 = &v[0];
02393 
02394       d0 = &u[n4];
02395       d1 = &u[0];
02396 
02397       while (AA >= A) {
02398          float v40_20, v41_21;
02399 
02400          v41_21 = e0[1] - e1[1];
02401          v40_20 = e0[0] - e1[0];
02402          d0[1]  = e0[1] + e1[1];
02403          d0[0]  = e0[0] + e1[0];
02404          d1[1]  = v41_21*AA[4] - v40_20*AA[5];
02405          d1[0]  = v40_20*AA[4] + v41_21*AA[5];
02406 
02407          v41_21 = e0[3] - e1[3];
02408          v40_20 = e0[2] - e1[2];
02409          d0[3]  = e0[3] + e1[3];
02410          d0[2]  = e0[2] + e1[2];
02411          d1[3]  = v41_21*AA[0] - v40_20*AA[1];
02412          d1[2]  = v40_20*AA[0] + v41_21*AA[1];
02413 
02414          AA -= 8;
02415 
02416          d0 += 4;
02417          d1 += 4;
02418          e0 += 4;
02419          e1 += 4;
02420       }
02421    }
02422 
02423    // step 3
02424    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
02425 
02426    // optimized step 3:
02427 
02428    // the original step3 loop can be nested r inside s or s inside r;
02429    // it's written originally as s inside r, but this is dumb when r
02430    // iterates many times, and s few. So I have two copies of it and
02431    // switch between them halfway.
02432 
02433    // this is iteration 0 of step 3
02434    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
02435    imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
02436 
02437    // this is iteration 1 of step 3
02438    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
02439    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
02440    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
02441    imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
02442 
02443    l=2;
02444    for (; l < (ld-3)>>1; ++l) {
02445       int k0 = n >> (l+2), k0_2 = k0>>1;
02446       int lim = 1 << (l+1);
02447       int i;
02448       for (i=0; i < lim; ++i)
02449          imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
02450    }
02451 
02452    for (; l < ld-6; ++l) {
02453       int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
02454       int rlim = n >> (l+6), r;
02455       int lim = 1 << (l+1);
02456       int i_off;
02457       float *A0 = A;
02458       i_off = n2-1;
02459       for (r=rlim; r > 0; --r) {
02460          imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
02461          A0 += k1*4;
02462          i_off -= 8;
02463       }
02464    }
02465 
02466    // iterations with count:
02467    //   ld-6,-5,-4 all interleaved together
02468    //       the big win comes from getting rid of needless flops
02469    //         due to the constants on pass 5 & 4 being all 1 and 0;
02470    //       combining them to be simultaneous to improve cache made little difference
02471    imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
02472 
02473    // output is u
02474 
02475    // step 4, 5, and 6
02476    // cannot be in-place because of step 5
02477    {
02478       uint16 *bitrev = f->bit_reverse[blocktype];
02479       // weirdly, I'd have thought reading sequentially and writing
02480       // erratically would have been better than vice-versa, but in
02481       // fact that's not what my testing showed. (That is, with
02482       // j = bitreverse(i), do you read i and write j, or read j and write i.)
02483 
02484       float *d0 = &v[n4-4];
02485       float *d1 = &v[n2-4];
02486       while (d0 >= v) {
02487          int k4;
02488 
02489          k4 = bitrev[0];
02490          d1[3] = u[k4+0];
02491          d1[2] = u[k4+1];
02492          d0[3] = u[k4+2];
02493          d0[2] = u[k4+3];
02494 
02495          k4 = bitrev[1];
02496          d1[1] = u[k4+0];
02497          d1[0] = u[k4+1];
02498          d0[1] = u[k4+2];
02499          d0[0] = u[k4+3];
02500          
02501          d0 -= 4;
02502          d1 -= 4;
02503          bitrev += 2;
02504       }
02505    }
02506    // (paper output is u, now v)
02507 
02508 
02509    // data must be in buf2
02510    assert(v == buf2);
02511 
02512    // step 7   (paper output is v, now v)
02513    // this is now in place
02514    {
02515       float *C = f->C[blocktype];
02516       float *d, *e;
02517 
02518       d = v;
02519       e = v + n2 - 4;
02520 
02521       while (d < e) {
02522          float a02,a11,b0,b1,b2,b3;
02523 
02524          a02 = d[0] - e[2];
02525          a11 = d[1] + e[3];
02526 
02527          b0 = C[1]*a02 + C[0]*a11;
02528          b1 = C[1]*a11 - C[0]*a02;
02529 
02530          b2 = d[0] + e[ 2];
02531          b3 = d[1] - e[ 3];
02532 
02533          d[0] = b2 + b0;
02534          d[1] = b3 + b1;
02535          e[2] = b2 - b0;
02536          e[3] = b1 - b3;
02537 
02538          a02 = d[2] - e[0];
02539          a11 = d[3] + e[1];
02540 
02541          b0 = C[3]*a02 + C[2]*a11;
02542          b1 = C[3]*a11 - C[2]*a02;
02543 
02544          b2 = d[2] + e[ 0];
02545          b3 = d[3] - e[ 1];
02546 
02547          d[2] = b2 + b0;
02548          d[3] = b3 + b1;
02549          e[0] = b2 - b0;
02550          e[1] = b1 - b3;
02551 
02552          C += 4;
02553          d += 4;
02554          e -= 4;
02555       }
02556    }
02557 
02558    // data must be in buf2
02559 
02560 
02561    // step 8+decode   (paper output is X, now buffer)
02562    // this generates pairs of data a la 8 and pushes them directly through
02563    // the decode kernel (pushing rather than pulling) to avoid having
02564    // to make another pass later
02565 
02566    // this cannot POSSIBLY be in place, so we refer to the buffers directly
02567 
02568    {
02569       float *d0,*d1,*d2,*d3;
02570 
02571       float *B = f->B[blocktype] + n2 - 8;
02572       float *e = buf2 + n2 - 8;
02573       d0 = &buffer[0];
02574       d1 = &buffer[n2-4];
02575       d2 = &buffer[n2];
02576       d3 = &buffer[n-4];
02577       while (e >= v) {
02578          float p0,p1,p2,p3;
02579 
02580          p3 =  e[6]*B[7] - e[7]*B[6];
02581          p2 = -e[6]*B[6] - e[7]*B[7]; 
02582 
02583          d0[0] =   p3;
02584          d1[3] = - p3;
02585          d2[0] =   p2;
02586          d3[3] =   p2;
02587 
02588          p1 =  e[4]*B[5] - e[5]*B[4];
02589          p0 = -e[4]*B[4] - e[5]*B[5]; 
02590 
02591          d0[1] =   p1;
02592          d1[2] = - p1;
02593          d2[1] =   p0;
02594          d3[2] =   p0;
02595 
02596          p3 =  e[2]*B[3] - e[3]*B[2];
02597          p2 = -e[2]*B[2] - e[3]*B[3]; 
02598 
02599          d0[2] =   p3;
02600          d1[1] = - p3;
02601          d2[2] =   p2;
02602          d3[1] =   p2;
02603 
02604          p1 =  e[0]*B[1] - e[1]*B[0];
02605          p0 = -e[0]*B[0] - e[1]*B[1]; 
02606 
02607          d0[3] =   p1;
02608          d1[0] = - p1;
02609          d2[3] =   p0;
02610          d3[0] =   p0;
02611 
02612          B -= 8;
02613          e -= 8;
02614          d0 += 4;
02615          d2 += 4;
02616          d1 -= 4;
02617          d3 -= 4;
02618       }
02619    }
02620 
02621    temp_alloc_restore(f,save_point);
02622 }
02623 
02624 #if 0
02625 // this is the original version of the above code, if you want to optimize it from scratch
02626 void inverse_mdct_naive(float *buffer, int n)
02627 {
02628    float s;
02629    float A[1 << 12], B[1 << 12], C[1 << 11];
02630    int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
02631    int n3_4 = n - n4, ld;
02632    // how can they claim this only uses N words?!
02633    // oh, because they're only used sparsely, whoops
02634    float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
02635    // set up twiddle factors
02636 
02637    for (k=k2=0; k < n4; ++k,k2+=2) {
02638       A[k2  ] = (float)  cos(4*k*M_PI/n);
02639       A[k2+1] = (float) -sin(4*k*M_PI/n);
02640       B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
02641       B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
02642    }
02643    for (k=k2=0; k < n8; ++k,k2+=2) {
02644       C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
02645       C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
02646    }
02647 
02648    // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
02649    // Note there are bugs in that pseudocode, presumably due to them attempting
02650    // to rename the arrays nicely rather than representing the way their actual
02651    // implementation bounces buffers back and forth. As a result, even in the
02652    // "some formulars corrected" version, a direct implementation fails. These
02653    // are noted below as "paper bug".
02654 
02655    // copy and reflect spectral data
02656    for (k=0; k < n2; ++k) u[k] = buffer[k];
02657    for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
02658    // kernel from paper
02659    // step 1
02660    for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
02661       v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
02662       v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
02663    }
02664    // step 2
02665    for (k=k4=0; k < n8; k+=1, k4+=4) {
02666       w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
02667       w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
02668       w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
02669       w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
02670    }
02671    // step 3
02672    ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
02673    for (l=0; l < ld-3; ++l) {
02674       int k0 = n >> (l+2), k1 = 1 << (l+3);
02675       int rlim = n >> (l+4), r4, r;
02676       int s2lim = 1 << (l+2), s2;
02677       for (r=r4=0; r < rlim; r4+=4,++r) {
02678          for (s2=0; s2 < s2lim; s2+=2) {
02679             u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
02680             u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
02681             u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
02682                                 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
02683             u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
02684                                 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
02685          }
02686       }
02687       if (l+1 < ld-3) {
02688          // paper bug: ping-ponging of u&w here is omitted
02689          memcpy(w, u, sizeof(u));
02690       }
02691    }
02692 
02693    // step 4
02694    for (i=0; i < n8; ++i) {
02695       int j = bit_reverse(i) >> (32-ld+3);
02696       assert(j < n8);
02697       if (i == j) {
02698          // paper bug: original code probably swapped in place; if copying,
02699          //            need to directly copy in this case
02700          int i8 = i << 3;
02701          v[i8+1] = u[i8+1];
02702          v[i8+3] = u[i8+3];
02703          v[i8+5] = u[i8+5];
02704          v[i8+7] = u[i8+7];
02705       } else if (i < j) {
02706          int i8 = i << 3, j8 = j << 3;
02707          v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
02708          v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
02709          v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
02710          v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
02711       }
02712    }
02713    // step 5
02714    for (k=0; k < n2; ++k) {
02715       w[k] = v[k*2+1];
02716    }
02717    // step 6
02718    for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
02719       u[n-1-k2] = w[k4];
02720       u[n-2-k2] = w[k4+1];
02721       u[n3_4 - 1 - k2] = w[k4+2];
02722       u[n3_4 - 2 - k2] = w[k4+3];
02723    }
02724    // step 7
02725    for (k=k2=0; k < n8; ++k, k2 += 2) {
02726       v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
02727       v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
02728       v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
02729       v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
02730    }
02731    // step 8
02732    for (k=k2=0; k < n4; ++k,k2 += 2) {
02733       X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
02734       X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
02735    }
02736 
02737    // decode kernel to output
02738    // determined the following value experimentally
02739    // (by first figuring out what made inverse_mdct_slow work); then matching that here
02740    // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
02741    s = 0.5; // theoretically would be n4
02742 
02743    // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
02744    //     so it needs to use the "old" B values to behave correctly, or else
02745    //     set s to 1.0 ]]]
02746    for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
02747    for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
02748    for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
02749 }
02750 #endif
02751 
02752 static float *get_window(vorb *f, int len)
02753 {
02754    len <<= 1;
02755    if (len == f->blocksize_0) return f->window[0];
02756    if (len == f->blocksize_1) return f->window[1];
02757    assert(0);
02758    return NULL;
02759 }
02760 
02761 #ifndef STB_VORBIS_NO_DEFER_FLOOR
02762 typedef int16 YTYPE;
02763 #else
02764 typedef int YTYPE;
02765 #endif
02766 static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
02767 {
02768    int n2 = n >> 1;
02769    int s = map->chan[i].mux, floor;
02770    floor = map->submap_floor[s];
02771    if (f->floor_types[floor] == 0) {
02772       return error(f, VORBIS_invalid_stream);
02773    } else {
02774       Floor1 *g = &f->floor_config[floor].floor1;
02775       int j,q;
02776       int lx = 0, ly = finalY[0] * g->floor1_multiplier;
02777       for (q=1; q < g->values; ++q) {
02778          j = g->sorted_order[q];
02779          #ifndef STB_VORBIS_NO_DEFER_FLOOR
02780          if (finalY[j] >= 0)
02781          #else
02782          if (step2_flag[j])
02783          #endif
02784          {
02785             int hy = finalY[j] * g->floor1_multiplier;
02786             int hx = g->Xlist[j];
02787             draw_line(target, lx,ly, hx,hy, n2);
02788             lx = hx, ly = hy;
02789          }
02790       }
02791       if (lx < n2)
02792          // optimization of: draw_line(target, lx,ly, n,ly, n2);
02793          for (j=lx; j < n2; ++j)
02794             LINE_OP(target[j], inverse_db_table[ly]);
02795    }
02796    return TRUE;
02797 }
02798 
02799 static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
02800 {
02801    Mode *m;
02802    int i, n, prev, next, window_center;
02803    f->channel_buffer_start = f->channel_buffer_end = 0;
02804 
02805   retry:
02806    if (f->eof) return FALSE;
02807    if (!maybe_start_packet(f))
02808       return FALSE;
02809    // check packet type
02810    if (get_bits(f,1) != 0) {
02811       if (IS_PUSH_MODE(f))
02812          return error(f,VORBIS_bad_packet_type);
02813       while (EOP != get8_packet(f));
02814       goto retry;
02815    }
02816 
02817    if (f->alloc.alloc_buffer)
02818       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
02819 
02820    i = get_bits(f, ilog(f->mode_count-1));
02821    if (i == EOP) return FALSE;
02822    if (i >= f->mode_count) return FALSE;
02823    *mode = i;
02824    m = f->mode_config + i;
02825    if (m->blockflag) {
02826       n = f->blocksize_1;
02827       prev = get_bits(f,1);
02828       next = get_bits(f,1);
02829    } else {
02830       prev = next = 0;
02831       n = f->blocksize_0;
02832    }
02833 
02834 // WINDOWING
02835 
02836    window_center = n >> 1;
02837    if (m->blockflag && !prev) {
02838       *p_left_start = (n - f->blocksize_0) >> 2;
02839       *p_left_end   = (n + f->blocksize_0) >> 2;
02840    } else {
02841       *p_left_start = 0;
02842       *p_left_end   = window_center;
02843    }
02844    if (m->blockflag && !next) {
02845       *p_right_start = (n*3 - f->blocksize_0) >> 2;
02846       *p_right_end   = (n*3 + f->blocksize_0) >> 2;
02847    } else {
02848       *p_right_start = window_center;
02849       *p_right_end   = n;
02850    }
02851    return TRUE;
02852 }
02853 
02854 static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
02855 {
02856    Mapping *map;
02857    int i,j,k,n,n2;
02858    int zero_channel[256];
02859    int really_zero_channel[256];
02860    int window_center;
02861 
02862 // WINDOWING
02863 
02864    n = f->blocksize[m->blockflag];
02865    window_center = n >> 1;
02866 
02867    map = &f->mapping[m->mapping];
02868 
02869 // FLOORS
02870    n2 = n >> 1;
02871 
02872    stb_prof(1);
02873    for (i=0; i < f->channels; ++i) {
02874       int s = map->chan[i].mux, floor;
02875       zero_channel[i] = FALSE;
02876       floor = map->submap_floor[s];
02877       if (f->floor_types[floor] == 0) {
02878          return error(f, VORBIS_invalid_stream);
02879       } else {
02880          Floor1 *g = &f->floor_config[floor].floor1;
02881          if (get_bits(f, 1)) {
02882             short *finalY;
02883             uint8 step2_flag[256];
02884             static int range_list[4] = { 256, 128, 86, 64 };
02885             int range = range_list[g->floor1_multiplier-1];
02886             int offset = 2;
02887             finalY = f->finalY[i];
02888             finalY[0] = get_bits(f, ilog(range)-1);
02889             finalY[1] = get_bits(f, ilog(range)-1);
02890             for (j=0; j < g->partitions; ++j) {
02891                int pclass = g->partition_class_list[j];
02892                int cdim = g->class_dimensions[pclass];
02893                int cbits = g->class_subclasses[pclass];
02894                int csub = (1 << cbits)-1;
02895                int cval = 0;
02896                if (cbits) {
02897                   Codebook *c = f->codebooks + g->class_masterbooks[pclass];
02898                   DECODE(cval,f,c);
02899                }
02900                for (k=0; k < cdim; ++k) {
02901                   int book = g->subclass_books[pclass][cval & csub];
02902                   cval = cval >> cbits;
02903                   if (book >= 0) {
02904                      int temp;
02905                      Codebook *c = f->codebooks + book;
02906                      DECODE(temp,f,c);
02907                      finalY[offset++] = temp;
02908                   } else
02909                      finalY[offset++] = 0;
02910                }
02911             }
02912             if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
02913             step2_flag[0] = step2_flag[1] = 1;
02914             for (j=2; j < g->values; ++j) {
02915                int low, high, pred, highroom, lowroom, room, val;
02916                low = g->neighbors[j][0];
02917                high = g->neighbors[j][1];
02918                //neighbors(g->Xlist, j, &low, &high);
02919                pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
02920                val = finalY[j];
02921                highroom = range - pred;
02922                lowroom = pred;
02923                if (highroom < lowroom)
02924                   room = highroom * 2;
02925                else
02926                   room = lowroom * 2;
02927                if (val) {
02928                   step2_flag[low] = step2_flag[high] = 1;
02929                   step2_flag[j] = 1;
02930                   if (val >= room)
02931                      if (highroom > lowroom)
02932                         finalY[j] = val - lowroom + pred;
02933                      else
02934                         finalY[j] = pred - val + highroom - 1;
02935                   else
02936                      if (val & 1)
02937                         finalY[j] = pred - ((val+1)>>1);
02938                      else
02939                         finalY[j] = pred + (val>>1);
02940                } else {
02941                   step2_flag[j] = 0;
02942                   finalY[j] = pred;
02943                }
02944             }
02945 
02946 #ifdef STB_VORBIS_NO_DEFER_FLOOR
02947             do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
02948 #else
02949             // defer final floor computation until _after_ residue
02950             for (j=0; j < g->values; ++j) {
02951                if (!step2_flag[j])
02952                   finalY[j] = -1;
02953             }
02954 #endif
02955          } else {
02956            error:
02957             zero_channel[i] = TRUE;
02958          }
02959          // So we just defer everything else to later
02960 
02961          // at this point we've decoded the floor into buffer
02962       }
02963    }
02964    stb_prof(0);
02965    // at this point we've decoded all floors
02966 
02967    if (f->alloc.alloc_buffer)
02968       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
02969 
02970    // re-enable coupled channels if necessary
02971    memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
02972    for (i=0; i < map->coupling_steps; ++i)
02973       if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
02974          zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
02975       }
02976 
02977 // RESIDUE DECODE
02978    for (i=0; i < map->submaps; ++i) {
02979       float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
02980       int r,t;
02981       uint8 do_not_decode[256];
02982       int ch = 0;
02983       for (j=0; j < f->channels; ++j) {
02984          if (map->chan[j].mux == i) {
02985             if (zero_channel[j]) {
02986                do_not_decode[ch] = TRUE;
02987                residue_buffers[ch] = NULL;
02988             } else {
02989                do_not_decode[ch] = FALSE;
02990                residue_buffers[ch] = f->channel_buffers[j];
02991             }
02992             ++ch;
02993          }
02994       }
02995       r = map->submap_residue[i];
02996       t = f->residue_types[r];
02997       decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
02998    }
02999 
03000    if (f->alloc.alloc_buffer)
03001       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
03002 
03003 // INVERSE COUPLING
03004    stb_prof(14);
03005    for (i = map->coupling_steps-1; i >= 0; --i) {
03006       int n2 = n >> 1;
03007       float *m = f->channel_buffers[map->chan[i].magnitude];
03008       float *a = f->channel_buffers[map->chan[i].angle    ];
03009       for (j=0; j < n2; ++j) {
03010          float a2,m2;
03011          if (m[j] > 0)
03012             if (a[j] > 0)
03013                m2 = m[j], a2 = m[j] - a[j];
03014             else
03015                a2 = m[j], m2 = m[j] + a[j];
03016          else
03017             if (a[j] > 0)
03018                m2 = m[j], a2 = m[j] + a[j];
03019             else
03020                a2 = m[j], m2 = m[j] - a[j];
03021          m[j] = m2;
03022          a[j] = a2;
03023       }
03024    }
03025 
03026    // finish decoding the floors
03027 #ifndef STB_VORBIS_NO_DEFER_FLOOR
03028    stb_prof(15);
03029    for (i=0; i < f->channels; ++i) {
03030       if (really_zero_channel[i]) {
03031          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
03032       } else {
03033          do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
03034       }
03035    }
03036 #else
03037    for (i=0; i < f->channels; ++i) {
03038       if (really_zero_channel[i]) {
03039          memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
03040       } else {
03041          for (j=0; j < n2; ++j)
03042             f->channel_buffers[i][j] *= f->floor_buffers[i][j];
03043       }
03044    }
03045 #endif
03046 
03047 // INVERSE MDCT
03048    stb_prof(16);
03049    for (i=0; i < f->channels; ++i)
03050       inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
03051    stb_prof(0);
03052 
03053    // this shouldn't be necessary, unless we exited on an error
03054    // and want to flush to get to the next packet
03055    flush_packet(f);
03056 
03057    if (f->first_decode) {
03058       // assume we start so first non-discarded sample is sample 0
03059       // this isn't to spec, but spec would require us to read ahead
03060       // and decode the size of all current frames--could be done,
03061       // but presumably it's not a commonly used feature
03062       f->current_loc = -n2; // start of first frame is positioned for discard
03063       // we might have to discard samples "from" the next frame too,
03064       // if we're lapping a large block then a small at the start?
03065       f->discard_samples_deferred = n - right_end;
03066       f->current_loc_valid = TRUE;
03067       f->first_decode = FALSE;
03068    } else if (f->discard_samples_deferred) {
03069       left_start += f->discard_samples_deferred;
03070       *p_left = left_start;
03071       f->discard_samples_deferred = 0;
03072    } else if (f->previous_length == 0 && f->current_loc_valid) {
03073       // we're recovering from a seek... that means we're going to discard
03074       // the samples from this packet even though we know our position from
03075       // the last page header, so we need to update the position based on
03076       // the discarded samples here
03077       // but wait, the code below is going to add this in itself even
03078       // on a discard, so we don't need to do it here...
03079    }
03080 
03081    // check if we have ogg information about the sample # for this packet
03082    if (f->last_seg_which == f->end_seg_with_known_loc) {
03083       // if we have a valid current loc, and this is final:
03084       if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
03085          uint32 current_end = f->known_loc_for_packet - (n-right_end);
03086          // then let's infer the size of the (probably) short final frame
03087          if (current_end < f->current_loc + right_end) {
03088             if (current_end < f->current_loc) {
03089                // negative truncation, that's impossible!
03090                *len = 0;
03091             } else {
03092                *len = current_end - f->current_loc;
03093             }
03094             *len += left_start;
03095             f->current_loc += *len;
03096             return TRUE;
03097          }
03098       }
03099       // otherwise, just set our sample loc
03100       // guess that the ogg granule pos refers to the _middle_ of the
03101       // last frame?
03102       // set f->current_loc to the position of left_start
03103       f->current_loc = f->known_loc_for_packet - (n2-left_start);
03104       f->current_loc_valid = TRUE;
03105    }
03106    if (f->current_loc_valid)
03107       f->current_loc += (right_start - left_start);
03108 
03109    if (f->alloc.alloc_buffer)
03110       assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
03111    *len = right_end;  // ignore samples after the window goes to 0
03112    return TRUE;
03113 }
03114 
03115 static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
03116 {
03117    int mode, left_end, right_end;
03118    if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
03119    return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
03120 }
03121 
03122 static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
03123 {
03124    int prev,i,j;
03125    // we use right&left (the start of the right- and left-window sin()-regions)
03126    // to determine how much to return, rather than inferring from the rules
03127    // (same result, clearer code); 'left' indicates where our sin() window
03128    // starts, therefore where the previous window's right edge starts, and
03129    // therefore where to start mixing from the previous buffer. 'right'
03130    // indicates where our sin() ending-window starts, therefore that's where
03131    // we start saving, and where our returned-data ends.
03132 
03133    // mixin from previous window
03134    if (f->previous_length) {
03135       int i,j, n = f->previous_length;
03136       float *w = get_window(f, n);
03137       for (i=0; i < f->channels; ++i) {
03138          for (j=0; j < n; ++j)
03139             f->channel_buffers[i][left+j] =
03140                f->channel_buffers[i][left+j]*w[    j] +
03141                f->previous_window[i][     j]*w[n-1-j];
03142       }
03143    }
03144 
03145    prev = f->previous_length;
03146 
03147    // last half of this data becomes previous window
03148    f->previous_length = len - right;
03149 
03150    // @OPTIMIZE: could avoid this copy by double-buffering the
03151    // output (flipping previous_window with channel_buffers), but
03152    // then previous_window would have to be 2x as large, and
03153    // channel_buffers couldn't be temp mem (although they're NOT
03154    // currently temp mem, they could be (unless we want to level
03155    // performance by spreading out the computation))
03156    for (i=0; i < f->channels; ++i)
03157       for (j=0; right+j < len; ++j)
03158          f->previous_window[i][j] = f->channel_buffers[i][right+j];
03159 
03160    if (!prev)
03161       // there was no previous packet, so this data isn't valid...
03162       // this isn't entirely true, only the would-have-overlapped data
03163       // isn't valid, but this seems to be what the spec requires
03164       return 0;
03165 
03166    // truncate a short frame
03167    if (len < right) right = len;
03168 
03169    f->samples_output += right-left;
03170 
03171    return right - left;
03172 }
03173 
03174 static void vorbis_pump_first_frame(stb_vorbis *f)
03175 {
03176    int len, right, left;
03177    if (vorbis_decode_packet(f, &len, &left, &right))
03178       vorbis_finish_frame(f, len, left, right);
03179 }
03180 
03181 #ifndef STB_VORBIS_NO_PUSHDATA_API
03182 static int is_whole_packet_present(stb_vorbis *f, int end_page)
03183 {
03184    // make sure that we have the packet available before continuing...
03185    // this requires a full ogg parse, but we know we can fetch from f->stream
03186 
03187    // instead of coding this out explicitly, we could save the current read state,
03188    // read the next packet with get8() until end-of-packet, check f->eof, then
03189    // reset the state? but that would be slower, esp. since we'd have over 256 bytes
03190    // of state to restore (primarily the page segment table)
03191 
03192    int s = f->next_seg, first = TRUE;
03193    uint8 *p = f->stream;
03194 
03195    if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
03196       for (; s < f->segment_count; ++s) {
03197          p += f->segments[s];
03198          if (f->segments[s] < 255)               // stop at first short segment
03199             break;
03200       }
03201       // either this continues, or it ends it...
03202       if (end_page)
03203          if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
03204       if (s == f->segment_count)
03205          s = -1; // set 'crosses page' flag
03206       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
03207       first = FALSE;
03208    }
03209    for (; s == -1;) {
03210       uint8 *q; 
03211       int n;
03212 
03213       // check that we have the page header ready
03214       if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
03215       // validate the page
03216       if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
03217       if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
03218       if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
03219          if (f->previous_length)
03220             if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
03221          // if no previous length, we're resynching, so we can come in on a continued-packet,
03222          // which we'll just drop
03223       } else {
03224          if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
03225       }
03226       n = p[26]; // segment counts
03227       q = p+27;  // q points to segment table
03228       p = q + n; // advance past header
03229       // make sure we've read the segment table
03230       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
03231       for (s=0; s < n; ++s) {
03232          p += q[s];
03233          if (q[s] < 255)
03234             break;
03235       }
03236       if (end_page)
03237          if (s < n-1)                            return error(f, VORBIS_invalid_stream);
03238       if (s == f->segment_count)
03239          s = -1; // set 'crosses page' flag
03240       if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
03241       first = FALSE;
03242    }
03243    return TRUE;
03244 }
03245 #endif // !STB_VORBIS_NO_PUSHDATA_API
03246 
03247 static int start_decoder(vorb *f)
03248 {
03249    uint8 header[6], x,y;
03250    int len,i,j,k, max_submaps = 0;
03251    int longest_floorlist=0;
03252 
03253    // first page, first packet
03254 
03255    if (!start_page(f))                              return FALSE;
03256    // validate page flag
03257    if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
03258    if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
03259    if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
03260    // check for expected packet length
03261    if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
03262    if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
03263    // read packet
03264    // check packet header
03265    if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
03266    if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
03267    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
03268    // vorbis_version
03269    if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
03270    f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
03271    if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
03272    f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
03273    get32(f); // bitrate_maximum
03274    get32(f); // bitrate_nominal
03275    get32(f); // bitrate_minimum
03276    x = get8(f);
03277    { int log0,log1;
03278    log0 = x & 15;
03279    log1 = x >> 4;
03280    f->blocksize_0 = 1 << log0;
03281    f->blocksize_1 = 1 << log1;
03282    if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
03283    if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
03284    if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
03285    }
03286 
03287    // framing_flag
03288    x = get8(f);
03289    if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
03290 
03291    // second packet!
03292    if (!start_page(f))                              return FALSE;
03293 
03294    if (!start_packet(f))                            return FALSE;
03295    do {
03296       len = next_segment(f);
03297       skip(f, len);
03298       f->bytes_in_seg = 0;
03299    } while (len);
03300 
03301    // third packet!
03302    if (!start_packet(f))                            return FALSE;
03303 
03304    #ifndef STB_VORBIS_NO_PUSHDATA_API
03305    if (IS_PUSH_MODE(f)) {
03306       if (!is_whole_packet_present(f, TRUE)) {
03307          // convert error in ogg header to write type
03308          if (f->error == VORBIS_invalid_stream)
03309             f->error = VORBIS_invalid_setup;
03310          return FALSE;
03311       }
03312    }
03313    #endif
03314 
03315    crc32_init(); // always init it, to avoid multithread race conditions
03316 
03317    if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
03318    for (i=0; i < 6; ++i) header[i] = get8_packet(f);
03319    if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
03320 
03321    // codebooks
03322 
03323    f->codebook_count = get_bits(f,8) + 1;
03324    f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
03325    if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
03326    memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
03327    for (i=0; i < f->codebook_count; ++i) {
03328       uint32 *values;
03329       int ordered, sorted_count;
03330       int total=0;
03331       uint8 *lengths;
03332       Codebook *c = f->codebooks+i;
03333       x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
03334       x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
03335       x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
03336       x = get_bits(f, 8);
03337       c->dimensions = (get_bits(f, 8)<<8) + x;
03338       x = get_bits(f, 8);
03339       y = get_bits(f, 8);
03340       c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
03341       ordered = get_bits(f,1);
03342       c->sparse = ordered ? 0 : get_bits(f,1);
03343 
03344       if (c->sparse)
03345          lengths = (uint8 *) setup_temp_malloc(f, c->entries);
03346       else
03347          lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
03348 
03349       if (!lengths) return error(f, VORBIS_outofmem);
03350 
03351       if (ordered) {
03352          int current_entry = 0;
03353          int current_length = get_bits(f,5) + 1;
03354          while (current_entry < c->entries) {
03355             int limit = c->entries - current_entry;
03356             int n = get_bits(f, ilog(limit));
03357             if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
03358             memset(lengths + current_entry, current_length, n);
03359             current_entry += n;
03360             ++current_length;
03361          }
03362       } else {
03363          for (j=0; j < c->entries; ++j) {
03364             int present = c->sparse ? get_bits(f,1) : 1;
03365             if (present) {
03366                lengths[j] = get_bits(f, 5) + 1;
03367                ++total;
03368             } else {
03369                lengths[j] = NO_CODE;
03370             }
03371          }
03372       }
03373 
03374       if (c->sparse && total >= c->entries >> 2) {
03375          // convert sparse items to non-sparse!
03376          if (c->entries > (int) f->setup_temp_memory_required)
03377             f->setup_temp_memory_required = c->entries;
03378 
03379          c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
03380          memcpy(c->codeword_lengths, lengths, c->entries);
03381          setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
03382          lengths = c->codeword_lengths;
03383          c->sparse = 0;
03384       }
03385 
03386       // compute the size of the sorted tables
03387       if (c->sparse) {
03388          sorted_count = total;
03389          //assert(total != 0);
03390       } else {
03391          sorted_count = 0;
03392          #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
03393          for (j=0; j < c->entries; ++j)
03394             if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
03395                ++sorted_count;
03396          #endif
03397       }
03398 
03399       c->sorted_entries = sorted_count;
03400       values = NULL;
03401 
03402       if (!c->sparse) {
03403          c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
03404          if (!c->codewords)                  return error(f, VORBIS_outofmem);
03405       } else {
03406          unsigned int size;
03407          if (c->sorted_entries) {
03408             c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
03409             if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
03410             c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
03411             if (!c->codewords)                  return error(f, VORBIS_outofmem);
03412             values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
03413             if (!values)                        return error(f, VORBIS_outofmem);
03414          }
03415          size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
03416          if (size > f->setup_temp_memory_required)
03417             f->setup_temp_memory_required = size;
03418       }
03419 
03420       if (!compute_codewords(c, lengths, c->entries, values)) {
03421          if (c->sparse) setup_temp_free(f, values, 0);
03422          return error(f, VORBIS_invalid_setup);
03423       }
03424 
03425       if (c->sorted_entries) {
03426          // allocate an extra slot for sentinels
03427          c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
03428          // allocate an extra slot at the front so that c->sorted_values[-1] is defined
03429          // so that we can catch that case without an extra if
03430          c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
03431          if (c->sorted_values) { ++c->sorted_values; c->sorted_values[-1] = -1; }
03432          compute_sorted_huffman(c, lengths, values);
03433       }
03434 
03435       if (c->sparse) {
03436          setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
03437          setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
03438          setup_temp_free(f, lengths, c->entries);
03439          c->codewords = NULL;
03440       }
03441 
03442       compute_accelerated_huffman(c);
03443 
03444       c->lookup_type = get_bits(f, 4);
03445       if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
03446       if (c->lookup_type > 0) {
03447          uint16 *mults;
03448          c->minimum_value = float32_unpack(get_bits(f, 32));
03449          c->delta_value = float32_unpack(get_bits(f, 32));
03450          c->value_bits = get_bits(f, 4)+1;
03451          c->sequence_p = get_bits(f,1);
03452          if (c->lookup_type == 1) {
03453             c->lookup_values = lookup1_values(c->entries, c->dimensions);
03454          } else {
03455             c->lookup_values = c->entries * c->dimensions;
03456          }
03457          mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
03458          if (mults == NULL) return error(f, VORBIS_outofmem);
03459          for (j=0; j < (int) c->lookup_values; ++j) {
03460             int q = get_bits(f, c->value_bits);
03461             if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
03462             mults[j] = q;
03463          }
03464 
03465 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
03466          if (c->lookup_type == 1) {
03467             int len, sparse = c->sparse;
03468             // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
03469             if (sparse) {
03470                if (c->sorted_entries == 0) goto skip;
03471                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
03472             } else
03473                c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
03474             if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
03475             len = sparse ? c->sorted_entries : c->entries;
03476             for (j=0; j < len; ++j) {
03477                int z = sparse ? c->sorted_values[j] : j, div=1;
03478                for (k=0; k < c->dimensions; ++k) {
03479                   int off = (z / div) % c->lookup_values;
03480                   c->multiplicands[j*c->dimensions + k] =
03481                          #ifndef STB_VORBIS_CODEBOOK_FLOATS
03482                             mults[off];
03483                          #else
03484                             mults[off]*c->delta_value + c->minimum_value;
03485                             // in this case (and this case only) we could pre-expand c->sequence_p,
03486                             // and throw away the decode logic for it; have to ALSO do
03487                             // it in the case below, but it can only be done if
03488                             //    STB_VORBIS_CODEBOOK_FLOATS
03489                             //   !STB_VORBIS_DIVIDES_IN_CODEBOOK
03490                          #endif
03491                   div *= c->lookup_values;
03492                }
03493             }
03494             setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
03495             c->lookup_type = 2;
03496          }
03497          else
03498 #endif
03499          {
03500             c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
03501             #ifndef STB_VORBIS_CODEBOOK_FLOATS
03502             memcpy(c->multiplicands, mults, sizeof(c->multiplicands[0]) * c->lookup_values);
03503             #else
03504             for (j=0; j < (int) c->lookup_values; ++j)
03505                c->multiplicands[j] = mults[j] * c->delta_value + c->minimum_value;
03506             #endif
03507             setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
03508          }
03509         skip:;
03510 
03511          #ifdef STB_VORBIS_CODEBOOK_FLOATS
03512          if (c->lookup_type == 2 && c->sequence_p) {
03513             for (j=1; j < (int) c->lookup_values; ++j)
03514                c->multiplicands[j] = c->multiplicands[j-1];
03515             c->sequence_p = 0;
03516          }
03517          #endif
03518       }
03519    }
03520 
03521    // time domain transfers (notused)
03522 
03523    x = get_bits(f, 6) + 1;
03524    for (i=0; i < x; ++i) {
03525       uint32 z = get_bits(f, 16);
03526       if (z != 0) return error(f, VORBIS_invalid_setup);
03527    }
03528 
03529    // Floors
03530    f->floor_count = get_bits(f, 6)+1;
03531    f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
03532    for (i=0; i < f->floor_count; ++i) {
03533       f->floor_types[i] = get_bits(f, 16);
03534       if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
03535       if (f->floor_types[i] == 0) {
03536          Floor0 *g = &f->floor_config[i].floor0;
03537          g->order = get_bits(f,8);
03538          g->rate = get_bits(f,16);
03539          g->bark_map_size = get_bits(f,16);
03540          g->amplitude_bits = get_bits(f,6);
03541          g->amplitude_offset = get_bits(f,8);
03542          g->number_of_books = get_bits(f,4) + 1;
03543          for (j=0; j < g->number_of_books; ++j)
03544             g->book_list[j] = get_bits(f,8);
03545          return error(f, VORBIS_feature_not_supported);
03546       } else {
03547          Point p[31*8+2];
03548          Floor1 *g = &f->floor_config[i].floor1;
03549          int max_class = -1; 
03550          g->partitions = get_bits(f, 5);
03551          for (j=0; j < g->partitions; ++j) {
03552             g->partition_class_list[j] = get_bits(f, 4);
03553             if (g->partition_class_list[j] > max_class)
03554                max_class = g->partition_class_list[j];
03555          }
03556          for (j=0; j <= max_class; ++j) {
03557             g->class_dimensions[j] = get_bits(f, 3)+1;
03558             g->class_subclasses[j] = get_bits(f, 2);
03559             if (g->class_subclasses[j]) {
03560                g->class_masterbooks[j] = get_bits(f, 8);
03561                if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
03562             }
03563             for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
03564                g->subclass_books[j][k] = get_bits(f,8)-1;
03565                if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
03566             }
03567          }
03568          g->floor1_multiplier = get_bits(f,2)+1;
03569          g->rangebits = get_bits(f,4);
03570          g->Xlist[0] = 0;
03571          g->Xlist[1] = 1 << g->rangebits;
03572          g->values = 2;
03573          for (j=0; j < g->partitions; ++j) {
03574             int c = g->partition_class_list[j];
03575             for (k=0; k < g->class_dimensions[c]; ++k) {
03576                g->Xlist[g->values] = get_bits(f, g->rangebits);
03577                ++g->values;
03578             }
03579          }
03580          // precompute the sorting
03581          for (j=0; j < g->values; ++j) {
03582             p[j].x = g->Xlist[j];
03583             p[j].y = j;
03584          }
03585          qsort(p, g->values, sizeof(p[0]), point_compare);
03586          for (j=0; j < g->values; ++j)
03587             g->sorted_order[j] = (uint8) p[j].y;
03588          // precompute the neighbors
03589          for (j=2; j < g->values; ++j) {
03590             int low,hi;
03591             neighbors(g->Xlist, j, &low,&hi);
03592             g->neighbors[j][0] = low;
03593             g->neighbors[j][1] = hi;
03594          }
03595 
03596          if (g->values > longest_floorlist)
03597             longest_floorlist = g->values;
03598       }
03599    }
03600 
03601    // Residue
03602    f->residue_count = get_bits(f, 6)+1;
03603    f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(*f->residue_config));
03604    for (i=0; i < f->residue_count; ++i) {
03605       uint8 residue_cascade[64];
03606       Residue *r = f->residue_config+i;
03607       f->residue_types[i] = get_bits(f, 16);
03608       if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
03609       r->begin = get_bits(f, 24);
03610       r->end = get_bits(f, 24);
03611       r->part_size = get_bits(f,24)+1;
03612       r->classifications = get_bits(f,6)+1;
03613       r->classbook = get_bits(f,8);
03614       for (j=0; j < r->classifications; ++j) {
03615          uint8 high_bits=0;
03616          uint8 low_bits=get_bits(f,3);
03617          if (get_bits(f,1))
03618             high_bits = get_bits(f,5);
03619          residue_cascade[j] = high_bits*8 + low_bits;
03620       }
03621       r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
03622       for (j=0; j < r->classifications; ++j) {
03623          for (k=0; k < 8; ++k) {
03624             if (residue_cascade[j] & (1 << k)) {
03625                r->residue_books[j][k] = get_bits(f, 8);
03626                if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
03627             } else {
03628                r->residue_books[j][k] = -1;
03629             }
03630          }
03631       }
03632       // precompute the classifications[] array to avoid inner-loop mod/divide
03633       // call it 'classdata' since we already have r->classifications
03634       r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
03635       if (!r->classdata) return error(f, VORBIS_outofmem);
03636       memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
03637       for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
03638          int classwords = f->codebooks[r->classbook].dimensions;
03639          int temp = j;
03640          r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
03641          for (k=classwords-1; k >= 0; --k) {
03642             r->classdata[j][k] = temp % r->classifications;
03643             temp /= r->classifications;
03644          }
03645       }
03646    }
03647 
03648    f->mapping_count = get_bits(f,6)+1;
03649    f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
03650    for (i=0; i < f->mapping_count; ++i) {
03651       Mapping *m = f->mapping + i;      
03652       int mapping_type = get_bits(f,16);
03653       if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
03654       m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
03655       if (get_bits(f,1))
03656          m->submaps = get_bits(f,4);
03657       else
03658          m->submaps = 1;
03659       if (m->submaps > max_submaps)
03660          max_submaps = m->submaps;
03661       if (get_bits(f,1)) {
03662          m->coupling_steps = get_bits(f,8)+1;
03663          for (k=0; k < m->coupling_steps; ++k) {
03664             m->chan[k].magnitude = get_bits(f, ilog(f->channels)-1);
03665             m->chan[k].angle = get_bits(f, ilog(f->channels)-1);
03666             if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
03667             if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
03668             if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
03669          }
03670       } else
03671          m->coupling_steps = 0;
03672 
03673       // reserved field
03674       if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
03675       if (m->submaps > 1) {
03676          for (j=0; j < f->channels; ++j) {
03677             m->chan[j].mux = get_bits(f, 4);
03678             if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
03679          }
03680       } else
03681          // @SPECIFICATION: this case is missing from the spec
03682          for (j=0; j < f->channels; ++j)
03683             m->chan[j].mux = 0;
03684 
03685       for (j=0; j < m->submaps; ++j) {
03686          get_bits(f,8); // discard
03687          m->submap_floor[j] = get_bits(f,8);
03688          m->submap_residue[j] = get_bits(f,8);
03689          if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
03690          if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
03691       }
03692    }
03693 
03694    // Modes
03695    f->mode_count = get_bits(f, 6)+1;
03696    for (i=0; i < f->mode_count; ++i) {
03697       Mode *m = f->mode_config+i;
03698       m->blockflag = get_bits(f,1);
03699       m->windowtype = get_bits(f,16);
03700       m->transformtype = get_bits(f,16);
03701       m->mapping = get_bits(f,8);
03702       if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
03703       if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
03704       if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
03705    }
03706 
03707    flush_packet(f);
03708 
03709    f->previous_length = 0;
03710 
03711    for (i=0; i < f->channels; ++i) {
03712       f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
03713       f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
03714       f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
03715       #ifdef STB_VORBIS_NO_DEFER_FLOOR
03716       f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
03717       #endif
03718    }
03719 
03720    if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
03721    if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
03722    f->blocksize[0] = f->blocksize_0;
03723    f->blocksize[1] = f->blocksize_1;
03724 
03725 #ifdef STB_VORBIS_DIVIDE_TABLE
03726    if (integer_divide_table[1][1]==0)
03727       for (i=0; i < DIVTAB_NUMER; ++i)
03728          for (j=1; j < DIVTAB_DENOM; ++j)
03729             integer_divide_table[i][j] = i / j;
03730 #endif
03731 
03732    // compute how much temporary memory is needed
03733 
03734    // 1.
03735    {
03736       uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
03737       uint32 classify_mem;
03738       int i,max_part_read=0;
03739       for (i=0; i < f->residue_count; ++i) {
03740          Residue *r = f->residue_config + i;
03741          int n_read = r->end - r->begin;
03742          int part_read = n_read / r->part_size;
03743          if (part_read > max_part_read)
03744             max_part_read = part_read;
03745       }
03746       #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
03747       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
03748       #else
03749       classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
03750       #endif
03751 
03752       f->temp_memory_required = classify_mem;
03753       if (imdct_mem > f->temp_memory_required)
03754          f->temp_memory_required = imdct_mem;
03755    }
03756 
03757    f->first_decode = TRUE;
03758 
03759    if (f->alloc.alloc_buffer) {
03760       assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
03761       // check if there's enough temp memory so we don't error later
03762       if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
03763          return error(f, VORBIS_outofmem);
03764    }
03765 
03766    f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
03767 
03768    return TRUE;
03769 }
03770 
03771 static void vorbis_deinit(stb_vorbis *p)
03772 {
03773    int i,j;
03774    for (i=0; i < p->residue_count; ++i) {
03775       Residue *r = p->residue_config+i;
03776       if (r->classdata) {
03777          for (j=0; j < p->codebooks[r->classbook].entries; ++j)
03778             setup_free(p, r->classdata[j]);
03779          setup_free(p, r->classdata);
03780       }
03781       setup_free(p, r->residue_books);
03782    }
03783 
03784    if (p->codebooks) {
03785       for (i=0; i < p->codebook_count; ++i) {
03786          Codebook *c = p->codebooks + i;
03787          setup_free(p, c->codeword_lengths);
03788          setup_free(p, c->multiplicands);
03789          setup_free(p, c->codewords);
03790          setup_free(p, c->sorted_codewords);
03791          // c->sorted_values[-1] is the first entry in the array
03792          setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
03793       }
03794       setup_free(p, p->codebooks);
03795    }
03796    setup_free(p, p->floor_config);
03797    setup_free(p, p->residue_config);
03798    for (i=0; i < p->mapping_count; ++i)
03799       setup_free(p, p->mapping[i].chan);
03800    setup_free(p, p->mapping);
03801    for (i=0; i < p->channels; ++i) {
03802       setup_free(p, p->channel_buffers[i]);
03803       setup_free(p, p->previous_window[i]);
03804       #ifdef STB_VORBIS_NO_DEFER_FLOOR
03805       setup_free(p, p->floor_buffers[i]);
03806       #endif
03807       setup_free(p, p->finalY[i]);
03808    }
03809    for (i=0; i < 2; ++i) {
03810       setup_free(p, p->A[i]);
03811       setup_free(p, p->B[i]);
03812       setup_free(p, p->C[i]);
03813       setup_free(p, p->window[i]);
03814        setup_free(p, p->bit_reverse[i]);
03815    }
03816    #ifndef STB_VORBIS_NO_STDIO
03817    if (p->close_on_free) fclose(p->f);
03818    #endif
03819 }
03820 
03821 void stb_vorbis_close(stb_vorbis *p)
03822 {
03823    if (p == NULL) return;
03824    vorbis_deinit(p);
03825    setup_free(p,p);
03826 }
03827 
03828 static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
03829 {
03830    memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
03831    if (z) {
03832       p->alloc = *z;
03833       p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
03834       p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
03835    }
03836    p->eof = 0;
03837    p->error = VORBIS__no_error;
03838    p->stream = NULL;
03839    p->codebooks = NULL;
03840    p->page_crc_tests = -1;
03841    #ifndef STB_VORBIS_NO_STDIO
03842    p->close_on_free = FALSE;
03843    p->f = NULL;
03844    #endif
03845 }
03846 
03847 int stb_vorbis_get_sample_offset(stb_vorbis *f)
03848 {
03849    if (f->current_loc_valid)
03850       return f->current_loc;
03851    else
03852       return -1;
03853 }
03854 
03855 stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
03856 {
03857    stb_vorbis_info d;
03858    d.channels = f->channels;
03859    d.sample_rate = f->sample_rate;
03860    d.setup_memory_required = f->setup_memory_required;
03861    d.setup_temp_memory_required = f->setup_temp_memory_required;
03862    d.temp_memory_required = f->temp_memory_required;
03863    d.max_frame_size = f->blocksize_1 >> 1;
03864    return d;
03865 }
03866 
03867 int stb_vorbis_get_error(stb_vorbis *f)
03868 {
03869    int e = f->error;
03870    f->error = VORBIS__no_error;
03871    return e;
03872 }
03873 
03874 static stb_vorbis * vorbis_alloc(stb_vorbis *f)
03875 {
03876    stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
03877    return p;
03878 }
03879 
03880 #ifndef STB_VORBIS_NO_PUSHDATA_API
03881 
03882 void stb_vorbis_flush_pushdata(stb_vorbis *f)
03883 {
03884    f->previous_length = 0;
03885    f->page_crc_tests  = 0;
03886    f->discard_samples_deferred = 0;
03887    f->current_loc_valid = FALSE;
03888    f->first_decode = FALSE;
03889    f->samples_output = 0;
03890    f->channel_buffer_start = 0;
03891    f->channel_buffer_end = 0;
03892 }
03893 
03894 static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
03895 {
03896    int i,n;
03897    for (i=0; i < f->page_crc_tests; ++i)
03898       f->scan[i].bytes_done = 0;
03899 
03900    // if we have room for more scans, search for them first, because
03901    // they may cause us to stop early if their header is incomplete
03902    if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
03903       if (data_len < 4) return 0;
03904       data_len -= 3; // need to look for 4-byte sequence, so don't miss
03905                      // one that straddles a boundary
03906       for (i=0; i < data_len; ++i) {
03907          if (data[i] == 0x4f) {
03908             if (0==memcmp(data+i, ogg_page_header, 4)) {
03909                int j,len;
03910                uint32 crc;
03911                // make sure we have the whole page header
03912                if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
03913                   // only read up to this page start, so hopefully we'll
03914                   // have the whole page header start next time
03915                   data_len = i;
03916                   break;
03917                }
03918                // ok, we have it all; compute the length of the page
03919                len = 27 + data[i+26];
03920                for (j=0; j < data[i+26]; ++j)
03921                   len += data[i+27+j];
03922                // scan everything up to the embedded crc (which we must 0)
03923                crc = 0;
03924                for (j=0; j < 22; ++j)
03925                   crc = crc32_update(crc, data[i+j]);
03926                // now process 4 0-bytes
03927                for (   ; j < 26; ++j)
03928                   crc = crc32_update(crc, 0);
03929                // len is the total number of bytes we need to scan
03930                n = f->page_crc_tests++;
03931                f->scan[n].bytes_left = len-j;
03932                f->scan[n].crc_so_far = crc;
03933                f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
03934                // if the last frame on a page is continued to the next, then
03935                // we can't recover the sample_loc immediately
03936                if (data[i+27+data[i+26]-1] == 255)
03937                   f->scan[n].sample_loc = ~0;
03938                else
03939                   f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
03940                f->scan[n].bytes_done = i+j;
03941                if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
03942                   break;
03943                // keep going if we still have room for more
03944             }
03945          }
03946       }
03947    }
03948 
03949    for (i=0; i < f->page_crc_tests;) {
03950       uint32 crc;
03951       int j;
03952       int n = f->scan[i].bytes_done;
03953       int m = f->scan[i].bytes_left;
03954       if (m > data_len - n) m = data_len - n;
03955       // m is the bytes to scan in the current chunk
03956       crc = f->scan[i].crc_so_far;
03957       for (j=0; j < m; ++j)
03958          crc = crc32_update(crc, data[n+j]);
03959       f->scan[i].bytes_left -= m;
03960       f->scan[i].crc_so_far = crc;
03961       if (f->scan[i].bytes_left == 0) {
03962          // does it match?
03963          if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
03964             // Houston, we have page
03965             data_len = n+m; // consumption amount is wherever that scan ended
03966             f->page_crc_tests = -1; // drop out of page scan mode
03967             f->previous_length = 0; // decode-but-don't-output one frame
03968             f->next_seg = -1;       // start a new page
03969             f->current_loc = f->scan[i].sample_loc; // set the current sample location
03970                                     // to the amount we'd have decoded had we decoded this page
03971             f->current_loc_valid = f->current_loc != ~0;
03972             return data_len;
03973          }
03974          // delete entry
03975          f->scan[i] = f->scan[--f->page_crc_tests];
03976       } else {
03977          ++i;
03978       }
03979    }
03980 
03981    return data_len;
03982 }
03983 
03984 // return value: number of bytes we used
03985 int stb_vorbis_decode_frame_pushdata(
03986          stb_vorbis *f,                 // the file we're decoding
03987          uint8 *data, int data_len,     // the memory available for decoding
03988          int *channels,                 // place to write number of float * buffers
03989          float ***output,               // place to write float ** array of float * buffers
03990          int *samples                   // place to write number of output samples
03991      )
03992 {
03993    int i;
03994    int len,right,left;
03995 
03996    if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
03997 
03998    if (f->page_crc_tests >= 0) {
03999       *samples = 0;
04000       return vorbis_search_for_page_pushdata(f, data, data_len);
04001    }
04002 
04003    f->stream     = data;
04004    f->stream_end = data + data_len;
04005    f->error      = VORBIS__no_error;
04006 
04007    // check that we have the entire packet in memory
04008    if (!is_whole_packet_present(f, FALSE)) {
04009       *samples = 0;
04010       return 0;
04011    }
04012 
04013    if (!vorbis_decode_packet(f, &len, &left, &right)) {
04014       // save the actual error we encountered
04015       enum STBVorbisError error = f->error;
04016       if (error == VORBIS_bad_packet_type) {
04017          // flush and resynch
04018          f->error = VORBIS__no_error;
04019          while (get8_packet(f) != EOP)
04020             if (f->eof) break;
04021          *samples = 0;
04022          return f->stream - data;
04023       }
04024       if (error == VORBIS_continued_packet_flag_invalid) {
04025          if (f->previous_length == 0) {
04026             // we may be resynching, in which case it's ok to hit one
04027             // of these; just discard the packet
04028             f->error = VORBIS__no_error;
04029             while (get8_packet(f) != EOP)
04030                if (f->eof) break;
04031             *samples = 0;
04032             return f->stream - data;
04033          }
04034       }
04035       // if we get an error while parsing, what to do?
04036       // well, it DEFINITELY won't work to continue from where we are!
04037       stb_vorbis_flush_pushdata(f);
04038       // restore the error that actually made us bail
04039       f->error = error;
04040       *samples = 0;
04041       return 1;
04042    }
04043 
04044    // success!
04045    len = vorbis_finish_frame(f, len, left, right);
04046    for (i=0; i < f->channels; ++i)
04047       f->outputs[i] = f->channel_buffers[i] + left;
04048 
04049    if (channels) *channels = f->channels;
04050    *samples = len;
04051    *output = f->outputs;
04052    return f->stream - data;
04053 }
04054 
04055 stb_vorbis *stb_vorbis_open_pushdata(
04056          unsigned char *data, int data_len, // the memory available for decoding
04057          int *data_used,              // only defined if result is not NULL
04058          int *error, stb_vorbis_alloc *alloc)
04059 {
04060    stb_vorbis *f, p;
04061    vorbis_init(&p, alloc);
04062    p.stream     = data;
04063    p.stream_end = data + data_len;
04064    p.push_mode  = TRUE;
04065    if (!start_decoder(&p)) {
04066       if (p.eof)
04067          *error = VORBIS_need_more_data;
04068       else
04069          *error = p.error;
04070       return NULL;
04071    }
04072    f = vorbis_alloc(&p);
04073    if (f) {
04074       *f = p;
04075       *data_used = f->stream - data;
04076       *error = 0;
04077       return f;
04078    } else {
04079       vorbis_deinit(&p);
04080       return NULL;
04081    }
04082 }
04083 #endif // STB_VORBIS_NO_PUSHDATA_API
04084 
04085 unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
04086 {
04087    #ifndef STB_VORBIS_NO_PUSHDATA_API
04088    if (f->push_mode) return 0;
04089    #endif
04090    if (USE_MEMORY(f)) return f->stream - f->stream_start;
04091    #ifndef STB_VORBIS_NO_STDIO
04092    return ftell(f->f) - f->f_start;
04093    #endif
04094 }
04095 
04096 #ifndef STB_VORBIS_NO_PULLDATA_API
04097 //
04098 // DATA-PULLING API
04099 //
04100 
04101 static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
04102 {
04103    for(;;) {
04104       int n;
04105       if (f->eof) return 0;
04106       n = get8(f);
04107       if (n == 0x4f) { // page header
04108          unsigned int retry_loc = stb_vorbis_get_file_offset(f);
04109          int i;
04110          // check if we're off the end of a file_section stream
04111          if (retry_loc - 25 > f->stream_len)
04112             return 0;
04113          // check the rest of the header
04114          for (i=1; i < 4; ++i)
04115             if (get8(f) != ogg_page_header[i])
04116                break;
04117          if (f->eof) return 0;
04118          if (i == 4) {
04119             uint8 header[27];
04120             uint32 i, crc, goal, len;
04121             for (i=0; i < 4; ++i)
04122                header[i] = ogg_page_header[i];
04123             for (; i < 27; ++i)
04124                header[i] = get8(f);
04125             if (f->eof) return 0;
04126             if (header[4] != 0) goto invalid;
04127             goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
04128             for (i=22; i < 26; ++i)
04129                header[i] = 0;
04130             crc = 0;
04131             for (i=0; i < 27; ++i)
04132                crc = crc32_update(crc, header[i]);
04133             len = 0;
04134             for (i=0; i < header[26]; ++i) {
04135                int s = get8(f);
04136                crc = crc32_update(crc, s);
04137                len += s;
04138             }
04139             if (len && f->eof) return 0;
04140             for (i=0; i < len; ++i)
04141                crc = crc32_update(crc, get8(f));
04142             // finished parsing probable page
04143             if (crc == goal) {
04144                // we could now check that it's either got the last
04145                // page flag set, OR it's followed by the capture
04146                // pattern, but I guess TECHNICALLY you could have
04147                // a file with garbage between each ogg page and recover
04148                // from it automatically? So even though that paranoia
04149                // might decrease the chance of an invalid decode by
04150                // another 2^32, not worth it since it would hose those
04151                // invalid-but-useful files?
04152                if (end)
04153                   *end = stb_vorbis_get_file_offset(f);
04154                if (last)
04155                   if (header[5] & 0x04)
04156                      *last = 1;
04157                   else
04158                      *last = 0;
04159                set_file_offset(f, retry_loc-1);
04160                return 1;
04161             }
04162          }
04163         invalid:
04164          // not a valid page, so rewind and look for next one
04165          set_file_offset(f, retry_loc);
04166       }
04167    }
04168 }
04169 
04170 // seek is implemented with 'interpolation search'--this is like
04171 // binary search, but we use the data values to estimate the likely
04172 // location of the data item (plus a bit of a bias so when the
04173 // estimation is wrong we don't waste overly much time)
04174 
04175 #define SAMPLE_unknown  0xffffffff
04176 
04177 
04178 // ogg vorbis, in its insane infinite wisdom, only provides
04179 // information about the sample at the END of the page.
04180 // therefore we COULD have the data we need in the current
04181 // page, and not know it. we could just use the end location
04182 // as our only knowledge for bounds, seek back, and eventually
04183 // the binary search finds it. or we can try to be smart and
04184 // not waste time trying to locate more pages. we try to be
04185 // smart, since this data is already in memory anyway, so
04186 // doing needless I/O would be crazy!
04187 static int vorbis_analyze_page(stb_vorbis *f, ProbedPage *z)
04188 {
04189    uint8 header[27], lacing[255];
04190    uint8 packet_type[255];
04191    int num_packet, packet_start, previous =0;
04192    int i,len;
04193    uint32 samples;
04194 
04195    // record where the page starts
04196    z->page_start = stb_vorbis_get_file_offset(f);
04197 
04198    // parse the header
04199    getn(f, header, 27);
04200    assert(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S');
04201    getn(f, lacing, header[26]);
04202 
04203    // determine the length of the payload
04204    len = 0;
04205    for (i=0; i < header[26]; ++i)
04206       len += lacing[i];
04207 
04208    // this implies where the page ends
04209    z->page_end = z->page_start + 27 + header[26] + len;
04210 
04211    // read the last-decoded sample out of the data
04212    z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 16);
04213 
04214    if (header[5] & 4) {
04215       // if this is the last page, it's not possible to work
04216       // backwards to figure out the first sample! whoops! fuck.
04217       z->first_decoded_sample = SAMPLE_unknown;
04218       set_file_offset(f, z->page_start);
04219       return 1;
04220    }
04221 
04222    // scan through the frames to determine the sample-count of each one...
04223    // our goal is the sample # of the first fully-decoded sample on the
04224    // page, which is the first decoded sample of the 2nd page
04225 
04226    num_packet=0;
04227 
04228    packet_start = ((header[5] & 1) == 0);
04229 
04230    for (i=0; i < header[26]; ++i) {
04231       if (packet_start) {
04232          uint8 n,b,m;
04233          if (lacing[i] == 0) goto bail; // trying to read from zero-length packet
04234          n = get8(f);
04235          // if bottom bit is non-zero, we've got corruption
04236          if (n & 1) goto bail;
04237          n >>= 1;
04238          b = ilog(f->mode_count-1);
04239          m = n >> b;
04240          n &= (1 << b)-1;
04241          if (n >= f->mode_count) goto bail;
04242          if (num_packet == 0 && f->mode_config[n].blockflag)
04243             previous = (m & 1);
04244          packet_type[num_packet++] = f->mode_config[n].blockflag;
04245          skip(f, lacing[i]-1);
04246       } else
04247          skip(f, lacing[i]);
04248       packet_start = (lacing[i] < 255);
04249    }
04250 
04251    // now that we know the sizes of all the pages, we can start determining
04252    // how much sample data there is.
04253 
04254    samples = 0;
04255 
04256    // for the last packet, we step by its whole length, because the definition
04257    // is that we encoded the end sample loc of the 'last packet completed',
04258    // where 'completed' refers to packets being split, and we are left to guess
04259    // what 'end sample loc' means. we assume it means ignoring the fact that
04260    // the last half of the data is useless without windowing against the next
04261    // packet... (so it's not REALLY complete in that sense)
04262    if (num_packet > 1)
04263       samples += f->blocksize[packet_type[num_packet-1]];
04264 
04265    for (i=num_packet-2; i >= 1; --i) {
04266       // now, for this packet, how many samples do we have that
04267       // do not overlap the following packet?
04268       if (packet_type[i] == 1)
04269          if (packet_type[i+1] == 1)
04270             samples += f->blocksize_1 >> 1;
04271          else
04272             samples += ((f->blocksize_1 - f->blocksize_0) >> 2) + (f->blocksize_0 >> 1);
04273       else
04274          samples += f->blocksize_0 >> 1;
04275    }
04276    // now, at this point, we've rewound to the very beginning of the
04277    // _second_ packet. if we entirely discard the first packet after
04278    // a seek, this will be exactly the right sample number. HOWEVER!
04279    // we can't as easily compute this number for the LAST page. The
04280    // only way to get the sample offset of the LAST page is to use
04281    // the end loc from the previous page. But what that returns us
04282    // is _exactly_ the place where we get our first non-overlapped
04283    // sample. (I think. Stupid spec for being ambiguous.) So for
04284    // consistency it's better to do that here, too. However, that
04285    // will then require us to NOT discard all of the first frame we
04286    // decode, in some cases, which means an even weirder frame size
04287    // and extra code. what a fucking pain.
04288    
04289    // we're going to discard the first packet if we
04290    // start the seek here, so we don't care about it. (we could actually
04291    // do better; if the first packet is long, and the previous packet
04292    // is short, there's actually data in the first half of the first
04293    // packet that doesn't need discarding... but not worth paying the
04294    // effort of tracking that of that here and in the seeking logic)
04295    // except crap, if we infer it from the _previous_ packet's end
04296    // location, we DO need to use that definition... and we HAVE to
04297    // infer the start loc of the LAST packet from the previous packet's
04298    // end location. fuck you, ogg vorbis.
04299 
04300    z->first_decoded_sample = z->last_decoded_sample - samples;
04301 
04302    // restore file state to where we were
04303    set_file_offset(f, z->page_start);
04304    return 1;
04305 
04306    // restore file state to where we were
04307   bail:
04308    set_file_offset(f, z->page_start);
04309    return 0;
04310 }
04311 
04312 static int vorbis_seek_frame_from_page(stb_vorbis *f, uint32 page_start, uint32 first_sample, uint32 target_sample, int fine)
04313 {
04314    int left_start, left_end, right_start, right_end, mode,i;
04315    int frame=0;
04316    uint32 frame_start;
04317    int frames_to_skip, data_to_skip;
04318 
04319    // first_sample is the sample # of the first sample that doesn't
04320    // overlap the previous page... note that this requires us to
04321    // _partially_ discard the first packet! bleh.
04322    set_file_offset(f, page_start);
04323 
04324    f->next_seg = -1;  // force page resync
04325 
04326    frame_start = first_sample;
04327    // frame start is where the previous packet's last decoded sample
04328    // was, which corresponds to left_end... EXCEPT if the previous
04329    // packet was long and this packet is short? Probably a bug here.
04330 
04331 
04332    // now, we can start decoding frames... we'll only FAKE decode them,
04333    // until we find the frame that contains our sample; then we'll rewind,
04334    // and try again
04335    for (;;) {
04336       int start;
04337 
04338       if (!vorbis_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
04339          return error(f, VORBIS_seek_failed);
04340 
04341       if (frame == 0)
04342          start = left_end;
04343       else
04344          start = left_start;
04345 
04346       // the window starts at left_start; the last valid sample we generate
04347       // before the next frame's window start is right_start-1
04348       if (target_sample < frame_start + right_start-start)
04349          break;
04350 
04351       flush_packet(f);
04352       if (f->eof)
04353          return error(f, VORBIS_seek_failed);
04354 
04355       frame_start += right_start - start;
04356 
04357       ++frame;
04358    }
04359 
04360    // ok, at this point, the sample we want is contained in frame #'frame'
04361 
04362    // to decode frame #'frame' normally, we have to decode the
04363    // previous frame first... but if it's the FIRST frame of the page
04364    // we can't. if it's the first frame, it means it falls in the part
04365    // of the first frame that doesn't overlap either of the other frames.
04366    // so, if we have to handle that case for the first frame, we might
04367    // as well handle it for all of them, so:
04368    if (target_sample > frame_start + (left_end - left_start)) {
04369       // so what we want to do is go ahead and just immediately decode
04370       // this frame, but then make it so the next get_frame_float() uses
04371       // this already-decoded data? or do we want to go ahead and rewind,
04372       // and leave a flag saying to skip the first N data? let's do that
04373       frames_to_skip = frame;  // if this is frame #1, skip 1 frame (#0)
04374       data_to_skip = left_end - left_start;
04375    } else {
04376       // otherwise, we want to skip frames 0, 1, 2, ... frame-2
04377       // (which means frame-2+1 total frames) then decode frame-1,
04378       // then leave frame pending
04379       frames_to_skip = frame - 1;
04380       assert(frames_to_skip >= 0);
04381       data_to_skip = -1;      
04382    }
04383 
04384    set_file_offset(f, page_start);
04385    f->next_seg = - 1; // force page resync
04386 
04387    for (i=0; i < frames_to_skip; ++i) {
04388       maybe_start_packet(f);
04389       flush_packet(f);
04390    }
04391 
04392    if (data_to_skip >= 0) {
04393       int i,j,n = f->blocksize_0 >> 1;
04394       f->discard_samples_deferred = data_to_skip;
04395       for (i=0; i < f->channels; ++i)
04396          for (j=0; j < n; ++j)
04397             f->previous_window[i][j] = 0;
04398       f->previous_length = n;
04399       frame_start += data_to_skip;
04400    } else {
04401       f->previous_length = 0;
04402       vorbis_pump_first_frame(f);
04403    }
04404 
04405    // at this point, the NEXT decoded frame will generate the desired sample
04406    if (fine) {
04407       // so if we're doing sample accurate streaming, we want to go ahead and decode it!
04408       if (target_sample != frame_start) {
04409          int n;
04410          stb_vorbis_get_frame_float(f, &n, NULL);
04411          assert(target_sample > frame_start);
04412          assert(f->channel_buffer_start + (int) (target_sample-frame_start) < f->channel_buffer_end);
04413          f->channel_buffer_start += (target_sample - frame_start);
04414       }
04415    }
04416 
04417    return 0;
04418 }
04419 
04420 static int vorbis_seek_base(stb_vorbis *f, unsigned int sample_number, int fine)
04421 {
04422    ProbedPage p[2],q;
04423    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
04424 
04425    // do we know the location of the last page?
04426    if (f->p_last.page_start == 0) {
04427       uint32 z = stb_vorbis_stream_length_in_samples(f);
04428       if (z == 0) return error(f, VORBIS_cant_find_last_page);
04429    }
04430 
04431    p[0] = f->p_first;
04432    p[1] = f->p_last;
04433 
04434    if (sample_number >= f->p_last.last_decoded_sample)
04435       sample_number = f->p_last.last_decoded_sample-1;
04436 
04437    if (sample_number < f->p_first.last_decoded_sample) {
04438       vorbis_seek_frame_from_page(f, p[0].page_start, 0, sample_number, fine);
04439       return 0;
04440    } else {
04441       int attempts=0;
04442       while (p[0].page_end < p[1].page_start) {
04443          uint32 probe;
04444          uint32 start_offset, end_offset;
04445          uint32 start_sample, end_sample;
04446 
04447          // copy these into local variables so we can tweak them
04448          // if any are unknown
04449          start_offset = p[0].page_end;
04450          end_offset   = p[1].after_previous_page_start; // an address known to seek to page p[1]
04451          start_sample = p[0].last_decoded_sample;
04452          end_sample   = p[1].last_decoded_sample;
04453 
04454          // currently there is no such tweaking logic needed/possible?
04455          if (start_sample == SAMPLE_unknown || end_sample == SAMPLE_unknown)
04456             return error(f, VORBIS_seek_failed);
04457 
04458          // now we want to lerp between these for the target samples...
04459       
04460          // step 1: we need to bias towards the page start...
04461          if (start_offset + 4000 < end_offset)
04462             end_offset -= 4000;
04463 
04464          // now compute an interpolated search loc
04465          probe = start_offset + (int) floor((float) (end_offset - start_offset) / (end_sample - start_sample) * (sample_number - start_sample));
04466 
04467          // next we need to bias towards binary search...
04468          // code is a little wonky to allow for full 32-bit unsigned values
04469          if (attempts >= 4) {
04470             uint32 probe2 = start_offset + ((end_offset - start_offset) >> 1);
04471             if (attempts >= 8)
04472                probe = probe2;
04473             else if (probe < probe2)
04474                probe = probe + ((probe2 - probe) >> 1);
04475             else
04476                probe = probe2 + ((probe - probe2) >> 1);
04477          }
04478          ++attempts;
04479 
04480          set_file_offset(f, probe);
04481          if (!vorbis_find_page(f, NULL, NULL))   return error(f, VORBIS_seek_failed);
04482          if (!vorbis_analyze_page(f, &q))        return error(f, VORBIS_seek_failed);
04483          q.after_previous_page_start = probe;
04484 
04485          // it's possible we've just found the last page again
04486          if (q.page_start == p[1].page_start) {
04487             p[1] = q;
04488             continue;
04489          }
04490 
04491          if (sample_number < q.last_decoded_sample)
04492             p[1] = q;
04493          else
04494             p[0] = q;
04495       }
04496 
04497       if (p[0].last_decoded_sample <= sample_number && sample_number < p[1].last_decoded_sample) {
04498          vorbis_seek_frame_from_page(f, p[1].page_start, p[0].last_decoded_sample, sample_number, fine);
04499          return 0;
04500       }
04501       return error(f, VORBIS_seek_failed);
04502    }
04503 }
04504 
04505 int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
04506 {
04507    return vorbis_seek_base(f, sample_number, FALSE);
04508 }
04509 
04510 int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
04511 {
04512    return vorbis_seek_base(f, sample_number, TRUE);
04513 }
04514 
04515 void stb_vorbis_seek_start(stb_vorbis *f)
04516 {
04517    if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
04518    set_file_offset(f, f->first_audio_page_offset);
04519    f->previous_length = 0;
04520    f->first_decode = TRUE;
04521    f->next_seg = -1;
04522    vorbis_pump_first_frame(f);
04523 }
04524 
04525 unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
04526 {
04527    unsigned int restore_offset, previous_safe;
04528    unsigned int end, last_page_loc;
04529 
04530    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
04531    if (!f->total_samples) {
04532       int last;
04533       uint32 lo,hi;
04534       char header[6];
04535 
04536       // first, store the current decode position so we can restore it
04537       restore_offset = stb_vorbis_get_file_offset(f);
04538 
04539       // now we want to seek back 64K from the end (the last page must
04540       // be at most a little less than 64K, but let's allow a little slop)
04541       if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
04542          previous_safe = f->stream_len - 65536;
04543       else
04544          previous_safe = f->first_audio_page_offset;
04545 
04546       set_file_offset(f, previous_safe);
04547       // previous_safe is now our candidate 'earliest known place that seeking
04548       // to will lead to the final page'
04549 
04550       if (!vorbis_find_page(f, &end, (int unsigned *)&last)) {
04551          // if we can't find a page, we're hosed!
04552          f->error = VORBIS_cant_find_last_page;
04553          f->total_samples = 0xffffffff;
04554          goto done;
04555       }
04556 
04557       // check if there are more pages
04558       last_page_loc = stb_vorbis_get_file_offset(f);
04559 
04560       // stop when the last_page flag is set, not when we reach eof;
04561       // this allows us to stop short of a 'file_section' end without
04562       // explicitly checking the length of the section
04563       while (!last) {
04564          set_file_offset(f, end);
04565          if (!vorbis_find_page(f, &end, (int unsigned *)&last)) {
04566             // the last page we found didn't have the 'last page' flag
04567             // set. whoops!
04568             break;
04569          }
04570          previous_safe = last_page_loc+1;
04571          last_page_loc = stb_vorbis_get_file_offset(f);
04572       }
04573 
04574       set_file_offset(f, last_page_loc);
04575 
04576       // parse the header
04577       getn(f, (unsigned char *)header, 6);
04578       // extract the absolute granule position
04579       lo = get32(f);
04580       hi = get32(f);
04581       if (lo == 0xffffffff && hi == 0xffffffff) {
04582          f->error = VORBIS_cant_find_last_page;
04583          f->total_samples = SAMPLE_unknown;
04584          goto done;
04585       }
04586       if (hi)
04587          lo = 0xfffffffe; // saturate
04588       f->total_samples = lo;
04589 
04590       f->p_last.page_start = last_page_loc;
04591       f->p_last.page_end   = end;
04592       f->p_last.last_decoded_sample = lo;
04593       f->p_last.first_decoded_sample = SAMPLE_unknown;
04594       f->p_last.after_previous_page_start = previous_safe;
04595 
04596      done:
04597       set_file_offset(f, restore_offset);
04598    }
04599    return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
04600 }
04601 
04602 float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
04603 {
04604    return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
04605 }
04606 
04607 
04608 
04609 int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
04610 {
04611    int len, right,left,i;
04612    if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
04613 
04614    if (!vorbis_decode_packet(f, &len, &left, &right)) {
04615       f->channel_buffer_start = f->channel_buffer_end = 0;
04616       return 0;
04617    }
04618 
04619    len = vorbis_finish_frame(f, len, left, right);
04620    for (i=0; i < f->channels; ++i)
04621       f->outputs[i] = f->channel_buffers[i] + left;
04622 
04623    f->channel_buffer_start = left;
04624    f->channel_buffer_end   = left+len;
04625 
04626    if (channels) *channels = f->channels;
04627    if (output)   *output = f->outputs;
04628    return len;
04629 }
04630 
04631 #ifndef STB_VORBIS_NO_STDIO
04632 
04633 stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc, unsigned int length)
04634 {
04635    stb_vorbis *f, p;
04636    vorbis_init(&p, alloc);
04637    p.f = file;
04638    p.f_start = ftell(file);
04639    p.stream_len   = length;
04640    p.close_on_free = close_on_free;
04641    if (start_decoder(&p)) {
04642       f = vorbis_alloc(&p);
04643       if (f) {
04644          *f = p;
04645          vorbis_pump_first_frame(f);
04646          return f;
04647       }
04648    }
04649    if (error) *error = p.error;
04650    vorbis_deinit(&p);
04651    return NULL;
04652 }
04653 
04654 stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc)
04655 {
04656    unsigned int len, start;
04657    start = ftell(file);
04658    fseek(file, 0, SEEK_END);
04659    len = ftell(file) - start;
04660    fseek(file, start, SEEK_SET);
04661    return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
04662 }
04663 
04664 stb_vorbis * stb_vorbis_open_filename(char *filename, int *error, stb_vorbis_alloc *alloc)
04665 {
04666    FILE *f = fopen(filename, "rb");
04667    if (f) 
04668       return stb_vorbis_open_file(f, TRUE, error, alloc);
04669    if (error) *error = VORBIS_file_open_failure;
04670    return NULL;
04671 }
04672 #endif // STB_VORBIS_NO_STDIO
04673 
04674 stb_vorbis * stb_vorbis_open_memory(unsigned char *data, int len, int *error, stb_vorbis_alloc *alloc)
04675 {
04676    stb_vorbis *f, p;
04677    if (data == NULL) return NULL;
04678    vorbis_init(&p, alloc);
04679    p.stream = data;
04680    p.stream_end = data + len;
04681    p.stream_start = p.stream;
04682    p.stream_len = len;
04683    p.push_mode = FALSE;
04684    if (start_decoder(&p)) {
04685       f = vorbis_alloc(&p);
04686       if (f) {
04687          *f = p;
04688          vorbis_pump_first_frame(f);
04689          return f;
04690       }
04691    }
04692    if (error) *error = p.error;
04693    vorbis_deinit(&p);
04694    return NULL;
04695 }
04696 
04697 #ifndef STB_VORBIS_NO_INTEGER_CONVERSION
04698 #define PLAYBACK_MONO     1
04699 #define PLAYBACK_LEFT     2
04700 #define PLAYBACK_RIGHT    4
04701 
04702 #define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
04703 #define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
04704 #define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
04705 
04706 static int8 channel_position[7][6] =
04707 {
04708    { 0 },
04709    { C },
04710    { L, R },
04711    { L, C, R },
04712    { L, R, L, R },
04713    { L, C, R, L, R },
04714    { L, C, R, L, R, C },
04715 };
04716 
04717 
04718 #ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
04719    // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
04720    #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
04721    #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
04722    #define FAST_SCALED_FLOAT_TO_INT(x,s) ((temp = (x) + MAGIC(s)), (*(int *)&temp) - ADDEND(s))
04723    #define check_endianness()  
04724    typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
04725    #define FASTDEF(x) x
04726 #else
04727    #define FAST_SCALED_FLOAT_TO_INT(x,s) ((int) ((x) * (1 << (s))))
04728    #define check_endianness()
04729    #define FASTDEF(x)
04730 #endif
04731 
04732 static void copy_samples(short *dest, float *src, int len)
04733 {
04734    int i;
04735    FASTDEF(float temp);
04736    check_endianness();
04737    for (i=0; i < len; ++i) {
04738       int v = FAST_SCALED_FLOAT_TO_INT(src[i],15);
04739       if ((unsigned int) (v + 32768) > 65535)
04740          v = v < 0 ? -32768 : 32767;
04741       dest[i] = v;
04742    }
04743 }
04744 
04745 static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len)
04746 {
04747    #define BUFFER_SIZE  32
04748    float buffer[BUFFER_SIZE];
04749    int i,j,o,n = BUFFER_SIZE;
04750    FASTDEF(float temp);
04751    check_endianness();
04752    for (o = 0; o < len; o += BUFFER_SIZE) {
04753       memset(buffer, 0, sizeof(buffer));
04754       if (o + n > len) n = len - o;
04755       for (j=0; j < num_c; ++j) {
04756          if (channel_position[num_c][j] & mask) {
04757             for (i=0; i < n; ++i)
04758                buffer[i] += data[j][d_offset+o+i];
04759          }
04760       }
04761       for (i=0; i < n; ++i) {
04762          int v = FAST_SCALED_FLOAT_TO_INT(buffer[i],15);
04763          if ((unsigned int) (v + 32768) > 65535)
04764             v = v < 0 ? -32768 : 32767;
04765          output[o+i] = v;
04766       }
04767    }
04768 }
04769 
04770 static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
04771 static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len)
04772 {
04773    #define BUFFER_SIZE  32
04774    float buffer[BUFFER_SIZE];
04775    int i,j,o,n = BUFFER_SIZE >> 1;
04776    FASTDEF(float temp);
04777    // o is the offset in the source data
04778    check_endianness();
04779    for (o = 0; o < len; o += BUFFER_SIZE >> 1) {
04780       // o2 is the offset in the output data
04781       int o2 = o << 1;
04782       memset(buffer, 0, sizeof(buffer));
04783       if (o + n > len) n = len - o;
04784       for (j=0; j < num_c; ++j) {
04785          int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT);
04786          if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) {
04787             for (i=0; i < n; ++i) {
04788                buffer[i*2+0] += data[j][d_offset+o+i];
04789                buffer[i*2+1] += data[j][d_offset+o+i];
04790             }
04791          } else if (m == PLAYBACK_LEFT) {
04792             for (i=0; i < n; ++i) {
04793                buffer[i*2+0] += data[j][d_offset+o+i];
04794             }
04795          } else if (m == PLAYBACK_RIGHT) {
04796             for (i=0; i < n; ++i) {
04797                buffer[i*2+1] += data[j][d_offset+o+i];
04798             }
04799          }
04800       }
04801       for (i=0; i < (n<<1); ++i) {
04802          int v = FAST_SCALED_FLOAT_TO_INT(buffer[i],15);
04803          if ((unsigned int) (v + 32768) > 65535)
04804             v = v < 0 ? -32768 : 32767;
04805          output[o2+i] = v;
04806       }
04807    }
04808 }
04809 
04810 static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples)
04811 {
04812    int i;
04813    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
04814       static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} };
04815       for (i=0; i < buf_c; ++i)
04816          compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples);
04817    } else {
04818       int limit = buf_c < data_c ? buf_c : data_c;
04819       for (i=0; i < limit; ++i)
04820          copy_samples(buffer[i]+b_offset, data[i], samples);
04821       for (   ; i < buf_c; ++i)
04822          memset(buffer[i]+b_offset, 0, sizeof(short) * samples);
04823    }
04824 }
04825 
04826 int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples)
04827 {
04828    float **output;
04829    int len = stb_vorbis_get_frame_float(f, NULL, &output);
04830    if (len > num_samples) len = num_samples;
04831    if (len)
04832       convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len);
04833    return len;
04834 }
04835 
04836 static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len)
04837 {
04838    int i;
04839    check_endianness();
04840    if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
04841       assert(buf_c == 2);
04842       for (i=0; i < buf_c; ++i)
04843          compute_stereo_samples(buffer, data_c, data, d_offset, len);
04844    } else {
04845       int limit = buf_c < data_c ? buf_c : data_c;
04846       int j;
04847       FASTDEF(float temp);
04848       for (j=0; j < len; ++j) {
04849          for (i=0; i < limit; ++i) {
04850             int v = FAST_SCALED_FLOAT_TO_INT(data[i][d_offset+j],15);
04851             if ((unsigned int) (v + 32768) > 65535)
04852                v = v < 0 ? -32768 : 32767;
04853             *buffer++ = v;
04854          }
04855          for (   ; i < buf_c; ++i)
04856             *buffer++ = 0;
04857       }
04858    }
04859 }
04860 
04861 int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts)
04862 {
04863    float **output;
04864    int len;
04865    if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts);
04866    len = stb_vorbis_get_frame_float(f, NULL, &output);
04867    if (len) {
04868       if (len*num_c > num_shorts) len = num_shorts / num_c;
04869       convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len);
04870    }
04871    return len;
04872 }
04873 
04874 int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts)
04875 {
04876    float **outputs;
04877    int len = num_shorts / channels;
04878    int n=0;
04879    int z = f->channels;
04880    if (z > channels) z = channels;
04881    while (n < len) {
04882       int k = f->channel_buffer_end - f->channel_buffer_start;
04883       if (n+k >= len) k = len - n;
04884       if (k)
04885          convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k);
04886       buffer += k*channels;
04887       n += k;
04888       f->channel_buffer_start += k;
04889       if (n == len) break;
04890       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
04891    }
04892    return n;
04893 }
04894 
04895 int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len)
04896 {
04897    float **outputs;
04898    int n=0;
04899    int z = f->channels;
04900    if (z > channels) z = channels;
04901    while (n < len) {
04902       int k = f->channel_buffer_end - f->channel_buffer_start;
04903       if (n+k >= len) k = len - n;
04904       if (k)
04905          convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k);
04906       n += k;
04907       f->channel_buffer_start += k;
04908       if (n == len) break;
04909       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
04910    }
04911    return n;
04912 }
04913 
04914 #ifndef STB_VORBIS_NO_STDIO
04915 int stb_vorbis_decode_filename(char *filename, int *channels, short **output)
04916 {
04917    int data_len, offset, total, limit, error;
04918    short *data;
04919    stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL);
04920    if (v == NULL) return -1;
04921    limit = v->channels * 4096;
04922    *channels = v->channels;
04923    offset = data_len = 0;
04924    total = limit;
04925    data = (short *) malloc(total * sizeof(*data));
04926    if (data == NULL) {
04927       stb_vorbis_close(v);
04928       return -2;
04929    }
04930    for (;;) {
04931       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
04932       if (n == 0) break;
04933       data_len += n;
04934       offset += n * v->channels;
04935       if (offset + limit > total) {
04936          short *data2;
04937          total *= 2;
04938          data2 = (short *) realloc(data, total * sizeof(*data));
04939          if (data2 == NULL) {
04940             free(data);
04941             stb_vorbis_close(v);
04942             return -2;
04943          }
04944          data = data2;
04945       }
04946    }
04947    *output = data;
04948    return data_len;
04949 }
04950 #endif // NO_STDIO
04951 
04952 int stb_vorbis_decode_memory(uint8 *mem, int len, int *channels, short **output)
04953 {
04954    int data_len, offset, total, limit, error;
04955    short *data;
04956    stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL);
04957    if (v == NULL) return -1;
04958    limit = v->channels * 4096;
04959    *channels = v->channels;
04960    offset = data_len = 0;
04961    total = limit;
04962    data = (short *) malloc(total * sizeof(*data));
04963    if (data == NULL) {
04964       stb_vorbis_close(v);
04965       return -2;
04966    }
04967    for (;;) {
04968       int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset);
04969       if (n == 0) break;
04970       data_len += n;
04971       offset += n * v->channels;
04972       if (offset + limit > total) {
04973          short *data2;
04974          total *= 2;
04975          data2 = (short *) realloc(data, total * sizeof(*data));
04976          if (data2 == NULL) {
04977             free(data);
04978             stb_vorbis_close(v);
04979             return -2;
04980          }
04981          data = data2;
04982       }
04983    }
04984    *output = data;
04985    return data_len;
04986 }
04987 #endif
04988 
04989 int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats)
04990 {
04991    float **outputs;
04992    int len = num_floats / channels;
04993    int n=0;
04994    int z = f->channels;
04995    if (z > channels) z = channels;
04996    while (n < len) {
04997       int i,j;
04998       int k = f->channel_buffer_end - f->channel_buffer_start;
04999       if (n+k >= len) k = len - n;
05000       for (j=0; j < k; ++j) {
05001          for (i=0; i < z; ++i)
05002             *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j];
05003          for (   ; i < channels; ++i)
05004             *buffer++ = 0;
05005       }
05006       n += k;
05007       f->channel_buffer_start += k;
05008       if (n == len) break;
05009       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
05010    }
05011    return n;
05012 }
05013 
05014 int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples)
05015 {
05016    float **outputs;
05017    int n=0;
05018    int z = f->channels;
05019    if (z > channels) z = channels;
05020    while (n < num_samples) {
05021       int i;
05022       int k = f->channel_buffer_end - f->channel_buffer_start;
05023       if (n+k >= num_samples) k = num_samples - n;
05024       if (k) {
05025          for (i=0; i < z; ++i)
05026             memcpy(buffer[i]+n, f->channel_buffers+f->channel_buffer_start, sizeof(float)*k);
05027          for (   ; i < channels; ++i)
05028             memset(buffer[i]+n, 0, sizeof(float) * k);
05029       }
05030       n += k;
05031       f->channel_buffer_start += k;
05032       if (n == num_samples) break;
05033       if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break;
05034    }
05035    return n;
05036 }
05037 #endif // STB_VORBIS_NO_PULLDATA_API
05038 
05039 #endif // STB_VORBIS_HEADER_ONLY