Back to index

tor  0.2.3.18-rc
torgzip.c
Go to the documentation of this file.
00001 /* Copyright (c) 2004, Roger Dingledine.
00002  * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
00003  * Copyright (c) 2007-2012, The Tor Project, Inc. */
00004 /* See LICENSE for licensing information */
00005 
00011 #include "orconfig.h"
00012 
00013 #include <stdlib.h>
00014 #include <stdio.h>
00015 #include <assert.h>
00016 #include <string.h>
00017 #include "torint.h"
00018 
00019 #ifdef HAVE_NETINET_IN_H
00020 #include <netinet/in.h>
00021 #endif
00022 
00023 #include "util.h"
00024 #include "torlog.h"
00025 #include "torgzip.h"
00026 
00027 /* zlib 1.2.4 and 1.2.5 do some "clever" things with macros.  Instead of
00028    saying "(defined(FOO) ? FOO : 0)" they like to say "FOO-0", on the theory
00029    that nobody will care if the compile outputs a no-such-identifier warning.
00030 
00031    Sorry, but we like -Werror over here, so I guess we need to define these.
00032    I hope that zlib 1.2.6 doesn't break these too.
00033 */
00034 #ifndef _LARGEFILE64_SOURCE
00035 #define _LARGEFILE64_SOURCE 0
00036 #endif
00037 #ifndef _LFS64_LARGEFILE
00038 #define _LFS64_LARGEFILE 0
00039 #endif
00040 #ifndef _FILE_OFFSET_BITS
00041 #define _FILE_OFFSET_BITS 0
00042 #endif
00043 #ifndef off64_t
00044 #define off64_t int64_t
00045 #endif
00046 
00047 #include <zlib.h>
00048 
00051 static int gzip_is_supported = -1;
00052 
00055 int
00056 is_gzip_supported(void)
00057 {
00058   if (gzip_is_supported >= 0)
00059     return gzip_is_supported;
00060 
00061   if (!strcmpstart(ZLIB_VERSION, "0.") ||
00062       !strcmpstart(ZLIB_VERSION, "1.0") ||
00063       !strcmpstart(ZLIB_VERSION, "1.1"))
00064     gzip_is_supported = 0;
00065   else
00066     gzip_is_supported = 1;
00067 
00068   return gzip_is_supported;
00069 }
00070 
00072 static INLINE int
00073 method_bits(compress_method_t method)
00074 {
00075   /* Bits+16 means "use gzip" in zlib >= 1.2 */
00076   return method == GZIP_METHOD ? 15+16 : 15;
00077 }
00078 
00080 /* These macros define the maximum allowable compression factor.  Anything of
00081  * size greater than CHECK_FOR_COMPRESSION_BOMB_AFTER is not allowed to
00082  * have an uncompression factor (uncompressed size:compressed size ratio) of
00083  * any greater than MAX_UNCOMPRESSION_FACTOR.
00084  *
00085  * Picking a value for MAX_UNCOMPRESSION_FACTOR is a trade-off: we want it to
00086  * be small to limit the attack multiplier, but we also want it to be large
00087  * enough so that no legitimate document --even ones we might invent in the
00088  * future -- ever compresses by a factor of greater than
00089  * MAX_UNCOMPRESSION_FACTOR. Within those parameters, there's a reasonably
00090  * large range of possible values. IMO, anything over 8 is probably safe; IMO
00091  * anything under 50 is probably sufficient.
00092  */
/* Largest tolerated (uncompressed size / compressed size) ratio. */
#define MAX_UNCOMPRESSION_FACTOR 25
/* Outputs smaller than this many bytes are never treated as a bomb. */
#define CHECK_FOR_COMPRESSION_BOMB_AFTER (1024*64)

/** Return 1 if a compressed size of <b>size_in</b> bytes expanding to an
 * uncompressed size of <b>size_out</b> bytes looks like a compression bomb,
 * and 0 otherwise.
 *
 * Nothing is flagged when <b>size_in</b> is zero or when <b>size_out</b> is
 * below CHECK_FOR_COMPRESSION_BOMB_AFTER.  Otherwise the integer quotient
 * size_out / size_in must exceed MAX_UNCOMPRESSION_FACTOR to be flagged.
 */
static int
is_compression_bomb(size_t size_in, size_t size_out)
{
  if (size_in != 0 && size_out >= CHECK_FOR_COMPRESSION_BOMB_AFTER) {
    /* Integer division: a fractional excess over the factor is ignored. */
    const size_t ratio = size_out / size_in;
    return ratio > MAX_UNCOMPRESSION_FACTOR;
  }

  return 0;
}
00107 
00113 int
00114 tor_gzip_compress(char **out, size_t *out_len,
00115                   const char *in, size_t in_len,
00116                   compress_method_t method)
00117 {
00118   struct z_stream_s *stream = NULL;
00119   size_t out_size, old_size;
00120   off_t offset;
00121 
00122   tor_assert(out);
00123   tor_assert(out_len);
00124   tor_assert(in);
00125   tor_assert(in_len < UINT_MAX);
00126 
00127   *out = NULL;
00128 
00129   if (method == GZIP_METHOD && !is_gzip_supported()) {
00130     /* Old zlib version don't support gzip in deflateInit2 */
00131     log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
00132     goto err;
00133   }
00134 
00135   stream = tor_malloc_zero(sizeof(struct z_stream_s));
00136   stream->zalloc = Z_NULL;
00137   stream->zfree = Z_NULL;
00138   stream->opaque = NULL;
00139   stream->next_in = (unsigned char*) in;
00140   stream->avail_in = (unsigned int)in_len;
00141 
00142   if (deflateInit2(stream, Z_BEST_COMPRESSION, Z_DEFLATED,
00143                    method_bits(method),
00144                    8, Z_DEFAULT_STRATEGY) != Z_OK) {
00145     log_warn(LD_GENERAL, "Error from deflateInit2: %s",
00146              stream->msg?stream->msg:"<no message>");
00147     goto err;
00148   }
00149 
00150   /* Guess 50% compression. */
00151   out_size = in_len / 2;
00152   if (out_size < 1024) out_size = 1024;
00153   *out = tor_malloc(out_size);
00154   stream->next_out = (unsigned char*)*out;
00155   stream->avail_out = (unsigned int)out_size;
00156 
00157   while (1) {
00158     switch (deflate(stream, Z_FINISH))
00159       {
00160       case Z_STREAM_END:
00161         goto done;
00162       case Z_OK:
00163         /* In case zlib doesn't work as I think .... */
00164         if (stream->avail_out >= stream->avail_in+16)
00165           break;
00166       case Z_BUF_ERROR:
00167         offset = stream->next_out - ((unsigned char*)*out);
00168         old_size = out_size;
00169         out_size *= 2;
00170         if (out_size < old_size) {
00171           log_warn(LD_GENERAL, "Size overflow in compression.");
00172           goto err;
00173         }
00174         *out = tor_realloc(*out, out_size);
00175         stream->next_out = (unsigned char*)(*out + offset);
00176         if (out_size - offset > UINT_MAX) {
00177           log_warn(LD_BUG,  "Ran over unsigned int limit of zlib while "
00178                    "uncompressing.");
00179           goto err;
00180         }
00181         stream->avail_out = (unsigned int)(out_size - offset);
00182         break;
00183       default:
00184         log_warn(LD_GENERAL, "Gzip compression didn't finish: %s",
00185                  stream->msg ? stream->msg : "<no message>");
00186         goto err;
00187       }
00188   }
00189  done:
00190   *out_len = stream->total_out;
00191 #ifdef OPENBSD
00192   /* "Hey Rocky!  Watch me change an unsigned field to a signed field in a
00193    *    third-party API!"
00194    * "Oh, that trick will just make people do unsafe casts to the unsigned
00195    *    type in their cross-platform code!"
00196    * "Don't be foolish.  I'm _sure_ they'll have the good sense to make sure
00197    *    the newly unsigned field isn't negative." */
00198   tor_assert(stream->total_out >= 0);
00199 #endif
00200   if (((size_t)stream->total_out) > out_size + 4097) {
00201     /* If we're wasting more than 4k, don't. */
00202     *out = tor_realloc(*out, stream->total_out + 1);
00203   }
00204   if (deflateEnd(stream)!=Z_OK) {
00205     log_warn(LD_BUG, "Error freeing gzip structures");
00206     goto err;
00207   }
00208   tor_free(stream);
00209 
00210   if (is_compression_bomb(*out_len, in_len)) {
00211     log_warn(LD_BUG, "We compressed something and got an insanely high "
00212           "compression factor; other Tors would think this was a zlib bomb.");
00213     goto err;
00214   }
00215 
00216   return 0;
00217  err:
00218   if (stream) {
00219     deflateEnd(stream);
00220     tor_free(stream);
00221   }
00222   tor_free(*out);
00223   return -1;
00224 }
00225 
00237 int
00238 tor_gzip_uncompress(char **out, size_t *out_len,
00239                     const char *in, size_t in_len,
00240                     compress_method_t method,
00241                     int complete_only,
00242                     int protocol_warn_level)
00243 {
00244   struct z_stream_s *stream = NULL;
00245   size_t out_size, old_size;
00246   off_t offset;
00247   int r;
00248 
00249   tor_assert(out);
00250   tor_assert(out_len);
00251   tor_assert(in);
00252   tor_assert(in_len < UINT_MAX);
00253 
00254   if (method == GZIP_METHOD && !is_gzip_supported()) {
00255     /* Old zlib version don't support gzip in inflateInit2 */
00256     log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
00257     return -1;
00258   }
00259 
00260   *out = NULL;
00261 
00262   stream = tor_malloc_zero(sizeof(struct z_stream_s));
00263   stream->zalloc = Z_NULL;
00264   stream->zfree = Z_NULL;
00265   stream->opaque = NULL;
00266   stream->next_in = (unsigned char*) in;
00267   stream->avail_in = (unsigned int)in_len;
00268 
00269   if (inflateInit2(stream,
00270                    method_bits(method)) != Z_OK) {
00271     log_warn(LD_GENERAL, "Error from inflateInit2: %s",
00272              stream->msg?stream->msg:"<no message>");
00273     goto err;
00274   }
00275 
00276   out_size = in_len * 2;  /* guess 50% compression. */
00277   if (out_size < 1024) out_size = 1024;
00278   if (out_size >= SIZE_T_CEILING || out_size > UINT_MAX)
00279     goto err;
00280 
00281   *out = tor_malloc(out_size);
00282   stream->next_out = (unsigned char*)*out;
00283   stream->avail_out = (unsigned int)out_size;
00284 
00285   while (1) {
00286     switch (inflate(stream, complete_only ? Z_FINISH : Z_SYNC_FLUSH))
00287       {
00288       case Z_STREAM_END:
00289         if (stream->avail_in == 0)
00290           goto done;
00291         /* There may be more compressed data here. */
00292         if ((r = inflateEnd(stream)) != Z_OK) {
00293           log_warn(LD_BUG, "Error freeing gzip structures");
00294           goto err;
00295         }
00296         if (inflateInit2(stream, method_bits(method)) != Z_OK) {
00297           log_warn(LD_GENERAL, "Error from second inflateInit2: %s",
00298                    stream->msg?stream->msg:"<no message>");
00299           goto err;
00300         }
00301         break;
00302       case Z_OK:
00303         if (!complete_only && stream->avail_in == 0)
00304           goto done;
00305         /* In case zlib doesn't work as I think.... */
00306         if (stream->avail_out >= stream->avail_in+16)
00307           break;
00308       case Z_BUF_ERROR:
00309         if (stream->avail_out > 0) {
00310           log_fn(protocol_warn_level, LD_PROTOCOL,
00311                  "possible truncated or corrupt zlib data");
00312           goto err;
00313         }
00314         offset = stream->next_out - (unsigned char*)*out;
00315         old_size = out_size;
00316         out_size *= 2;
00317         if (out_size < old_size) {
00318           log_warn(LD_GENERAL, "Size overflow in uncompression.");
00319           goto err;
00320         }
00321         if (is_compression_bomb(in_len, out_size)) {
00322           log_warn(LD_GENERAL, "Input looks like a possible zlib bomb; "
00323                    "not proceeding.");
00324           goto err;
00325         }
00326         if (out_size >= SIZE_T_CEILING) {
00327           log_warn(LD_BUG, "Hit SIZE_T_CEILING limit while uncompressing.");
00328           goto err;
00329         }
00330         *out = tor_realloc(*out, out_size);
00331         stream->next_out = (unsigned char*)(*out + offset);
00332         if (out_size - offset > UINT_MAX) {
00333           log_warn(LD_BUG,  "Ran over unsigned int limit of zlib while "
00334                    "uncompressing.");
00335           goto err;
00336         }
00337         stream->avail_out = (unsigned int)(out_size - offset);
00338         break;
00339       default:
00340         log_warn(LD_GENERAL, "Gzip decompression returned an error: %s",
00341                  stream->msg ? stream->msg : "<no message>");
00342         goto err;
00343       }
00344   }
00345  done:
00346   *out_len = stream->next_out - (unsigned char*)*out;
00347   r = inflateEnd(stream);
00348   tor_free(stream);
00349   if (r != Z_OK) {
00350     log_warn(LD_BUG, "Error freeing gzip structures");
00351     goto err;
00352   }
00353 
00354   /* NUL-terminate output. */
00355   if (out_size == *out_len)
00356     *out = tor_realloc(*out, out_size + 1);
00357   (*out)[*out_len] = '\0';
00358 
00359   return 0;
00360  err:
00361   if (stream) {
00362     inflateEnd(stream);
00363     tor_free(stream);
00364   }
00365   if (*out) {
00366     tor_free(*out);
00367   }
00368   return -1;
00369 }
00370 
00375 compress_method_t
00376 detect_compression_method(const char *in, size_t in_len)
00377 {
00378   if (in_len > 2 && fast_memeq(in, "\x1f\x8b", 2)) {
00379     return GZIP_METHOD;
00380   } else if (in_len > 2 && (in[0] & 0x0f) == 8 &&
00381              (ntohs(get_uint16(in)) % 31) == 0) {
00382     return ZLIB_METHOD;
00383   } else {
00384     return UNKNOWN_METHOD;
00385   }
00386 }
00387 
00390 struct tor_zlib_state_t {
00391   struct z_stream_s stream; 
00392   int compress; 
00395   size_t input_so_far;
00397   size_t output_so_far;
00398 };
00399 
00403 tor_zlib_state_t *
00404 tor_zlib_new(int compress, compress_method_t method)
00405 {
00406   tor_zlib_state_t *out;
00407 
00408   if (method == GZIP_METHOD && !is_gzip_supported()) {
00409     /* Old zlib version don't support gzip in inflateInit2 */
00410     log_warn(LD_BUG, "Gzip not supported with zlib %s", ZLIB_VERSION);
00411     return NULL;
00412  }
00413 
00414  out = tor_malloc_zero(sizeof(tor_zlib_state_t));
00415  out->stream.zalloc = Z_NULL;
00416  out->stream.zfree = Z_NULL;
00417  out->stream.opaque = NULL;
00418  out->compress = compress;
00419  if (compress) {
00420    if (deflateInit2(&out->stream, Z_BEST_COMPRESSION, Z_DEFLATED,
00421                     method_bits(method), 8, Z_DEFAULT_STRATEGY) != Z_OK)
00422      goto err;
00423  } else {
00424    if (inflateInit2(&out->stream, method_bits(method)) != Z_OK)
00425      goto err;
00426  }
00427  return out;
00428 
00429  err:
00430  tor_free(out);
00431  return NULL;
00432 }
00433 
00444 tor_zlib_output_t
00445 tor_zlib_process(tor_zlib_state_t *state,
00446                  char **out, size_t *out_len,
00447                  const char **in, size_t *in_len,
00448                  int finish)
00449 {
00450   int err;
00451   tor_assert(*in_len <= UINT_MAX);
00452   tor_assert(*out_len <= UINT_MAX);
00453   state->stream.next_in = (unsigned char*) *in;
00454   state->stream.avail_in = (unsigned int)*in_len;
00455   state->stream.next_out = (unsigned char*) *out;
00456   state->stream.avail_out = (unsigned int)*out_len;
00457 
00458   if (state->compress) {
00459     err = deflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
00460   } else {
00461     err = inflate(&state->stream, finish ? Z_FINISH : Z_SYNC_FLUSH);
00462   }
00463 
00464   state->input_so_far += state->stream.next_in - ((unsigned char*)*in);
00465   state->output_so_far += state->stream.next_out - ((unsigned char*)*out);
00466 
00467   *out = (char*) state->stream.next_out;
00468   *out_len = state->stream.avail_out;
00469   *in = (const char *) state->stream.next_in;
00470   *in_len = state->stream.avail_in;
00471 
00472   if (! state->compress &&
00473       is_compression_bomb(state->input_so_far, state->output_so_far)) {
00474     log_warn(LD_DIR, "Possible zlib bomb; abandoning stream.");
00475     return TOR_ZLIB_ERR;
00476   }
00477 
00478   switch (err)
00479     {
00480     case Z_STREAM_END:
00481       return TOR_ZLIB_DONE;
00482     case Z_BUF_ERROR:
00483       if (state->stream.avail_in == 0)
00484         return TOR_ZLIB_OK;
00485       return TOR_ZLIB_BUF_FULL;
00486     case Z_OK:
00487       if (state->stream.avail_out == 0 || finish)
00488         return TOR_ZLIB_BUF_FULL;
00489       return TOR_ZLIB_OK;
00490     default:
00491       log_warn(LD_GENERAL, "Gzip returned an error: %s",
00492                state->stream.msg ? state->stream.msg : "<no message>");
00493       return TOR_ZLIB_ERR;
00494     }
00495 }
00496 
00498 void
00499 tor_zlib_free(tor_zlib_state_t *state)
00500 {
00501   if (!state)
00502     return;
00503 
00504   if (state->compress)
00505     deflateEnd(&state->stream);
00506   else
00507     inflateEnd(&state->stream);
00508 
00509   tor_free(state);
00510 }
00511