Back to index

enigmail 1.4.3
optimizejars.py
Go to the documentation of this file.
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is mozilla.org code
#
# The Initial Developer of the Original Code is
# Mozilla Foundation.
# Portions created by the Initial Developer are Copyright (C) 2010
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Taras Glek <tglek@mozilla.com>
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
00037 
import os
import re
import struct
import subprocess
import sys
00039 
# On-disk field layouts for the three ZIP record types this script reads and
# rewrites, listed in file order.  Each entry is a (field_name, type) pair:
# "uint16"/"uint32" are fixed-size little-endian integers (see type_mapping
# below); any other type string names the sibling field whose integer value
# gives this variable-length field's byte size.

# ZIP local file header: precedes each archive member's compressed data.
local_file_header = [
    ("signature", "uint32"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extra_field_size", "uint16"),
    # Variable-length tails, sized by the integer fields above.
    ("filename", "filename_size"),
    ("extra_field", "extra_field_size"),
    ("data", "compressed_size")
]

# ZIP central directory entry: one per archive member.
cdir_entry = [
    ("signature", "uint32"),
    ("creator_version", "uint16"),
    ("min_version", "uint16"),
    ("general_flag", "uint16"),
    ("compression", "uint16"),
    ("lastmod_time", "uint16"),
    ("lastmod_date", "uint16"),
    ("crc32", "uint32"),
    ("compressed_size", "uint32"),
    ("uncompressed_size", "uint32"),
    ("filename_size", "uint16"),
    ("extrafield_size", "uint16"),
    ("filecomment_size", "uint16"),
    ("disknum", "uint16"),
    ("internal_attr", "uint16"),
    ("external_attr", "uint32"),
    # Offset of this member's local file header from the start of the file.
    ("offset", "uint32"),
    ("filename", "filename_size"),
    ("extrafield", "extrafield_size"),
    ("filecomment", "filecomment_size"),
]

# ZIP end-of-central-directory record: last record in the file.
cdir_end = [
    ("signature", "uint32"),
    ("disk_num", "uint16"),
    ("cdir_disk", "uint16"),
    ("disk_entries", "uint16"),
    ("cdir_entries", "uint16"),
    ("cdir_size", "uint32"),
    ("cdir_offset", "uint32"),
    ("comment_size", "uint16"),
]

# Maps the layout type strings to struct-module format characters.
type_mapping = { "uint32":"I", "uint16":"H"}
00092 
def format_struct (format):
    """Translate a field-layout table into struct-module form.

    Returns (fmt, string_fields): fmt is a little-endian struct format
    string covering the fixed-size integer fields, and string_fields maps
    each variable-length field's name to the name of the field holding
    its byte size.
    """
    string_fields = {}
    pieces = ["<"]
    for name, ftype in format:
        code = type_mapping.get(ftype)
        if code is None:
            # Not a fixed-size integer: length comes from another field.
            string_fields[name] = ftype
        else:
            pieces.append(code)
    return ("".join(pieces), string_fields)
00102 
def size_of(format):
    """Byte size of the fixed-length portion of a field-layout table."""
    fmt, _ = format_struct(format)
    return struct.calcsize(fmt)
00105 
class MyStruct:
    """A mutable record described by a field-layout table.

    Both fixed-size integer fields and variable-length string fields are
    kept in the struct_members dict; pack() re-serializes them in the
    order given by the layout table.  Because attribute access is routed
    through __getattr__/__setattr__, the real storage is placed directly
    in __dict__ to avoid infinite recursion.
    """
    def __init__(self, format, string_fields):
        # format: the field-layout table; string_fields: variable-length
        # field name -> name of the field holding its size.
        self.__dict__["struct_members"] = {}
        self.__dict__["format"] = format
        self.__dict__["string_fields"] = string_fields

    def addMember(self, name, value):
        """Register a field value; only registered fields can be get/set."""
        self.__dict__["struct_members"][name] = value

    def __getattr__(self, item):
        # Narrowed from a bare `except:` — only a missing member is expected.
        try:
            return self.__dict__["struct_members"][item]
        except KeyError:
            pass
        # Debug aid: show what was asked for and what exists before failing.
        print("no %s" %item)
        print(self.__dict__["struct_members"])
        raise AttributeError

    def __setattr__(self, item, value):
        # Only already-registered members may be assigned.
        if item in self.__dict__["struct_members"]:
            self.__dict__["struct_members"][item] = value
        else:
            raise AttributeError

    def pack(self):
        """Serialize: fixed-size fields via struct.pack, then the
        variable-length string fields appended in layout order."""
        values = []
        extra_parts = []
        string_fields = self.__dict__["string_fields"]
        struct_members = self.__dict__["struct_members"]
        format = self.__dict__["format"]
        for (name,_) in format:
            if name in string_fields:
                extra_parts.append(struct_members[name])
            else:
                values.append(struct_members[name])
        # b"" is identical to "" under Python 2 (both are str) and keeps the
        # concatenation well-typed for bytes members.
        return struct.pack(format_struct(format)[0], *values) + b"".join(extra_parts)
00142    
# Signature of the ZIP end-of-central-directory record.
ENDSIG = 0x06054b50

def assert_true(cond, msg):
    """Raise an Exception carrying msg when cond is falsy.

    (The original also called exit(1) after the raise; that statement was
    unreachable and has been removed.)
    """
    if not cond:
        raise Exception(msg)
00149 
class BinaryBlob:
    """Random-access view over a file's entire contents with a read cursor."""

    def __init__(self, f):
        # Read the whole file up front; a context manager closes the handle
        # promptly (the original leaked it until garbage collection).
        with open(f, "rb") as fd:
            self.data = fd.read()
        self.offset = 0
        self.length = len(self.data)

    def readAt(self, pos, length):
        """Return `length` bytes starting at `pos`; moves the cursor past them."""
        self.offset = pos + length
        return self.data[pos:self.offset]

    def read_struct (self, format, offset = None):
        """Parse one record described by field-layout table `format`.

        Reads the fixed-size fields with struct.unpack, then each
        variable-length field using the size stored in its descriptor
        field, and finally round-trips the result through pack() as a
        serialization sanity check.  Returns a populated MyStruct.
        """
        if offset is None:
            offset = self.offset
        (fstr, string_fields) = format_struct(format)
        size = struct.calcsize(fstr)
        data = self.readAt(offset, size)
        ret = struct.unpack(fstr, data)
        retstruct = MyStruct(format, string_fields)
        i = 0
        for (name, _) in format:
            if name not in string_fields:
                member_data = ret[i]
                i = i + 1
            else:
                # zip has data fields which are described by other struct
                # fields; this does additional reads to fill them in.
                member_desc = string_fields[name]
                member_data = self.readAt(self.offset, retstruct.__getattr__(member_desc))
            retstruct.addMember(name, member_data)
        # sanity check serialization code: re-packing must reproduce the
        # exact bytes that were read.
        data = self.readAt(offset, self.offset - offset)
        out_data = retstruct.pack()
        assert_true(out_data == data, "Serialization fail %d !=%d"% (len(out_data), len(data)))
        return retstruct
00185 
def optimizejar(jar, outjar, inlog = None):
    """Rewrite the zip/jar file `jar` into `outjar`.

    With `inlog` (path to a file listing member names, one per line) the
    members are reordered to match the log and the central directory is
    moved to the front of the file at offset 4, preceded by a 4-byte
    readahead hint.  Without `inlog` the jar is "deoptimized" back to a
    conventional layout.  In both modes, directory entries and extra-field
    data are stripped and member timestamps are normalized.

    Returns `outlog`: when deoptimizing, the member names that lay inside
    the old readahead region (used to regenerate the log); otherwise empty.
    """
    if inlog is not None:
        inlog = open(inlog).read().rstrip()
        # in the case of an empty log still move the index forward
        if len(inlog) == 0:
            inlog = []
        else:
            inlog = inlog.split("\n")
    outlog = []
    jarblob = BinaryBlob(jar)
    # The end-of-central-directory record sits at the very end of the file.
    dirend = jarblob.read_struct(cdir_end, jarblob.length - size_of(cdir_end))
    assert_true(dirend.signature == ENDSIG, "no signature in the end");
    cdir_offset = dirend.cdir_offset
    readahead = 0
    # cdir_offset == 4 marks an already-optimized jar: its first 4 bytes
    # store the suggested readahead byte count.
    if inlog is None and cdir_offset == 4:
        readahead = struct.unpack("<I", jarblob.readAt(0, 4))[0]
        print("%s: startup data ends at byte %d" % (outjar, readahead));

    total_stripped = 0;
    jarblob.offset = cdir_offset
    central_directory = []
    # First pass: read every central-directory entry and total the bytes
    # that stripping directory entries / extra fields will save.
    for i in range(0, dirend.cdir_entries):
        entry = jarblob.read_struct(cdir_entry)
        if entry.filename[-1:] == "/":
            total_stripped += len(entry.pack())
        else:
            total_stripped += entry.extrafield_size
        central_directory.append(entry)
        
    reordered_count = 0
    if inlog is not None:
        # Bring logged names to the front of the central directory in log
        # order; duplicate names in the log are ignored.
        dup_guard = set()
        for ordered_name in inlog:
            if ordered_name in dup_guard:
                continue
            else:
                dup_guard.add(ordered_name)
            found = False
            for i in range(reordered_count, len(central_directory)):
                if central_directory[i].filename == ordered_name:
                    # swap the cdir entries
                    tmp = central_directory[i]
                    central_directory[i] = central_directory[reordered_count]
                    central_directory[reordered_count] = tmp
                    reordered_count = reordered_count + 1
                    found = True
                    break
            if not found:
                print( "Can't find '%s' in %s" % (ordered_name, jar))

    outfd = open(outjar, "wb")
    out_offset = 0
    if inlog is not None:
        # have to put central directory at offset 4 cos 0 confuses some tools.
        # This also lets us specify how many entries should be preread
        dirend.cdir_offset = 4
        # make room for central dir + end of dir + 4 extra bytes at front
        out_offset = dirend.cdir_offset + dirend.cdir_size + size_of(cdir_end) - total_stripped
        outfd.seek(out_offset)

    cdir_data = ""
    written_count = 0
    crc_mapping = {}
    dups_found = 0
    dupe_bytes = 0
    # store number of bytes suggested for readahead
    for entry in central_directory:
        # read in the header twice..first for comparison, second time for convenience when writing out
        jarfile = jarblob.read_struct(local_file_header, entry.offset)
        assert_true(jarfile.filename == entry.filename, "Directory/Localheader mismatch")
        # drop directory entries
        if entry.filename[-1:] == "/":
            total_stripped += len(jarfile.pack())
            dirend.cdir_entries -= 1
            continue
        # drop extra field data
        else:
            total_stripped += jarfile.extra_field_size;
        entry.extrafield = jarfile.extra_field = ""
        entry.extrafield_size = jarfile.extra_field_size = 0
        # Normalize timestamps to January 1st, 2010 (DOS date encoding).
        entry.lastmod_date = jarfile.lastmod_date = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry.lastmod_time = jarfile.lastmod_time = 0
        data = jarfile.pack()
        outfd.write(data)
        old_entry_offset = entry.offset
        entry.offset = out_offset
        out_offset = out_offset + len(data)
        entry_data = entry.pack()
        cdir_data += entry_data
        expected_len = entry.filename_size + entry.extrafield_size + entry.filecomment_size
        # NOTE(review): entry_data also contains the fixed-size header, so it
        # is always longer than expected_len and this `!=` check always
        # passes.  It looks inverted — presumably the intent was
        # `len(entry_data) == size_of(cdir_entry) + expected_len`; confirm
        # before changing.
        assert_true(len(entry_data) != expected_len,
                    "%s entry size - expected:%d got:%d" % (entry.filename, len(entry_data), expected_len))
        written_count += 1

        if entry.crc32 in crc_mapping:
            # Same CRC32 as an earlier member: almost certainly duplicate data.
            dups_found += 1
            dupe_bytes += entry.compressed_size + len(data) + len(entry_data)
            print("%s\n\tis a duplicate of\n%s\n---"%(entry.filename, crc_mapping[entry.crc32]))
        else:
            crc_mapping[entry.crc32] = entry.filename;

        if inlog is not None:
            if written_count == reordered_count:
                # Everything named in the log has now been written out.
                readahead = out_offset
                print("%s: startup data ends at byte %d"%( outjar, readahead));
            elif written_count < reordered_count:
                pass
                #print("%s @ %d" % (entry.filename, out_offset))
        elif readahead >= old_entry_offset + len(data):
            # Deoptimizing: record members inside the old readahead region
            # so the ordering log can be regenerated.
            outlog.append(entry.filename)
            reordered_count += 1

    if inlog is None:
        dirend.cdir_offset = out_offset

    if dups_found > 0:
        print("WARNING: Found %d duplicate files taking %d bytes"%(dups_found, dupe_bytes))

    dirend.cdir_size = len(cdir_data)
    dirend.disk_entries = dirend.cdir_entries
    dirend_data = dirend.pack()
    assert_true(size_of(cdir_end) == len(dirend_data), "Failed to serialize directory end correctly. Serialized size;%d, expected:%d"%(len(dirend_data), size_of(cdir_end)));

    outfd.seek(dirend.cdir_offset)
    outfd.write(cdir_data)
    outfd.write(dirend_data)

    # for ordered jars the central directory is written in the begining of the file, so a second central-directory
    # entry has to be written in the end of the file
    if inlog is not None:
        outfd.seek(0)
        outfd.write(struct.pack("<I", readahead));
        outfd.seek(out_offset)
        outfd.write(dirend_data)

    print "Stripped %d bytes" % total_stripped
    print "%s %d/%d in %s" % (("Ordered" if inlog is not None else "Deoptimized"),
                              reordered_count, len(central_directory), outjar)
    outfd.close()
    return outlog
00327         
# Validate the command line up front; the script must be invoked as:
#   optimizejars.py --optimize|--deoptimize JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR
if len(sys.argv) != 5:
    # The program name belongs before the mode flag (the original format
    # string had the two swapped).  sys.exit is used instead of the
    # site-module exit() helper.
    print("Usage: %s --optimize|--deoptimize JAR_LOG_DIR IN_JAR_DIR OUT_JAR_DIR" % sys.argv[0])
    sys.exit(1)
00331 
# Matches filenames ending in ".jar".  The original pattern "\\.jar?$" made
# the trailing "r" optional, so names ending in ".ja" also matched; a raw
# string avoids double-escaping the dot.
jar_regex = re.compile(r"\.jar$")
00333 
def optimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Reorder every jar found in IN_JAR_DIR into OUT_JAR_DIR, using the
    matching ``<name>.log`` from JAR_LOG_DIR when one exists."""
    for name in os.listdir(IN_JAR_DIR):
        if re.search(jar_regex, name) is None:
            continue
        source = os.path.join(IN_JAR_DIR, name)
        target = os.path.join(OUT_JAR_DIR, name)
        log_path = os.path.join(JAR_LOG_DIR, name + ".log")
        # No log file means the jar is copied through without reordering.
        if not os.path.isfile(log_path):
            log_path = None
        optimizejar(source, target, log_path)
00345 
def deoptimize(JAR_LOG_DIR, IN_JAR_DIR, OUT_JAR_DIR):
    """Rewrite each jar in IN_JAR_DIR back to a conventional layout in
    OUT_JAR_DIR, recording the member-order log into JAR_LOG_DIR."""
    if not os.path.exists(JAR_LOG_DIR):
        os.makedirs(JAR_LOG_DIR)

    for jarfile in os.listdir(IN_JAR_DIR):
        if not re.search(jar_regex, jarfile):
            continue
        injarfile = os.path.join(IN_JAR_DIR, jarfile)
        outjarfile = os.path.join(OUT_JAR_DIR, jarfile)
        logfile = os.path.join(JAR_LOG_DIR, jarfile + ".log")
        log = optimizejar(injarfile, outjarfile, None)
        # Write through a context manager so the log handle is closed
        # promptly (the original leaked it until garbage collection).
        with open(logfile, "wb") as fd:
            fd.write("\n".join(log))
00359 
def main():
    """Dispatch to optimize/deoptimize based on the command-line mode flag."""
    mode = sys.argv[1]
    jar_log_dir = sys.argv[2]
    in_jar_dir = sys.argv[3]
    out_jar_dir = sys.argv[4]
    if mode == "--optimize":
        optimize(jar_log_dir, in_jar_dir, out_jar_dir)
    elif mode == "--deoptimize":
        deoptimize(jar_log_dir, in_jar_dir, out_jar_dir)
    else:
        print("Unknown mode %s" % mode)
        exit(1)

if __name__ == '__main__':
    main()