Back to index

plone3  3.1.7
packer.py
Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 import sys, re, unittest, textwrap
00004 from optparse import OptionParser, OptionValueError
00005 
00006 
class KeywordMapper:
    """Replace keywords matched by a regular expression with short codes.

    ``regexp`` is a pattern string or an already compiled pattern; every
    match it finds is treated as a keyword.  ``encoder`` is a callable that
    turns a non-negative integer index into the code used for the keyword
    with that index.

    After ``analyse(input)`` has been called, ``self.mapping`` maps each
    keyword to an ``(index, code)`` tuple.  An index of ``-1`` means the
    keyword is left untouched (its code is the keyword itself).
    """

    def __init__(self, regexp, encoder):
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
        # check works on both without naming the Python-2-only builtin.
        if isinstance(regexp, (str, type(u""))):
            regexp = re.compile(regexp)
        self.regexp = regexp
        self.encoder = encoder
        self.mapping = {}

    def analyseKeywords(self, input):
        """Return the keyword -> ``(index, code)`` mapping for ``input``.

        Keywords are ranked by ``count * len(keyword)``; the heaviest ones
        receive the lowest indices.  Keywords that occur only once gain
        nothing from being encoded and are mapped to ``(-1, keyword)``.
        Keywords that are identical to one of the codes that could be handed
        out are "protected": the colliding code is skipped and recorded as
        mapping to itself so that decoding leaves it alone.
        """
        matches = self.regexp.findall(input)

        # Count the occurrences of every keyword and collect the codes that
        # would be used if each distinct keyword got a code of its own.
        keyword_count = {}
        candidate_codes = set()
        for match in matches:
            if match not in keyword_count:
                candidate_codes.add(self.encoder(len(keyword_count)))
                keyword_count[match] = 0
            keyword_count[match] += 1

        # A keyword that looks like a candidate code must never be encoded,
        # otherwise the decoder could not tell it apart from a real code.
        protected = set()
        for match in keyword_count:
            if match in candidate_codes:
                keyword_count[match] = 0
                protected.add(match)

        # Drop keywords which do not have enough weight to be encoded; in
        # total this saves some bytes because the generated codes get
        # shorter.  This needs corresponding code in the fast_decode
        # javascript function of the decoder, see the comment there.
        weighted = []
        for value, count in list(keyword_count.items()):
            weight = count * len(value)
            if len(value) >= weight:
                keyword_count[value] = 0
                weight = 0
            weighted.append((weight, value))
        # Heaviest first; ties are broken by the keyword itself so the result
        # does not depend on dictionary ordering.
        weighted.sort(reverse=True)

        index = 0
        mapping = {}
        for _, match in weighted:
            if not keyword_count[match]:
                if match not in protected:
                    mapping[match] = (-1, match)
                continue
            while True:
                encoded = self.encoder(index)
                index += 1
                if encoded not in protected:
                    break
                # The code collides with a protected keyword: reserve the
                # slot for the keyword itself and try the next index.
                mapping[encoded] = (index - 1, encoded)
            mapping[match] = (index - 1, encoded)

        return mapping

    def analyse(self, input):
        """Analyse ``input`` and store the resulting mapping on the instance."""
        self.mapping = self.analyseKeywords(input)

    def getKeywords(self):
        """Return the keywords ordered by index, as needed by a decoder.

        Entries with a negative index are omitted.  A slot whose code equals
        its keyword (a protected keyword) is represented by an empty string.
        """
        ordered = sorted((entry, value)
                         for value, entry in self.mapping.items())
        keywords = []
        for (index, encoded), value in ordered:
            if index < 0:
                continue
            if encoded != value:
                keywords.append(value)
            else:
                keywords.append('')
        return keywords

    def sub(self, input):
        """Return ``input`` with every known keyword replaced by its code."""
        def repl(m):
            # Unknown matches are returned unchanged.
            return self.mapping.get(m.group(0), ('', m.group(0)))[1]
        return self.regexp.sub(repl, input)
00094 
00095 
class JavascriptKeywordMapper(KeywordMapper):
    """Keyword mapper that produces a self-decoding JavaScript wrapper.

    Keywords default to every ``\\w+`` word and codes default to base-62
    numbers (digits, lower case, upper case), which is what the generated
    JavaScript decoder expects.
    """

    def __init__(self, regexp=None, encoder=None):
        """Set up the mapper; both arguments fall back to sensible defaults.

        ``regexp`` may be ``None`` (match ``\\w+``), a pattern string or a
        compiled pattern.  ``encoder`` may be ``None`` (use ``_encode``) or
        any callable turning an index into a code.
        """
        if regexp is None:
            regexp = re.compile(r'\w+')
        if encoder is None:
            encoder = self._encode
        # The base class handles string vs. compiled patterns and resets
        # the mapping, so there is no need to repeat that logic here.
        KeywordMapper.__init__(self, regexp, encoder)

    def _encode(self, charCode,
                mapping="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"):
        """Return ``charCode`` written in the positional system of ``mapping``.

        The base is the length of ``mapping`` (62 for the default alphabet).
        Note that the generated JavaScript decoder assumes the default
        alphabet; a different ``mapping`` is only useful with a matching
        decoder.

        Raises ``ValueError`` for a negative ``charCode`` or an alphabet with
        fewer than two characters, both of which would otherwise never
        terminate.
        """
        base = len(mapping)
        if base < 2:
            raise ValueError("mapping needs at least two characters")
        if charCode < 0:
            raise ValueError("charCode must not be negative: %r" % (charCode,))
        digits = []
        quotient = charCode
        # Always emit at least one digit so that 0 encodes as mapping[0].
        while quotient or not digits:
            quotient, remainder = divmod(quotient, base)
            digits.append(mapping[remainder])
        digits.reverse()
        return "".join(digits)

    def getDecodeFunction(self, fast=True, name=None):
        """Return the packed JavaScript source of the decode function.

        With ``name`` left as ``None`` an anonymous function expression is
        returned that takes the packed script as its first argument.  With a
        ``name`` the result is a ``var <name> = ...`` statement that already
        has the keyword list applied and yields a reusable decoder.

        ``fast`` splices in a shortcut for browsers whose ``String.replace``
        accepts a function as replacement value.
        """
        jspacker = JavascriptPacker('full')

        # fast boot function
        fast_decoder = r"""
            // does the browser support String.replace where the
            //  replacement value is a function?
            if (!''.replace(/^/, String)) {
                // decode all the values we need
                // we have to add the dollar prefix, because $encoded can be
                // any keyword in the decode function below. For example
                // 'constructor' is an attribute of any object and it would
                // return a false positive match in that case.
                while ($count--) $decode["$"+$encode($count)] = $keywords[$count] || $encode($count);
                // global replacement function
                $keywords = [function($encoded){$result = $decode["$"+$encoded]; return $result!=undefined?$result:$encoded}];
                // generic match
                $encode = function(){return'\\w+'};
                // reset the loop counter -  we are now doing a global replace
                $count = 1;
            };"""

        if name is None:
            # boot function
            template = r"""
                function($packed, $ascii, $count, $keywords, $encode, $decode) {
                    $encode = function($charCode) {
                        return ($charCode < $ascii ? "" : $encode(parseInt($charCode / $ascii))) +
                            (($charCode = $charCode % $ascii) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
                    };
                    // fastDecodePlaceholder
                    while ($count--)
                        if ($keywords[$count])
                            $packed = $packed.replace(new RegExp("\\b" + $encode($count) + "\\b", "g"), $keywords[$count]);
                    return $packed;
                }"""
        else:
            # named variant: the outer function binds the keywords once and
            # returns a decoder which can be applied to several scripts
            template = r"""
                var %s = function($ascii, $count, $keywords, $encode, $decode) {
                    $encode = function($charCode) {
                        return ($charCode < $ascii ? "" : $encode(parseInt($charCode / $ascii))) +
                            (($charCode = $charCode %% $ascii) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
                    };
                    // fastDecodePlaceholder
                    var decoder = function($packed, $ascii1, $count1, $keywords1, $encode1, $decode1) {
                        $count1 = $count;
                        while ($count1--)
                            if ($keywords[$count1])
                                $packed = $packed.replace(new RegExp("\\b" + $encode($count1) + "\\b", "g"), $keywords[$count1]);
                        return $packed;
                    };
                    return decoder;
                }""" % name

        # Both variants share the placeholder and are packed the same way.
        if fast:
            template = template.replace('// fastDecodePlaceholder', fast_decoder)
        decoder = jspacker.pack(template)

        if name is not None:
            keywords = self.getKeywords()
            decoder = "%s(62, %i, '%s'.split('|'), 0, {});" % (decoder, len(keywords), "|".join(keywords))

        return decoder

    def getDecoder(self, input, keyword_var=None, decode_func=None):
        """Wrap the encoded script ``input`` in an ``eval(...)`` call.

        ``keyword_var`` is the JavaScript expression providing the keyword
        array; by default the keywords are embedded as a ``'a|b'.split('|')``
        literal.  ``decode_func`` is the JavaScript decode function; by
        default the one from ``getDecodeFunction()`` is used.
        """
        if keyword_var is None:
            keywords = self.getKeywords()
            num_keywords = len(keywords)
            keywords = "'%s'.split('|')" % "|".join(keywords)
        else:
            keywords = keyword_var
            num_keywords = len(self.getKeywords())

        if decode_func is None:
            decode_func = self.getDecodeFunction()

        # Backslashes first, then the characters that cannot appear raw in a
        # JavaScript string literal.  A raw carriage return is a line
        # terminator just like a newline and has to be escaped as well.
        escaped = input.replace("\\", "\\\\").replace('\n', '\\n').replace('\r', '\\r')
        escaped_single = escaped.replace("'", "\\'")
        escaped_double = escaped.replace('"', '\\"')
        # use whichever quoting style needs fewer escapes
        if len(escaped_single) < len(escaped_double):
            script = "'%s'" % escaped_single
        else:
            script = '"%s"' % escaped_double
        return "eval(%s(%s,62,%i,%s,0,{}))" % (decode_func, script,
                                               num_keywords,
                                               keywords)
00212 
00213 
class Packer:
    """Apply an ordered list of regular-expression rules to a text.

    Each rule is a ``(compiled_regexp, replacement, keyword_encoder)`` tuple
    stored in ``self.patterns`` and is one of:

    * a substitution (``replacement`` is not ``None``),
    * a protection (both ``None``): group 1 of every match is cut out,
      replaced by a placeholder and put back after all rules have run,
    * a keyword substitution (``keyword_encoder`` given): matches are
      analysed and encoded by a ``KeywordMapper``.
    """

    def __init__(self):
        self.patterns = []
        # protected parts collected by the current/last pack() run
        self.replacelist = []

    def copy(self):
        """Return a new packer with an independent copy of the rule list."""
        result = Packer()
        result.patterns = self.patterns[:]
        return result

    def _compile(self, pattern, flags):
        """Compile ``pattern``, passing ``flags`` only when they were given."""
        if flags is None:
            return re.compile(pattern)
        return re.compile(pattern, flags)

    def _repl(self, match):
        """Store group 1 of ``match`` and return its placeholder."""
        # store protected part
        self.replacelist.append(match.group(1))
        # return escaped index (1-based, delimited by NUL characters)
        return "\x00%i\x00" % len(self.replacelist)

    def pack(self, input):
        """Run all rules over ``input`` in order and return the result."""
        # list of protected parts
        self.replacelist = []
        output = input
        for regexp, replacement, keyword_encoder in self.patterns:
            if replacement is not None:
                # substitute
                output = regexp.sub(replacement, output)
            elif keyword_encoder is None:
                # protect the matched parts
                output = regexp.sub(self._repl, output)
            else:
                mapper = KeywordMapper(regexp=regexp,
                                       encoder=keyword_encoder)
                # get keywords
                mapper.analyse(output)
                # replace keywords
                output = mapper.sub(output)
        # Restore protected parts from back to front: a later protected part
        # may itself contain the placeholder of an earlier one.  A plain
        # string replace inserts the stored text verbatim, so backslashes in
        # it need no escaping.
        for number in range(len(self.replacelist), 0, -1):
            output = output.replace("\x00%i\x00" % number,
                                    self.replacelist[number - 1])
        # done
        return output

    def protect(self, pattern, flags=None):
        """Add a rule that shields group 1 of ``pattern`` from later rules."""
        self.keywordSub(pattern, None, flags)

    def sub(self, pattern, replacement, flags=None):
        """Add a rule replacing ``pattern`` with ``replacement``."""
        self.patterns.append((self._compile(pattern, flags), replacement, None))

    def keywordSub(self, pattern, keyword_encoder, flags=None):
        """Add a rule encoding the matches of ``pattern`` as keywords."""
        self.patterns.append((self._compile(pattern, flags), None, keyword_encoder))
00274 
00275 
class JavascriptPacker(Packer):
    """Packer preconfigured with rules for compressing JavaScript source.

    The rules are registered in ``__init__`` and applied in that order by
    ``Packer.pack``.  ``level`` is ``'safe'`` (default) or ``'full'``; the
    full level additionally shortens ``$``-prefixed local names, encodes
    ``_``-prefixed names and strips more whitespace.
    """

    def __init__(self, level='safe'):
        """Register the protection and substitution rules for ``level``."""
        Packer.__init__(self)
        # protect strings
        # these sometimes catch too much, but in safe mode this doesn't hurt

        # the parts:
        # match a single quote
        # match anything but the single quote, a backslash and a newline "[^'\\\n]"
        # or match a null escape (\0 not followed by another digit) "\\0(?![0-9])"
        # or match a hex escape "\\x[0-9a-fA-F]{2}" or unicode escape "\\u[0-9a-fA-F]{4}"
        # or match a character escape (no newline) "\\[^\n]"
        # do this until there is another single quote "(?:<snip>)*?'"
        # all this returns one group "(<snip>)"
        # the second alternative does the same for double quoted strings
        self.protect(r"""('(?:[^'\\\n]|\\0(?![0-9])|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\[^\n])*?'|"""
                     r""""(?:[^"\\\n]|\\0(?![0-9])|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\[^\n])*?")""")

        # protect regular expressions
        # first form: a regex literal preceded by whitespace (the whitespace is
        # outside the group and therefore dropped)
        self.protect(r"""\s+(\/[^\/\n\r\*](?:\\/|[^\n\r])*\/g?i?)""")
        # second form: a regex literal directly after a character that cannot
        # end an operand, so the slash is not mistaken for a division
        self.protect(r"""([^\w\$\/'"*)\?:]\/[^\/\n\r\*](?:\\/|[^\n\r])*\/g?i?)""")

        # protect IE conditional compilation
        self.protect(r'(/\*@.*?(?:\*/|\n|\*/(?!\n)))', re.DOTALL)

        # remove one line comments
        self.sub(r'\s*//.*$', '', re.MULTILINE)
        # remove multiline comments
        self.sub(r'/\*.*?\*/', '', re.DOTALL)

        if level == 'full':
            # encode local variables. those are preceded by dollar signs
            # the amount of dollar signs says how many characters are preserved
            # any trailing digits are preserved as well
            # $name -> n, $$name -> na, $top1 -> t1, $top2 -> t2
            def _dollar_replacement(match):
                """Return the shortened form of one ``$``-prefixed name.

                Group 2 is the run of dollar signs, group 3 the name, group 1
                both together and group 4 the trailing digits.
                """
                # number of dollar signs == number of characters to keep
                length = len(match.group(2))
                # equals ``length`` when the name has at least that many
                # characters, so the slice below starts right after the
                # dollars; for shorter names it starts inside the dollar run
                start = length - max(length - len(match.group(3)), 0)
                result = match.group(1)[start:start+length] + match.group(4)
                return result
            self.sub(r"""((\$+)([a-zA-Z\$_]+))(\d*)\b""", _dollar_replacement)

            # encode names starting with an underscore as _0, _1, ...
            self.keywordSub(r"""(\b_[A-Za-z\d]\w+)""", lambda i: "_%i" % i)

        # strip whitespace at the beginning and end of each line
        self.sub(r'^[ \t\r\f\v]*(.*?)[ \t\r\f\v]*$', r'\1', re.MULTILINE)
        # whitespace after some special chars but not
        # before function declaration
        self.sub(r'([{;\[(,=&|\?:<>%!/])\s+(?!function)', r'\1')
        # after an equal sign a function definition is ok
        self.sub(r'=\s+(?=function)', r'=')
        if level == 'full':
            # whitespace after some more special chars
            self.sub(r'([};\):,])\s+', r'\1')
        # whitespace before some special chars
        self.sub(r'\s+([={},&|\?:\.()<>%!/\]])', r'\1')
        # whitespace before plus chars if no other plus char before it
        self.sub(r'(?<!\+)\s+\+', '+')
        # whitespace after plus chars if no other plus char after it
        self.sub(r'\+\s+(?!\+)', '+')
        # whitespace before minus chars if no other minus char before it
        self.sub(r'(?<!-)\s+-', '-')
        # whitespace after minus chars if no other minus char after it
        self.sub(r'-\s+(?!-)', '-')
        # remove redundant semi-colons
        self.sub(r';+\s*([};])', r'\1')
        # remove any excessive whitespace left except newlines
        self.sub(r'[ \t\r\f\v]+', ' ')
        # excessive newlines
        self.sub(r'\n+', '\n')
        # first newline
        self.sub(r'^\n', '')
00346 
00347 
class CSSPacker(Packer):
    """Packer preconfigured with rules for compressing CSS source.

    ``level`` is ``'safe'`` (default) or ``'full'``.  The safe level empties
    comments but keeps their markers; the full level removes comments
    entirely and strips whitespace around ``{``, ``,``, ``;`` and ``:``.
    """

    def __init__(self, level='safe'):
        """Register the protection and substitution rules for ``level``."""
        Packer.__init__(self)
        # protect strings
        # these sometimes catch too much, but in safe mode this doesn't hurt
        self.protect(r"""('(?:\\'|\\\n|.)*?')""")
        self.protect(r'''("(?:\\"|\\\n|.)*?")''')
        # strip whitespace at the beginning and end of each line
        self.sub(r'^[ \t\r\f\v]*(.*?)[ \t\r\f\v]*$', r'\1', re.MULTILINE)
        if level == 'full':
            # remove comments
            self.sub(r'/\*.*? ?[\\/*]*\*/', r'', re.DOTALL)
            # remove more whitespace
            self.sub(r'\s*([{,;:])\s+', r'\1')
        else:
            # remove comment contents, but keep the opening marker and the
            # captured tail of the comment (group 1)
            self.sub(r'/\*.*?( ?[\\/*]*\*/)', r'/*\1', re.DOTALL)
            # remove lines with comments only (consisting of stars only)
            self.sub(r'^/\*+\*/$', '', re.MULTILINE)
        # excessive newlines
        self.sub(r'\n+', '\n')
        # first newline
        self.sub(r'^\n', '')
00371 
00372 
def _select_packer(parser, chosen, other):
    """Mark packer ``chosen`` as selected unless ``other`` already is."""
    if getattr(parser.values, other):
        raise OptionValueError("only one packer can be used at once")
    setattr(parser.values, chosen, True)


def js_packer_callback(option, opt_str, value, parser, *args, **kwargs):
    """optparse callback for -j/--javascript; conflicts with -c/--css."""
    _select_packer(parser, "javascript", "css")


def css_packer_callback(option, opt_str, value, parser, *args, **kwargs):
    """optparse callback for -c/--css; conflicts with -j/--javascript."""
    _select_packer(parser, "css", "javascript")


# Command line interface; the options are registered in the order in which
# they should show up in the --help output.
optparser = OptionParser()

optparser.add_option(
    "-o", "--output",
    dest="filename",
    metavar="FILE",
    help="Write output to FILE")

optparser.add_option(
    "", "--test",
    action="store_true",
    dest="run_tests",
    help="Run test suite")

optparser.add_option(
    "-j", "--javascript",
    action="callback",
    callback=js_packer_callback,
    dest="javascript",
    help="Force to use javascript packer")

optparser.add_option(
    "-c", "--css",
    action="callback",
    callback=css_packer_callback,
    dest="css",
    help="Force to use css packer")

optparser.add_option(
    "-l", "--level",
    dest="level",
    default="safe",
    help="Declare which level of packing to use (safe, full), default is 'safe'")

optparser.add_option(
    "-e", "--encode",
    action="store_true",
    dest="encode",
    help="Encode keywords (only javascript)")
00404 
00405 
00406 # be aware that the initial indentation gets removed in the following tests,
00407 # the inner indentation is preserved though (see textwrap.dedent)
00408 js_compression_tests = (
00409     (
00410         'standardJS',
00411         """\
00412             /* a comment */
00413 
00414             function dummy() {
00415 
00416                 var localvar = 10 // one line comment
00417 
00418                 document.write(localvar);
00419                 return 'bar'
00420             }
00421         """, 
00422         """\
00423             function dummy(){var localvar=10
00424             document.write(localvar);return 'bar'}
00425         """,
00426         'safe'
00427     ),
00428     (
00429         'standardJS',
00430         """\
00431             /* a comment */
00432 
00433             function dummy() {
00434 
00435                 var localvar = 10 // one line comment
00436 
00437                 document.write(localvar);
00438                 return 'bar'
00439             }
00440         """, 
00441         """\
00442             function dummy(){var localvar=10
00443             document.write(localvar);return 'bar'}""",
00444         'full'
00445     ),
00446     (
00447         'stringProtection',
00448         """
00449             var leafnode = this.shared.xmldata.selectSingleNode('//*[@selected]');
00450             var portal_url = 'http://127.0.0.1:9080/plone';
00451         """,
00452         """var leafnode=this.shared.xmldata.selectSingleNode('//*[@selected]');var portal_url='http://127.0.0.1:9080/plone';"""
00453     ),
00454     (
00455         'newlinesInStrings',
00456         r"""var message = "foo: " + foo + "\nbar: " + bar;""",
00457         r"""var message="foo: "+foo+"\nbar: "+bar;"""
00458     ),
00459     (
00460         'escapedStrings',
00461         r"""var message = "foo: \"something in quotes\"" + foo + "\nbar: " + bar;""",
00462         r"""var message="foo: \"something in quotes\""+foo+"\nbar: "+bar;"""
00463     ),
00464     (
00465         'escapedStrings2',
00466         r"""kukit.kssp.string = kukit.tk.mkParser('string', {
00467                 "'": 'this.emitAndReturn(new kukit.kssp.quote(this.src))',
00468                 "\\": 'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'
00469                 });
00470             kukit.kssp.string.prototype.process = function() {
00471                 // collect up the value of the string, omitting the quotes
00472                 this.txt = '';
00473                 for (var i=1; i<this.result.length-1; i++) {
00474                     this.txt += this.result[i].txt;
00475                 }
00476             };
00477 
00478             kukit.kssp.string2 = kukit.tk.mkParser('string', {
00479                 '"': 'this.emitAndReturn(new kukit.kssp.dquote(this.src))',
00480                 "\\": 'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'
00481                 });
00482             kukit.kssp.string2.prototype.process = kukit.kssp.string.prototype.process; 
00483 
00484 
00485             kukit.kssp.backslashed = kukit.tk.mkParser('backslashed', {});
00486             kukit.kssp.backslashed.prototype.nextStep = function(table) {
00487                 // digest the next character and store it as txt
00488                 var src = this.src;
00489                 var length = src.text.length;
00490                 if (length < src.pos + 1) {
00491                     this.emitError('Missing character after backslash');
00492                 } else { 
00493                     this.result.push(new kukit.tk.Fraction(src, src.pos+1));
00494                     this.src.pos += 1;
00495                     this.finished = true;
00496                 }
00497             };
00498             kukit.kssp.backslashed.prototype.process = function() {
00499                 this.txt = this.result[1].txt;
00500             };
00501         """,
00502         r"""kukit.kssp.string=kukit.tk.mkParser('string',{"'":'this.emitAndReturn(new kukit.kssp.quote(this.src))',"\\":'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'});kukit.kssp.string.prototype.process=function(){this.txt='';for(var i=1;i<this.result.length-1;i++){this.txt+=this.result[i].txt}};kukit.kssp.string2=kukit.tk.mkParser('string',{'"':'this.emitAndReturn(new kukit.kssp.dquote(this.src))',"\\":'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'});kukit.kssp.string2.prototype.process=kukit.kssp.string.prototype.process;kukit.kssp.backslashed=kukit.tk.mkParser('backslashed',{});kukit.kssp.backslashed.prototype.nextStep=function(table){var src=this.src;var length=src.text.length;if(length<src.pos+1){this.emitError('Missing character after backslash')} else{this.result.push(new kukit.tk.Fraction(src,src.pos+1));this.src.pos+=1;this.finished=true}};kukit.kssp.backslashed.prototype.process=function(){this.txt=this.result[1].txt};""",
00503         'safe'
00504     ),
00505     (
00506         'escapedStrings2',
00507         r"""kukit.kssp.string = kukit.tk.mkParser('string', {
00508                 "'": 'this.emitAndReturn(new kukit.kssp.quote(this.src))',
00509                 "\\": 'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'
00510                 });
00511             kukit.kssp.string.prototype.process = function() {
00512                 // collect up the value of the string, omitting the quotes
00513                 this.txt = '';
00514                 for (var i=1; i<this.result.length-1; i++) {
00515                     this.txt += this.result[i].txt;
00516                 }
00517             };
00518 
00519             kukit.kssp.string2 = kukit.tk.mkParser('string', {
00520                 '"': 'this.emitAndReturn(new kukit.kssp.dquote(this.src))',
00521                 "\\": 'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'
00522                 });
00523             kukit.kssp.string2.prototype.process = kukit.kssp.string.prototype.process; 
00524 
00525 
00526             kukit.kssp.backslashed = kukit.tk.mkParser('backslashed', {});
00527             kukit.kssp.backslashed.prototype.nextStep = function(table) {
00528                 // digest the next character and store it as txt
00529                 var src = this.src;
00530                 var length = src.text.length;
00531                 if (length < src.pos + 1) {
00532                     this.emitError('Missing character after backslash');
00533                 } else { 
00534                     this.result.push(new kukit.tk.Fraction(src, src.pos+1));
00535                     this.src.pos += 1;
00536                     this.finished = true;
00537                 }
00538             };
00539             kukit.kssp.backslashed.prototype.process = function() {
00540                 this.txt = this.result[1].txt;
00541             };
00542         """,
00543         r"""kukit.kssp.string=kukit.tk.mkParser('string',{"'":'this.emitAndReturn(new kukit.kssp.quote(this.src))',"\\":'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'});kukit.kssp.string.prototype.process=function(){this.txt='';for(var i=1;i<this.result.length-1;i++){this.txt+=this.result[i].txt}};kukit.kssp.string2=kukit.tk.mkParser('string',{'"':'this.emitAndReturn(new kukit.kssp.dquote(this.src))',"\\":'new kukit.kssp.backslashed(this.src, kukit.kssp.backslash)'});kukit.kssp.string2.prototype.process=kukit.kssp.string.prototype.process;kukit.kssp.backslashed=kukit.tk.mkParser('backslashed',{});kukit.kssp.backslashed.prototype.nextStep=function(table){var src=this.src;var length=src.text.length;if(length<src.pos+1){this.emitError('Missing character after backslash')}else{this.result.push(new kukit.tk.Fraction(src,src.pos+1));this.src.pos+=1;this.finished=true}};kukit.kssp.backslashed.prototype.process=function(){this.txt=this.result[1].txt};""",
00544         'full'
00545     ),
00546     (
00547         'mixingSingleAndDoubleQuotes',
00548         """\
00549             alert("Address '" + $address + "' not found");
00550         """,
00551         """\
00552             alert("Address '"+$address+"' not found");""",
00553         'safe'
00554     ),
00555     (
00556         'mixingSingleAndDoubleQuotes',
00557         """\
00558             alert("Address '" + $address + "' not found");
00559         """,
00560         """\
00561             alert("Address '"+a+"' not found");""",
00562         'full'
00563     ),
00564     (
00565         'protectRegularExpressions',
00566         """\
00567             replace( /^\/\//i, "" );
00568         """,
00569         """\
00570             replace(/^\/\//i,"");"""
00571     ),
00572     (
00573         'whitspaceAroundPlus',
00574         """\
00575             var message = foo + bar;
00576             message = foo++ + bar;
00577             message = foo + ++bar;
00578         """,
00579         """\
00580             var message=foo+bar;message=foo++ +bar;message=foo+ ++bar;"""
00581     ),
00582     (
00583         'whitspaceAroundMinus',
00584         """\
00585             var message = foo - bar;
00586             message = foo-- - bar;
00587             message = foo - --bar;
00588         """,
00589         """\
00590             var message=foo-bar;message=foo-- -bar;message=foo- --bar;"""
00591     ),
00592     (
00593         'missingSemicolon',
00594         """\
00595             var x = function() {
00596  
00597             } /* missing ; here */
00598             next_instr;
00599         """,
00600         """\
00601             var x=function(){}
00602             next_instr;""",
00603         'safe'
00604     ),
00605     # be aware that the following produces invalid code. You *have* to add
00606     # a semicolon after a '}' followed by a normal instruction
00607     (
00608         'missingSemicolon',
00609         """\
00610             var x = function() {
00611  
00612             } /* missing ; here */
00613             next_instr;
00614         """,
00615         """\
00616             var x=function(){}next_instr;""",
00617         'full'
00618     ),
00619     (
00620         'missingSemicolon2',
00621         """\
00622             id=id || 'ids:list'  // defaults to ids:list, this is the most common usage
00623 
00624             if (selectbutton.isSelected==null){
00625                 initialState=initialState || false;
00626                 selectbutton.isSelected=initialState;
00627                 }
00628         """,
00629         """\
00630             id=id||'ids:list'
00631             if(selectbutton.isSelected==null){initialState=initialState||false;selectbutton.isSelected=initialState}
00632         """,
00633         'safe'
00634     ),
00635     (
00636         'missingSemicolon2',
00637         """\
00638             id=id || 'ids:list'  // defaults to ids:list, this is the most common usage
00639 
00640             if (selectbutton.isSelected==null){
00641                 initialState=initialState || false;
00642                 selectbutton.isSelected=initialState;
00643                 }
00644         """,
00645         """\
00646             id=id||'ids:list'
00647             if(selectbutton.isSelected==null){initialState=initialState||false;selectbutton.isSelected=initialState}""",
00648         'full'
00649     ),
00650     # excessive semicolons after curly brackets get removed
00651     (
00652         'nestedCurlyBracketsWithSemicolons',
00653         """\
00654             function dummy(a, b) {
00655                 if (a > b) {
00656                     do something
00657                 } else {
00658                     do something else
00659                 };
00660             };
00661             next_instr;
00662         """,
00663         """\
00664             function dummy(a,b){if(a>b){do something} else{do something else}};next_instr;""",
00665         'safe'
00666     ),
00667     (
00668         'nestedCurlyBracketsWithSemicolons',
00669         """\
00670             function dummy(a, b) {
00671                 if (a > b) {
00672                     do something
00673                 } else {
00674                     do something else
00675                 };
00676             };
00677             next_instr;
00678         """,
00679         """\
00680             function dummy(a,b){if(a>b){do something}else{do something else}};next_instr;""",
00681         'full'
00682     ),
00683     (
00684         'onelineVsMultilineComment',
00685         """\
00686             function abc() {
00687                 return value;
00688             }; //********************
00689 
00690             function xyz(a, b) {
00691                 /* docstring for this function */
00692                 if (a == null) {
00693                     return 1
00694                 }
00695             }
00696         """,
00697         """\
00698             function abc(){return value};
00699             function xyz(a,b){if(a==null){return 1}}
00700         """,
00701         'safe'
00702     ),
00703     (
00704         'onelineVsMultilineComment',
00705         """\
00706             function abc() {
00707                 return value;
00708             }; //********************
00709 
00710             function xyz(a, b) {
00711                 /* docstring for this function */
00712                 if (a == null) {
00713                     return 1
00714                 }
00715             }
00716         """,
00717         """\
00718             function abc(){return value};function xyz(a,b){if(a==null){return 1}}""",
00719         'full'
00720     ),
00721     (
00722         'conditionalIE',
00723         """\
00724             /* for Internet Explorer */
00725             /*@cc_on @*/
00726             /*@if (@_win32)
00727               document.write("<script id=__ie_onload defer src=javascript:void(0)><\/script>");
00728               var script = document.getElementById("__ie_onload");
00729               script.onreadystatechange = function() {
00730                      if (this.readyState == "complete") {
00731                             DOMContentLoadedInit(); // call the onload handler
00732                      }
00733               };
00734             /*@end @*/
00735         """,
00736         """\
00737             /*@cc_on @*/
00738             /*@if (@_win32)
00739              document.write("<script id=__ie_onload defer src=javascript:void(0)><\\/script>");var script=document.getElementById("__ie_onload");script.onreadystatechange=function(){if(this.readyState=="complete"){DOMContentLoadedInit()}};/*@end @*/
00740         """
00741     ),
00742     # variable encoding
00743     (
00744         'localVars',
00745         """\
00746             function dummy($node, $$value) {
00747                 $node.className = $$value;
00748             }
00749         """,
00750         """\
00751             function dummy(n,va){n.className=va}""",
00752         'full'
00753     ),
00754     (
00755         'privateVars',
00756         """\
00757             function dummy(_node, _value) {
00758                 _node.className = _value;
00759             }
00760         """,
00761         """\
00762             function dummy(_1,_0){_1.className=_0}""",
00763         'full'
00764     ),
00765     (
00766         'noDoubleUnderscoresAtBeginning',
00767         """\
00768             function dummy(__node, _value) {
00769                 __node.className = _value;
00770             }
00771         """,
00772         """\
00773             function dummy(__node,_0){__node.className=_0}""",
00774         'full'
00775     ),
00776     (
00777         'atAtLeastTwoChars',
00778         """\
00779             function dummy(_a, _va) {
00780                 _a.className = _va;
00781             }
00782         """,
00783         """\
00784             function dummy(_a,_0){_a.className=_0}""",
00785         'full'
00786     ),
00787 )
00788 
00789 
# Fixtures for the CSS packer at the 'safe' level.
#
# Each entry is a (name, input, expected output) triple.  test_suite() runs
# both strings through textwrap.dedent() and then checks that
# CSSPacker('safe').pack(input) equals the expected output.
#
# As the expected strings below show, safe packing strips indentation and
# blank lines and reduces comment text to "/* */", while quoted strings and
# the comment shapes used as browser hacks ("/**/", "\*/", "/*/*/") are kept.
css_safe_compression_tests = (
    (
        'commentCompression',
        """
            /* this is a comment */
            #testElement {
                property: value; /* another comment */
            }
            /**********/
            /* this is a multi
               line comment */
            #testElement {
                /* yet another comment */
                property: value;
            }
        """,
        """\
            /* */
            #testElement {
            property: value; /* */
            }
            /* */
            #testElement {
            /* */
            property: value;
            }
        """
    ),
    (
        'newlineCompression',
        """
        
        
        /* this is a comment */
        
        #testElement {
            property: value; /* another comment */
        }
        
        /* this is a multi
           line comment */
        #testElement {
        
            /* yet another comment */
            property: value;
            
        }
        
        
        """,
        """\
            /* */
            #testElement {
            property: value; /* */
            }
            /* */
            #testElement {
            /* */
            property: value;
            }
        """
    ),
    # see http://www.dithered.com/css_filters/index.html
    (
        'commentHacks1',
        """
            #testElement {
                property/**/: value;
                property/* */: value;
                property /**/: value;
                property: /**/value;
            }
        """,
        """\
            #testElement {
            property/**/: value;
            property/* */: value;
            property /**/: value;
            property: /**/value;
            }
        """
    ),
    (
        'commentHacks2',
        """
            selector/* */ {  }
        """,
        """\
            selector/* */ {  }
        """
    ),
    (
        'commentHacks3',
        """
            selector/* foobar */ {  }
        """,
        """\
            selector/* */ {  }
        """
    ),
    (
        'commentHacks4',
        """
            selector/**/ {  }
        """,
        """\
            selector/**/ {  }
        """
    ),
    (
        'commentHacks5',
        """
            /* \*/
            rules
            /* */
        """,
        """\
            /* \*/
            rules
            /* */
        """
    ),
    (
        'commentHacks6',
        """
            /* foobar \*/
            rules
            /* */
        """,
        """\
            /* \*/
            rules
            /* */
        """
    ),
    (
        'commentHacks7',
        """
            /*/*/
            rules
            /* */
        """,
        """\
            /*/*/
            rules
            /* */
        """
    ),
    (
        'commentHacks8',
        """
            /*/*//*/
            rules
            /* */
        """,
        """\
            /*/*//*/
            rules
            /* */
        """
    ),
    (
        'stringProtection',
        """
            /* test string protection */
            #selector,
            #another {
                content: 'foo; bar';
            }
        """,
        """\
            /* */
            #selector,
            #another {
            content: 'foo; bar';
            }
        """
    ),
)
00969 
# Fixtures for the CSS packer at the 'full' level.
#
# Each entry is a (name, input, expected output) triple.  test_suite() runs
# both strings through textwrap.dedent() and then checks that
# CSSPacker('full').pack(input) equals the expected output.
#
# The inputs mirror the 'safe' fixtures; the expected strings below show the
# stronger result: every comment (including the hack forms) is gone, the
# whitespace around "{", ":" and "," is removed, and each rule sits on its
# own line.  The content of quoted strings is left untouched.
css_full_compression_tests = (
    (
        'commentCompression',
        """
            /* this is a comment */
            #testElement {
                property: value; /* another comment */
            }
            /**********/
            /* this is a multi
               line comment */
            #testElement {
                /* yet another comment */
                property: value;
            }
        """,
        """\
            #testElement{property:value;}
            #testElement{property:value;}
        """
    ),
    (
        'newlineCompression',
        """
        
        
        /* this is a comment */
        
        #testElement {
            property: value; /* another comment */
        }
        
        /* this is a multi
           line comment */
        #testElement {
        
            /* yet another comment */
            property: value;
            
        }
        
        
        """,
        """\
            #testElement{property:value;}
            #testElement{property:value;}
        """
    ),
    # see http://www.dithered.com/css_filters/index.html
    # in full compression all hacks get removed
    (
        'commentHacks1',
        """
            #testElement {
                property/**/: value;
                property/* */: value;
                property /**/: value;
                property: /**/value;
            }
        """,
        """\
            #testElement{property:value;property:value;property:value;property:value;}
        """
    ),
    (
        'commentHacks2',
        """
            selector/* */ {  }
        """,
        """\
            selector{}
        """
    ),
    (
        'commentHacks3',
        """
            selector/* foobar */ {  }
        """,
        """\
            selector{}
        """
    ),
    (
        'commentHacks4',
        """
            selector/**/ {  }
        """,
        """\
            selector{}
        """
    ),
    (
        'commentHacks5',
        """
            /* \*/
            rules
            /* */
        """,
        """\
            rules
        """
    ),
    (
        'commentHacks6',
        """
            /* foobar \*/
            rules
            /* */
        """,
        """\
            rules
        """
    ),
    (
        'commentHacks7',
        """
            /*/*/
            rules
            /* */
        """,
        """\
            rules
        """
    ),
    (
        'commentHacks8',
        """
            /*/*//*/
            rules
            /* */
        """,
        """\
            rules
        """
    ),
    (
        'stringProtection',
        """
            /* test string protection and full compression */
            #selector,
            #another {
                content: 'foo; bar';
            }
        """,
        """\
            #selector,#another{content:'foo; bar';}
        """
    ),
)
01119 
01120 class PackerTestCase(unittest.TestCase):
01121     def __init__(self, name, input, output, packer):
01122         unittest.TestCase.__init__(self)
01123         self.name = name
01124         self.input = input
01125         self.output = output
01126         self.packer = packer
01127 
01128     def __str__(self):
01129         return self.name
01130 
01131     def runTest(self):
01132         self.assertEqual(self.packer.pack(self.input), self.output)
01133 
01134 
def test_suite():
    """Build the unittest suite from the module-level fixture tuples.

    Javascript fixtures are (name, input, expected[, levels]) tuples, where
    the optional fourth item is a comma separated list of compression levels;
    without it the fixture is run for both 'safe' and 'full'.  CSS fixtures
    are (name, input, expected) triples kept in one tuple per level.  Input
    and expected text are dedented before being wrapped in a PackerTestCase.
    """
    suite = unittest.TestSuite()

    # One shared packer instance per language and compression level.
    js_packers = {
        'safe': JavascriptPacker('safe'),
        'full': JavascriptPacker('full'),
    }
    css_packers = {
        'safe': CSSPacker('safe'),
        'full': CSSPacker('full'),
    }

    for fixture in js_compression_tests:
        title = fixture[0]
        source = textwrap.dedent(fixture[1])
        expected = textwrap.dedent(fixture[2])

        levels = ("safe", "full")
        if len(fixture) == 4:
            levels = fixture[3].split(",")

        for level in levels:
            label = "%s (%s)" % (title, level)
            suite.addTest(
                PackerTestCase(label, source, expected, js_packers[level]))

    css_fixtures = (
        ("safe", css_safe_compression_tests),
        ("full", css_full_compression_tests),
    )
    for level, fixtures in css_fixtures:
        for title, source, expected in fixtures:
            label = "%s (%s)" % (title, level)
            suite.addTest(
                PackerTestCase(label,
                               textwrap.dedent(source),
                               textwrap.dedent(expected),
                               css_packers[level]))

    return suite
01180 
01181 
01182 def run():
01183     (options, args) = optparser.parse_args()
01184 
01185     if options.run_tests:
01186         unittest.main(defaultTest='test_suite', argv=sys.argv[:1])
01187         return
01188 
01189     if options.javascript:
01190         packer = JavascriptPacker(options.level)
01191     elif options.css:
01192         packer = CSSPacker(options.level)
01193     elif len(args):
01194         print >> sys.stderr, "Autodetection of packer not implemented yet."
01195         sys.exit(1)
01196     else:
01197         print >> sys.stderr, "You have to specify the packer for input from stdin."
01198         sys.exit(1)
01199 
01200     if not len(args):
01201         args = [sys.stdin]
01202 
01203     mapper = None
01204     if options.encode and isinstance(packer, JavascriptPacker):
01205         mapper = JavascriptKeywordMapper()
01206 
01207     for f in args:
01208         if isinstance(f, basestring):
01209             f = open(f)
01210         s = f.read()
01211         f.close()
01212         result = packer.pack(s)
01213         if mapper is not None:
01214             mapper.analyse(result)
01215             result = mapper.sub(result)
01216             result = mapper.getDecoder(result)
01217         print result
01218 
# Start the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    run()