Back to index

d-push  2.0
wbxmldecoder.php
Go to the documentation of this file.
00001 <?php
00002 /***********************************************
00003 * File      :   wbxmldecoder.php
00004 * Project   :   Z-Push
00005 * Descr     :   WBXMLDecoder decodes from Wap Binary XML
00006 *
00007 * Created   :   01.10.2007
00008 *
00009 * Copyright 2007 - 2011 Zarafa Deutschland GmbH
00010 *
00011 * This program is free software: you can redistribute it and/or modify
00012 * it under the terms of the GNU Affero General Public License, version 3,
00013 * as published by the Free Software Foundation with the following additional
00014 * term according to sec. 7:
00015 *
00016 * According to sec. 7 of the GNU Affero General Public License, version 3,
00017 * the terms of the AGPL are supplemented with the following terms:
00018 *
00019 * "Zarafa" is a registered trademark of Zarafa B.V.
00020 * "Z-Push" is a registered trademark of Zarafa Deutschland GmbH
00021 * The licensing of the Program under the AGPL does not imply a trademark license.
00022 * Therefore any rights, title and interest in our trademarks remain entirely with us.
00023 *
00024 * However, if you propagate an unmodified version of the Program you are
00025 * allowed to use the term "Z-Push" to indicate that you distribute the Program.
00026 * Furthermore you may use our trademarks where it is necessary to indicate
00027 * the intended purpose of a product or service provided you use it in accordance
00028 * with honest practices in industrial or commercial matters.
00029 * If you want to propagate modified versions of the Program under the name "Z-Push",
00030 * you may only do so if you have a written permission by Zarafa Deutschland GmbH
00031 * (to acquire a permission please contact Zarafa at trademark@zarafa.com).
00032 *
00033 * This program is distributed in the hope that it will be useful,
00034 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00035 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00036 * GNU Affero General Public License for more details.
00037 *
00038 * You should have received a copy of the GNU Affero General Public License
00039 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00040 *
00041 * Consult LICENSE file for details
00042 ************************************************/
00043 
00044 
/**
 * Streaming decoder that turns a WBXML (WAP Binary XML) byte stream into a
 * sequence of element arrays (start tag / end tag / content).
 *
 * Element arrays are keyed by the EN_* constants (EN_TYPE, EN_TAG, EN_FLAGS,
 * EN_CONTENT, EN_ATTRIBUTES). Those constants, the WBXML_* token constants,
 * the "dtd" code page table and ZLog are all declared outside this file.
 *
 * NOTE(review): entityToCharset() and getStringTableEntry() are called below
 * but are not defined in this class; presumably they are expected to come from
 * WBXMLDefs - confirm.
 */
class WBXMLDecoder extends WBXMLDefs {
    // stream resource the WBXML is read from
    private $in;

    // WBXML header fields, filled by the constructor
    private $version;
    private $publicid;
    private $publicstringid;
    private $charsetid;
    private $stringtable;

    // currently selected code pages for tags and for attributes
    private $tagcp = 0;
    private $attrcp = 0;

    // single-slot push back buffer used by ungetElement() / getToken()
    private $ungetbuffer;

    // stack of currently open tags, only used to indent the debug log
    private $logStack = array();

    // bytes that were already consumed from the stream while probing the header
    private $inputBuffer = "";
    private $isWBXML = true;

    const VERSION = 0x03;

    /**
     * WBXML Decode Constructor
     *
     * Reads the WBXML header (version, public id, charset, string table) from
     * the stream. If a version byte is present and is not self::VERSION, the
     * input is flagged as "not WBXML" and nothing else is consumed.
     *
     * @param  resource    $input          the incoming data stream
     *
     * @access public
     */
    public function __construct($input) {
        // make sure WBXML_DEBUG is defined. It should be at this point
        if (!defined('WBXML_DEBUG'))
            define('WBXML_DEBUG', false);

        $this->in = $input;

        $this->readVersion();
        if (isset($this->version) && $this->version != self::VERSION) {
            $this->isWBXML = false;
            return;
        }

        $this->publicid = $this->getMBUInt();
        // a public id of 0 means the identifier follows as a string table index
        if ($this->publicid == 0)
            $this->publicstringid = $this->getMBUInt();

        $this->charsetid = $this->getMBUInt();
        $this->stringtable = $this->getStringTable();
    }

    /**
     * Legacy PHP4-style constructor name, kept so existing explicit calls keep working.
     * PHP 8 no longer treats this method as a constructor, hence __construct() above.
     *
     * @param  resource    $input          the incoming data stream
     *
     * @access public
     * @deprecated use "new WBXMLDecoder($input)"
     */
    public function WBXMLDecoder($input) {
        $this->__construct($input);
    }

    /**
     * Returns either start, content or end, and auto-concatenates successive content
     *
     * @access public
     * @return array|boolean    element array, or false if no (known) element could be read.
     *                          False is also returned when the stream ends while
     *                          content tokens are being merged.
     */
    public function getElement() {
        $element = $this->getToken();

        // end of stream (or an empty push back slot content)
        if (!$element || !isset($element[EN_TYPE]))
            return false;

        $type = $element[EN_TYPE];

        if ($type == EN_TYPE_STARTTAG || $type == EN_TYPE_ENDTAG)
            return $element;

        if ($type == EN_TYPE_CONTENT) {
            // merge all directly following content tokens into this one
            while (true) {
                $next = $this->getToken();

                if (!$next)
                    return false;

                if ($next[EN_TYPE] != EN_TYPE_CONTENT) {
                    // first non-content token belongs to the next call
                    $this->ungetElement($next);
                    break;
                }

                $element[EN_CONTENT] .= $next[EN_CONTENT];
            }
            return $element;
        }

        return false;
    }

    /**
     * Get a peek at the next element
     *
     * NOTE: the push back buffer holds a single element only. If getElement()
     * already pushed back the token following merged content, pushing back the
     * peeked element overwrites that token (ungetElement() logs this as a
     * double unget).
     *
     * @access public
     * @return array|boolean    the next element, or false
     */
    public function peek() {
        $element = $this->getElement();
        $this->ungetElement($element);
        return $element;
    }

    /**
     * Get the element of a StartTag
     *
     * @param  string      $tag            the expected tag name
     *
     * @access public
     * @return array|boolean    the start tag element, or false if the next token is
     *                          something else (that token is pushed back)
     */
    public function getElementStartTag($tag) {
        $element = $this->getToken();

        $matches = isset($element[EN_TYPE])
            && $element[EN_TYPE] == EN_TYPE_STARTTAG
            && $element[EN_TAG] == $tag;

        if ($matches)
            return $element;

        ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementStartTag(): unmatched WBXML tag: '%s' matching '%s' type '%s' flags '%s'", $tag, $this->elementField($element, EN_TAG), $this->elementField($element, EN_TYPE), $this->elementField($element, EN_FLAGS)));
        $this->ungetElement($element);

        return false;
    }

    /**
     * Get the element of an EndTag
     *
     * If the next token is not an end tag, an error is logged and the rest of
     * the stream is read (and thereby logged, if WBXML debugging is on).
     *
     * @access public
     * @return array|boolean    the end tag element, or false
     */
    public function getElementEndTag() {
        $element = $this->getToken();

        if (isset($element[EN_TYPE]) && $element[EN_TYPE] == EN_TYPE_ENDTAG)
            return $element;

        ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementEndTag(): unmatched WBXML tag: '%s' type '%s' flags '%s'", $this->elementField($element, EN_TAG), $this->elementField($element, EN_TYPE), $this->elementField($element, EN_FLAGS)));

        $bt = debug_backtrace();
        ZLog::Write(LOGLEVEL_ERROR, sprintf("WBXMLDecoder->getElementEndTag(): could not read end tag in '%s'. Please enable the LOGLEVEL_WBXML and send the log to the Z-Push dev team.", $bt[0]["file"] . ":" . $bt[0]["line"]));

        // log the remaining wbxml content
        $this->ungetElement($element);
        while ($this->getElement());

        return false;
    }

    /**
     * Get the content of an element
     *
     * @access public
     * @return string|boolean   the content, or false if the next token is not
     *                          content (that token is pushed back)
     */
    public function getElementContent() {
        $element = $this->getToken();

        if (isset($element[EN_TYPE]) && $element[EN_TYPE] == EN_TYPE_CONTENT)
            return $element[EN_CONTENT];

        ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementContent(): unmatched WBXML content: '%s' type '%s' flags '%s'", $this->elementField($element, EN_TAG), $this->elementField($element, EN_TYPE), $this->elementField($element, EN_FLAGS)));
        $this->ungetElement($element);

        return false;
    }

    /**
     * 'Ungets' an element writing it into a buffer to be 'get' again
     *
     * Only one element can be buffered; a second unget replaces the first and
     * is logged as an error.
     *
     * @param  array       $element        the element to be pushed back
     *
     * @access public
     * @return
     */
    public function ungetElement($element) {
        if ($this->ungetbuffer)
            ZLog::Write(LOGLEVEL_ERROR,sprintf("WBXMLDecoder->ungetElement(): WBXML double unget on tag: '%s' type '%s' flags '%s'", $this->elementField($element, EN_TAG), $this->elementField($element, EN_TYPE), $this->elementField($element, EN_FLAGS)));

        $this->ungetbuffer = $element;
    }

    /**
     * Returns the plain input stream
     *
     * The bytes already consumed while probing the header are prepended to
     * everything that can still be read from the stream.
     *
     * @access public
     * @return string
     */
    public function GetPlainInputStream() {
        $plain = $this->inputBuffer;

        while (true) {
            $data = fread($this->in, 4096);

            // compare strictly: a chunk consisting of "0" is valid data, not end of stream
            if ($data === false || $data === "")
                break;

            $plain .= $data;
        }

        return $plain;
    }

    /**
     * Returns if the input is WBXML
     *
     * @access public
     * @return boolean
     */
    public function IsWBXML() {
        return $this->isWBXML;
    }



    /**----------------------------------------------------------------------------------------------------------
     * Private WBXMLDecoder stuff
     */

    /**
     * Returns the field of an element for logging, or "" if it is not set
     *
     * @param  mixed       $element        element array (or null/false)
     * @param  mixed       $key            one of the EN_* keys
     *
     * @access private
     * @return mixed
     */
    private function elementField($element, $key) {
        return isset($element[$key]) ? $element[$key] : "";
    }

    /**
     * Returns the next token
     *
     * A pushed back element is returned first (and is not logged again).
     *
     * @access private
     * @return array|boolean    token array, or a false value at end of stream
     */
    private function getToken() {
        // See if there's something in the ungetBuffer
        if ($this->ungetbuffer) {
            $buffered = $this->ungetbuffer;
            $this->ungetbuffer = false;
            return $buffered;
        }

        $token = $this->_getToken();
        $this->logToken($token);

        return $token;
    }

    /**
     * Log the a token to ZLog
     *
     * Does nothing unless WBXML_DEBUG is enabled.
     *
     * @param  array       $el             token array
     *
     * @access private
     * @return
     */
    private function logToken($el) {
        if (!WBXML_DEBUG)
            return;

        // nothing to log at end of stream
        if (!isset($el[EN_TYPE]))
            return;

        // indentation reflects the nesting depth BEFORE this token is applied
        $spaces = str_repeat(" ", count($this->logStack));

        switch ($el[EN_TYPE]) {
            case EN_TYPE_STARTTAG:
                if ($el[EN_FLAGS] & EN_FLAGS_CONTENT) {
                    ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " <". $el[EN_TAG] . ">");
                    array_push($this->logStack, $el[EN_TAG]);
                }
                else {
                    ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " <" . $el[EN_TAG] . "/>");
                }
                break;

            case EN_TYPE_ENDTAG:
                $tag = array_pop($this->logStack);
                ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . "</" . $tag . ">");
                break;

            case EN_TYPE_CONTENT:
                ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " " . $el[EN_CONTENT]);
                break;
        }
    }

    /**
     * Returns either a start tag, content or end tag
     *
     * Code page switches, extension tokens and processing instructions are
     * consumed silently; reading goes on with the next byte.
     *
     * @access private
     * @return array|boolean    token array, or false at end of stream
     */
    private function _getToken() {
        // Get the data from the input stream
        $element = array();

        while (true) {
            $byte = $this->getByte();

            if (!isset($byte))
                break;

            // "continue 2" restarts the read loop; a bare "continue" inside a
            // switch only acts like "break" and triggers a warning on PHP >= 7.3
            switch ($byte) {
                case WBXML_SWITCH_PAGE:
                    $this->tagcp = $this->getByte();
                    continue 2;

                case WBXML_END:
                    $element[EN_TYPE] = EN_TYPE_ENDTAG;
                    return $element;

                case WBXML_ENTITY:
                    $entity = $this->getMBUInt();
                    $element[EN_TYPE] = EN_TYPE_CONTENT;
                    $element[EN_CONTENT] = $this->entityToCharset($entity);
                    return $element;

                case WBXML_STR_I:
                    $element[EN_TYPE] = EN_TYPE_CONTENT;
                    $element[EN_CONTENT] = $this->getTermStr();
                    return $element;

                case WBXML_LITERAL:
                    $element[EN_TYPE] = EN_TYPE_STARTTAG;
                    $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt());
                    $element[EN_FLAGS] = 0;
                    return $element;

                case WBXML_EXT_I_0:
                case WBXML_EXT_I_1:
                case WBXML_EXT_I_2:
                    // Ignore extensions, but consume their inline string
                    $this->getTermStr();
                    continue 2;

                case WBXML_PI:
                    // Ignore PI, but consume its attributes
                    $this->getAttributes();
                    continue 2;

                case WBXML_LITERAL_C:
                    $element[EN_TYPE] = EN_TYPE_STARTTAG;
                    $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt());
                    $element[EN_FLAGS] = EN_FLAGS_CONTENT;
                    return $element;

                case WBXML_EXT_T_0:
                case WBXML_EXT_T_1:
                case WBXML_EXT_T_2:
                    // Ignore extensions, but consume their integer argument
                    $this->getMBUInt();
                    continue 2;

                case WBXML_STR_T:
                    $element[EN_TYPE] = EN_TYPE_CONTENT;
                    $element[EN_CONTENT] = $this->getStringTableEntry($this->getMBUInt());
                    return $element;

                case WBXML_LITERAL_A:
                    $element[EN_TYPE] = EN_TYPE_STARTTAG;
                    $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt());
                    $element[EN_ATTRIBUTES] = $this->getAttributes();
                    $element[EN_FLAGS] = EN_FLAGS_ATTRIBUTES;
                    return $element;

                case WBXML_EXT_0:
                case WBXML_EXT_1:
                case WBXML_EXT_2:
                    // single byte extensions carry no payload
                    continue 2;

                case WBXML_OPAQUE:
                    $length = $this->getMBUInt();
                    $element[EN_TYPE] = EN_TYPE_CONTENT;
                    $element[EN_CONTENT] = $this->getOpaque($length);
                    return $element;

                case WBXML_LITERAL_AC:
                    $element[EN_TYPE] = EN_TYPE_STARTTAG;
                    $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt());
                    $element[EN_ATTRIBUTES] = $this->getAttributes();
                    $element[EN_FLAGS] = EN_FLAGS_ATTRIBUTES | EN_FLAGS_CONTENT;
                    return $element;

                default:
                    // regular tag: low 6 bits are the tag id in the current code page,
                    // bit 7 flags attributes, bit 6 flags content
                    $hasAttributes = ($byte & 0x80) ? true : false;
                    $hasContent = ($byte & 0x40) ? true : false;

                    $element[EN_TYPE] = EN_TYPE_STARTTAG;
                    $element[EN_TAG] = $this->getMapping($this->tagcp, $byte & 0x3f);
                    $element[EN_FLAGS] = ($hasAttributes ? EN_FLAGS_ATTRIBUTES : 0) | ($hasContent ? EN_FLAGS_CONTENT : 0);
                    if ($hasAttributes)
                        $element[EN_ATTRIBUTES] = $this->getAttributes();
                    return $element;
            }
        }

        // end of stream
        return false;
    }

    /**
     * Gets attributes
     *
     * Reads attribute tokens up to the terminating WBXML_END. Value tokens
     * (entity, inline string, table string, opaque) are appended to the
     * attribute currently being assembled.
     *
     * @access private
     * @return array|boolean    attribute name => value (null if there is no value),
     *                          or false on an invalid token or a truncated stream
     */
    private function getAttributes() {
        $attributes = array();
        $attr = "";

        while (true) {
            $byte = $this->getByte();

            // getByte() returns null at end of stream
            if (!isset($byte))
                break;

            switch ($byte) {
                case WBXML_SWITCH_PAGE:
                    $this->attrcp = $this->getByte();
                    continue 2;

                case WBXML_END:
                    if ($attr != "")
                        $attributes += $this->splitAttribute($attr);

                    return $attributes;

                case WBXML_ENTITY:
                    $entity = $this->getMBUInt();
                    $attr .= $this->entityToCharset($entity);
                    continue 2;

                case WBXML_STR_I:
                    $attr .= $this->getTermStr();
                    continue 2;

                case WBXML_LITERAL:
                    // a literal starts a new attribute, so flush the previous one
                    if ($attr != "")
                        $attributes += $this->splitAttribute($attr);

                    $attr = $this->getStringTableEntry($this->getMBUInt());
                    continue 2;

                case WBXML_EXT_I_0:
                case WBXML_EXT_I_1:
                case WBXML_EXT_I_2:
                    // Ignore extensions, but consume their inline string
                    $this->getTermStr();
                    continue 2;

                case WBXML_PI:
                case WBXML_LITERAL_C:
                    // Invalid
                    return false;

                case WBXML_EXT_T_0:
                case WBXML_EXT_T_1:
                case WBXML_EXT_T_2:
                    // Ignore extensions, but consume their integer argument
                    $this->getMBUInt();
                    continue 2;

                case WBXML_STR_T:
                    $attr .= $this->getStringTableEntry($this->getMBUInt());
                    continue 2;

                case WBXML_LITERAL_A:
                    // Invalid
                    return false;

                case WBXML_EXT_0:
                case WBXML_EXT_1:
                case WBXML_EXT_2:
                    continue 2;

                case WBXML_OPAQUE:
                    $length = $this->getMBUInt();
                    $attr .= $this->getOpaque($length);
                    continue 2;

                case WBXML_LITERAL_AC:
                    // Invalid
                    return false;

                default:
                    // codes below 128 start a new attribute, so flush the previous one;
                    // codes from 128 on are appended to the current attribute
                    if ($byte < 128 && $attr != "") {
                        $attributes += $this->splitAttribute($attr);
                        $attr = "";
                    }
                    $attr .= $this->getMapping($this->attrcp, $byte);
                    continue 2;
            }
        }

        // stream ended before the attribute list was terminated
        return false;
    }

    /**
     * Splits an attribute
     *
     * @param  string      $attr           attribute in the form "name=value" or "name"
     *
     * @access private
     * @return array       single entry array: name => value, or $attr => null if
     *                     there is no "=" (or the "=" is the very first character)
     */
    private function splitAttribute($attr) {
        $pos = strpos($attr, "=");

        // a leading "=" is deliberately treated like "no separator"
        if ($pos === false || $pos === 0)
            return array($attr => null);

        return array(substr($attr, 0, $pos) => substr($attr, $pos + 1));
    }

    /**
     * Reads from the stream until getting a string terminator
     *
     * @access private
     * @return string      the bytes read, without the terminating 0 byte. Reading
     *                     also stops at end of stream.
     */
    private function getTermStr() {
        $str = "";

        while (true) {
            $byte = $this->getByte();

            // null means end of stream, 0 is the string terminator
            if (!isset($byte) || $byte == 0)
                break;

            $str .= chr($byte);
        }

        return $str;
    }

    /**
     * Reads $len from the input stream
     *
     * A single fread() on a buffered non-file stream may return fewer bytes
     * than requested, so reading is repeated (in chunks of at most 8192 bytes)
     * until $len bytes are collected.
     *
     * @param  int         $len            number of bytes to read
     *
     * @access private
     * @return string      the data read. Shorter than $len if the stream ends
     *                     prematurely.
     */
    private function getOpaque($len) {
        $data = "";

        while (strlen($data) < $len) {
            $missing = $len - strlen($data);
            $chunk = fread($this->in, ($missing > 8192) ? 8192 : $missing);

            // stream ended (or failed) before $len bytes were available:
            // stop instead of looping forever
            if ($chunk === false || $chunk === "")
                break;

            $data .= $chunk;
        }

        return $data;
    }

    /**
     * Reads one byte from the input stream
     *
     * @access private
     * @return int|null    the byte value, or null if nothing could be read
     */
    private function getByte() {
        $ch = fread($this->in, 1);

        if ($ch === false || $ch === "")
            return null;

        return ord($ch);
    }

    /**
     * Reads a multi byte unsigned integer from the stream
     *
     * Each byte contributes its low 7 bits, most significant group first; the
     * high bit signals that another byte follows.
     *
     * @access private
     * @return int         the decoded value (0 if the stream is at its end)
     */
    private function getMBUInt() {
        $uint = 0;

        while (true) {
            $byte = $this->getByte();

            // end of stream terminates the number
            if (!isset($byte))
                break;

            $uint |= $byte & 0x7f;

            if (!($byte & 0x80))
                break;

            // make room for the next 7 bit group
            $uint = $uint << 7;
        }

        return $uint;
    }

    /**
     * Reads the string table from the stream
     *
     * The table is preceded by its length as multi byte unsigned integer.
     *
     * @access private
     * @return string      the raw string table, "" if its length is 0
     */
    private function getStringTable() {
        $length = $this->getMBUInt();

        if ($length <= 0)
            return "";

        // use the looping reader, a single fread() may return less than $length
        return $this->getOpaque($length);
    }

    /**
     * Returns the mapping for a specified codepage and id
     *
     * @param  int         $cp             codepage
     * @param  int         $id             id within the codepage
     *
     * @access private
     * @return string|boolean   the mapped name, prefixed with "namespace:" if a
     *                          namespace is registered for the codepage; false if
     *                          there is no mapping
     */
    private function getMapping($cp, $id) {
        if (!isset($this->dtd["codes"][$cp][$id]))
            return false;

        $name = $this->dtd["codes"][$cp][$id];

        if (isset($this->dtd["namespaces"][$cp]))
            return $this->dtd["namespaces"][$cp] . ":" . $name;

        return $name;
    }

    /**
     * Reads the version byte from the stream
     *
     * The byte is remembered in the input buffer so GetPlainInputStream() can
     * still return the complete input.
     *
     * @access private
     * @return
     */
    private function readVersion() {
        $ch = $this->getByte();

        // check for "no byte read" only: a 0x00 byte is consumed data as well
        // and must not get lost
        if (isset($ch)) {
            $this->inputBuffer .= chr($ch);
            $this->version = $ch;
        }
    }
}
00659 
00660 ?>