Back to index

d-push  2.0
mimeDecode.php
Go to the documentation of this file.
00001 <?php
00077 //require_once 'PEAR.php';
00078 
00102 class Mail_mimeDecode
00103 {
00110     var $_input;
00111 
00118     var $_header;
00119 
00126     var $_body;
00127 
00134     var $_error;
00135 
00143     var $_include_bodies;
00144 
00151     var $_decode_bodies;
00152 
00159     var $_decode_headers;
00160 
00168     var $_rfc822_bodies;
00169 
00179     function Mail_mimeDecode($input, $deprecated_linefeed = '')
00180     {
00181         list($header, $body)   = $this->_splitBodyHeader($input);
00182 
00183         $this->_input          = $input;
00184         $this->_header         = $header;
00185         $this->_body           = $body;
00186         $this->_decode_bodies  = false;
00187         $this->_include_bodies = true;
00188         $this->_rfc822_bodies  = false;
00189     }
00190 
00209     function decode($params = null)
00210     {
00211         // determine if this method has been called statically
00212         $isStatic = !(isset($this) && get_class($this) == __CLASS__);
00213 
00214         // Have we been called statically?
00215         // If so, create an object and pass details to that.
00216         if ($isStatic AND isset($params['input'])) {
00217 
00218             $obj = new Mail_mimeDecode($params['input']);
00219             $structure = $obj->decode($params);
00220 
00221         // Called statically but no input
00222         } elseif ($isStatic) {
00223             return $this->raiseError('Called statically and no input given');
00224 
00225         // Called via an object
00226         } else {
00227             $this->_include_bodies = isset($params['include_bodies']) ?
00228                                  $params['include_bodies'] : false;
00229             $this->_decode_bodies  = isset($params['decode_bodies']) ?
00230                                  $params['decode_bodies']  : false;
00231             $this->_decode_headers = isset($params['decode_headers']) ?
00232                                  $params['decode_headers'] : false;
00233             $this->_rfc822_bodies  = isset($params['rfc_822bodies']) ?
00234                                  $params['rfc_822bodies']  : false;
00235             $this->_charset = isset($params['charset']) ?
00236                                  strtolower($params['charset']) : 'utf-8';
00237 
00238             $structure = $this->_decode($this->_header, $this->_body);
00239             if ($structure === false) {
00240                 $structure = $this->raiseError($this->_error);
00241             }
00242         }
00243 
00244         return $structure;
00245     }
00246 
00257     function _decode($headers, $body, $default_ctype = 'text/plain')
00258     {
00259         $return = new stdClass;
00260         $return->headers = array();
00261         $headers = $this->_parseHeaders($headers);
00262 
00263         foreach ($headers as $value) {
00264             if (isset($return->headers[strtolower($value['name'])]) AND !is_array($return->headers[strtolower($value['name'])])) {
00265                 $return->headers[strtolower($value['name'])]   = array($return->headers[strtolower($value['name'])]);
00266                 $return->headers[strtolower($value['name'])][] = $value['value'];
00267 
00268             } elseif (isset($return->headers[strtolower($value['name'])])) {
00269                 $return->headers[strtolower($value['name'])][] = $value['value'];
00270 
00271             } else {
00272                 $return->headers[strtolower($value['name'])] = $value['value'];
00273             }
00274         }
00275 
00276         reset($headers);
00277         while (list($key, $value) = each($headers)) {
00278             $headers[$key]['name'] = strtolower($headers[$key]['name']);
00279             switch ($headers[$key]['name']) {
00280 
00281                 case 'content-type':
00282                     $content_type = $this->_parseHeaderValue($headers[$key]['value']);
00283 
00284                     if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
00285                         $return->ctype_primary   = $regs[1];
00286                         $return->ctype_secondary = $regs[2];
00287                     }
00288 
00289                     if (isset($content_type['other'])) {
00290                         while (list($p_name, $p_value) = each($content_type['other'])) {
00291                             $return->ctype_parameters[$p_name] = $p_value;
00292                         }
00293                     }
00294                     break;
00295 
00296                 case 'content-disposition':
00297                     $content_disposition = $this->_parseHeaderValue($headers[$key]['value']);
00298                     $return->disposition   = $content_disposition['value'];
00299                     if (isset($content_disposition['other'])) {
00300                         while (list($p_name, $p_value) = each($content_disposition['other'])) {
00301                             $return->d_parameters[$p_name] = $p_value;
00302                         }
00303                     }
00304                     break;
00305 
00306                 case 'content-transfer-encoding':
00307                     $content_transfer_encoding = $this->_parseHeaderValue($headers[$key]['value']);
00308                     break;
00309             }
00310         }
00311 
00312         if (isset($content_type)) {
00313             switch (strtolower($content_type['value'])) {
00314                 case 'text/plain':
00315                     $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
00316                     $charset = isset($return->ctype_parameters['charset']) ? $return->ctype_parameters['charset'] : $this->_charset;
00317                     $this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody($body, $encoding, $charset) : $body) : null;
00318                     break;
00319 
00320                 case 'text/html':
00321                     $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
00322                     $charset = isset($return->ctype_parameters['charset']) ? $return->ctype_parameters['charset'] : $this->_charset;
00323                     $this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody($body, $encoding, $charset) : $body) : null;
00324                     break;
00325 
00326                 case 'multipart/parallel':
00327                 case 'multipart/appledouble': // Appledouble mail
00328                 case 'multipart/report': // RFC1892
00329                 case 'multipart/signed': // PGP
00330                 case 'multipart/digest':
00331                 case 'multipart/alternative':
00332                 case 'multipart/related':
00333                 case 'multipart/mixed':
00334                     if(!isset($content_type['other']['boundary'])){
00335                         $this->_error = 'No boundary found for ' . $content_type['value'] . ' part';
00336                         return false;
00337                     }
00338 
00339                     $default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';
00340 
00341                     $parts = $this->_boundarySplit($body, $content_type['other']['boundary']);
00342                     for ($i = 0; $i < count($parts); $i++) {
00343                         list($part_header, $part_body) = $this->_splitBodyHeader($parts[$i]);
00344                         $part = $this->_decode($part_header, $part_body, $default_ctype);
00345                         if($part === false)
00346                             $part = $this->raiseError($this->_error);
00347                         $return->parts[] = $part;
00348                     }
00349                     break;
00350 
00351                 case 'message/rfc822':
00352                     if ($this->_rfc822_bodies) {
00353                         $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
00354                         $charset = isset($return->ctype_parameters['charset']) ? $return->ctype_parameters['charset'] : $this->_charset;
00355                         $return->body = ($this->_decode_bodies ? $this->_decodeBody($body, $encoding, $charset) : $body);
00356                     }
00357 
00358                     $obj = new Mail_mimeDecode($body);
00359                     $return->parts[] = $obj->decode(array('include_bodies' => $this->_include_bodies,
00360                                                           'decode_bodies'  => $this->_decode_bodies,
00361                                                           'decode_headers' => $this->_decode_headers));
00362                     unset($obj);
00363                     break;
00364 
00365                 default:
00366                     if(!isset($content_transfer_encoding['value']))
00367                         $content_transfer_encoding['value'] = '7bit';
00368                     // if there is no explicit charset, then don't try to convert to default charset, and make sure that only text mimetypes are converted
00369                     $charset = (isset($return->ctype_parameters['charset']) && ((isset($return->ctype_primary) && $return->ctype_primary == 'text') || !isset($return->ctype_primary)) )? $return->ctype_parameters['charset']: '';
00370                     $this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody($body, $content_transfer_encoding['value'], $charset) : $body) : null;
00371                     break;
00372             }
00373 
00374         } else {
00375             $ctype = explode('/', $default_ctype);
00376             $return->ctype_primary   = $ctype[0];
00377             $return->ctype_secondary = $ctype[1];
00378             $this->_include_bodies ? $return->body = ($this->_decode_bodies ? $this->_decodeBody($body) : $body) : null;
00379         }
00380 
00381         return $return;
00382     }
00383 
00392     function &getMimeNumbers(&$structure, $no_refs = false, $mime_number = '', $prepend = '')
00393     {
00394         $return = array();
00395         if (!empty($structure->parts)) {
00396             if ($mime_number != '') {
00397                 $structure->mime_id = $prepend . $mime_number;
00398                 $return[$prepend . $mime_number] = &$structure;
00399             }
00400             for ($i = 0; $i < count($structure->parts); $i++) {
00401 
00402 
00403                 if (!empty($structure->headers['content-type']) AND substr(strtolower($structure->headers['content-type']), 0, 8) == 'message/') {
00404                     $prepend      = $prepend . $mime_number . '.';
00405                     $_mime_number = '';
00406                 } else {
00407                     $_mime_number = ($mime_number == '' ? $i + 1 : sprintf('%s.%s', $mime_number, $i + 1));
00408                 }
00409 
00410                 $arr = &Mail_mimeDecode::getMimeNumbers($structure->parts[$i], $no_refs, $_mime_number, $prepend);
00411                 foreach ($arr as $key => $val) {
00412                     $no_refs ? $return[$key] = '' : $return[$key] = &$arr[$key];
00413                 }
00414             }
00415         } else {
00416             if ($mime_number == '') {
00417                 $mime_number = '1';
00418             }
00419             $structure->mime_id = $prepend . $mime_number;
00420             $no_refs ? $return[$prepend . $mime_number] = '' : $return[$prepend . $mime_number] = &$structure;
00421         }
00422 
00423         return $return;
00424     }
00425 
00435     function _splitBodyHeader($input)
00436     {
00437         if (preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match)) {
00438             return array($match[1], $match[2]);
00439         }
00440         $this->_error = 'Could not split header and body';
00441         return false;
00442     }
00443 
00452     function _parseHeaders($input)
00453     {
00454 
00455         if ($input !== '') {
00456             // Unfold the input
00457             $input   = preg_replace("/\r?\n/", "\r\n", $input);
00458             $input   = preg_replace("/\r\n(\t| )+/", ' ', $input);
00459             $headers = explode("\r\n", trim($input));
00460 
00461             foreach ($headers as $value) {
00462                 $hdr_name = substr($value, 0, $pos = strpos($value, ':'));
00463                 $hdr_value = substr($value, $pos+1);
00464                 if($hdr_value[0] == ' ')
00465                     $hdr_value = substr($hdr_value, 1);
00466 
00467                 $return[] = array(
00468                                   'name'  => $hdr_name,
00469                                   'value' => $this->_decode_headers ? $this->_decodeHeader($hdr_value) : $hdr_value
00470                                  );
00471             }
00472         } else {
00473             $return = array();
00474         }
00475 
00476         return $return;
00477     }
00478 
00490     function _parseHeaderValue($input)
00491     {
00492 
00493         if (($pos = strpos($input, ';')) !== false) {
00494 
00495             $return['value'] = trim(substr($input, 0, $pos));
00496             $input = trim(substr($input, $pos+1));
00497 
00498             if (strlen($input) > 0) {
00499 
00500                 // This splits on a semi-colon, if there's no preceeding backslash
00501                 // Now works with quoted values; had to glue the \; breaks in PHP
00502                 // the regex is already bordering on incomprehensible
00503                 //$splitRegex = '/([^;\'"]*[\'"]([^\'"]*([^\'"]*)*)[\'"][^;\'"]*|([^;]+))(;|$)/';
00504                 // simplyfied RegEx - Nokia Mail2 sends boundaries containing ' which break the above regex
00505                 $splitRegex = '/([^;\'"]*[\'"]([^\'"]*)[\'"][^;\'"]*|([^;]+))(;|$)/';
00506                 preg_match_all($splitRegex, $input, $matches);
00507 
00508                 $parameters = array();
00509                 for ($i=0; $i<count($matches[0]); $i++) {
00510                     $param = $matches[0][$i];
00511                     while (substr($param, -2) == '\;') {
00512                         $param .= $matches[0][++$i];
00513                     }
00514                     $parameters[] = $param;
00515                 }
00516 
00517                 for ($i = 0; $i < count($parameters); $i++) {
00518                     $param_name  = trim(substr($parameters[$i], 0, $pos = strpos($parameters[$i], '=')), "'\";\t\\ ");
00519                     $param_value = trim(str_replace('\;', ';', substr($parameters[$i], $pos + 1)), "'\";\t\\ ");
00520                     if (!empty($param_value[0]) && $param_value[0] == '"') {
00521                         $param_value = substr($param_value, 1, -1);
00522                     }
00523                     $return['other'][$param_name] = $param_value;
00524                     $return['other'][strtolower($param_name)] = $param_value;
00525                 }
00526             }
00527         } else {
00528             $return['value'] = trim($input);
00529         }
00530 
00531         return $return;
00532     }
00533 
00542     function _boundarySplit($input, $boundary)
00543     {
00544         $parts = array();
00545 
00546         $bs_possible = substr($boundary, 2, -2);
00547         $bs_check = '\"' . $bs_possible . '\"';
00548 
00549         if ($boundary == $bs_check) {
00550             $boundary = $bs_possible;
00551         }
00552 
00553         $tmp = explode('--' . $boundary, $input);
00554 
00555         for ($i = 1; $i < count($tmp) - 1; $i++) {
00556             $parts[] = $tmp[$i];
00557         }
00558 
00559         return $parts;
00560     }
00561 
00572     function _decodeHeader($input)
00573     {
00574         // Remove white space between encoded-words
00575         $input = preg_replace('/(=\?[^?]+\?(q|b)\?[^?]*\?=)(\s)+=\?/i', '\1=?', $input);
00576 
00577         // For each encoded-word...
00578         while (preg_match('/(=\?([^?]+)\?(q|b)\?([^?]*)\?=)/i', $input, $matches)) {
00579 
00580             $encoded  = $matches[1];
00581             $charset  = $matches[2];
00582             $encoding = $matches[3];
00583             $text     = $matches[4];
00584 
00585             switch (strtolower($encoding)) {
00586                 case 'b':
00587                     $text = base64_decode($text);
00588                     break;
00589 
00590                 case 'q':
00591                     $text = str_replace('_', ' ', $text);
00592                     preg_match_all('/=([a-f0-9]{2})/i', $text, $matches);
00593                     foreach($matches[1] as $value)
00594                         $text = str_replace('='.$value, chr(hexdec($value)), $text);
00595                     break;
00596             }
00597 
00598             $input = str_replace($encoded, $this->_fromCharset($charset, $text), $input);
00599         }
00600 
00601         return $input;
00602     }
00603 
00613     function _decodeBody($input, $encoding = '7bit', $charset = '')
00614     {
00615         switch (strtolower($encoding)) {
00616             case '7bit':
00617                 return $this->_fromCharset($charset, $input);;
00618                 break;
00619 
00620             case '8bit':
00621                 return $this->_fromCharset($charset, $input);
00622                 break;
00623 
00624             case 'quoted-printable':
00625                 return $this->_fromCharset($charset, $this->_quotedPrintableDecode($input));
00626                 break;
00627 
00628             case 'base64':
00629                 return $this->_fromCharset($charset, base64_decode($input));
00630                 break;
00631 
00632             default:
00633                 return $input;
00634         }
00635     }
00636 
00645     function _quotedPrintableDecode($input)
00646     {
00647         // Remove soft line breaks
00648         $input = preg_replace("/=\r?\n/", '', $input);
00649 
00650         // Replace encoded characters
00651         $input = preg_replace('/=([a-f0-9]{2})/ie', "chr(hexdec('\\1'))", $input);
00652 
00653         return $input;
00654     }
00655 
00671     function &uudecode($input)
00672     {
00673         // Find all uuencoded sections
00674         preg_match_all("/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us", $input, $matches);
00675 
00676         for ($j = 0; $j < count($matches[3]); $j++) {
00677 
00678             $str      = $matches[3][$j];
00679             $filename = $matches[2][$j];
00680             $fileperm = $matches[1][$j];
00681 
00682             $file = '';
00683             $str = preg_split("/\r?\n/", trim($str));
00684             $strlen = count($str);
00685 
00686             for ($i = 0; $i < $strlen; $i++) {
00687                 $pos = 1;
00688                 $d = 0;
00689                 $len=(int)(((ord(substr($str[$i],0,1)) -32) - ' ') & 077);
00690 
00691                 while (($d + 3 <= $len) AND ($pos + 4 <= strlen($str[$i]))) {
00692                     $c0 = (ord(substr($str[$i],$pos,1)) ^ 0x20);
00693                     $c1 = (ord(substr($str[$i],$pos+1,1)) ^ 0x20);
00694                     $c2 = (ord(substr($str[$i],$pos+2,1)) ^ 0x20);
00695                     $c3 = (ord(substr($str[$i],$pos+3,1)) ^ 0x20);
00696                     $file .= chr(((($c0 - ' ') & 077) << 2) | ((($c1 - ' ') & 077) >> 4));
00697 
00698                     $file .= chr(((($c1 - ' ') & 077) << 4) | ((($c2 - ' ') & 077) >> 2));
00699 
00700                     $file .= chr(((($c2 - ' ') & 077) << 6) |  (($c3 - ' ') & 077));
00701 
00702                     $pos += 4;
00703                     $d += 3;
00704                 }
00705 
00706                 if (($d + 2 <= $len) && ($pos + 3 <= strlen($str[$i]))) {
00707                     $c0 = (ord(substr($str[$i],$pos,1)) ^ 0x20);
00708                     $c1 = (ord(substr($str[$i],$pos+1,1)) ^ 0x20);
00709                     $c2 = (ord(substr($str[$i],$pos+2,1)) ^ 0x20);
00710                     $file .= chr(((($c0 - ' ') & 077) << 2) | ((($c1 - ' ') & 077) >> 4));
00711 
00712                     $file .= chr(((($c1 - ' ') & 077) << 4) | ((($c2 - ' ') & 077) >> 2));
00713 
00714                     $pos += 3;
00715                     $d += 2;
00716                 }
00717 
00718                 if (($d + 1 <= $len) && ($pos + 2 <= strlen($str[$i]))) {
00719                     $c0 = (ord(substr($str[$i],$pos,1)) ^ 0x20);
00720                     $c1 = (ord(substr($str[$i],$pos+1,1)) ^ 0x20);
00721                     $file .= chr(((($c0 - ' ') & 077) << 2) | ((($c1 - ' ') & 077) >> 4));
00722 
00723                 }
00724             }
00725             $files[] = array('filename' => $filename, 'fileperm' => $fileperm, 'filedata' => $file);
00726         }
00727 
00728         return $files;
00729     }
00730 
00750     function getSendArray()
00751     {
00752         // prevent warning if this is not set
00753         $this->_decode_headers = FALSE;
00754         $headerlist =$this->_parseHeaders($this->_header);
00755         $to = "";
00756         if (!$headerlist) {
00757             return $this->raiseError("Message did not contain headers");
00758         }
00759         foreach($headerlist as $item) {
00760             $header[$item['name']] = $item['value'];
00761             switch (strtolower($item['name'])) {
00762                 case "to":
00763                 case "cc":
00764                 case "bcc":
00765                     $to .= ",".$item['value'];
00766                 default:
00767                    break;
00768             }
00769         }
00770         if ($to == "") {
00771             return $this->raiseError("Message did not contain any recipents");
00772         }
00773         $to = substr($to,1);
00774         return array($to,$header,$this->_body);
00775     }
00776 
00794     function getXML($input)
00795     {
00796         $crlf    =  "\r\n";
00797         $output  = '<?xml version=\'1.0\'?>' . $crlf .
00798                    '<!DOCTYPE email SYSTEM "http://www.phpguru.org/xmail/xmail.dtd">' . $crlf .
00799                    '<email>' . $crlf .
00800                    Mail_mimeDecode::_getXML($input) .
00801                    '</email>';
00802 
00803         return $output;
00804     }
00805 
00816     function _getXML($input, $indent = 1)
00817     {
00818         $htab    =  "\t";
00819         $crlf    =  "\r\n";
00820         $output  =  '';
00821         $headers = @(array)$input->headers;
00822 
00823         foreach ($headers as $hdr_name => $hdr_value) {
00824 
00825             // Multiple headers with this name
00826             if (is_array($headers[$hdr_name])) {
00827                 for ($i = 0; $i < count($hdr_value); $i++) {
00828                     $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $hdr_value[$i], $indent);
00829                 }
00830 
00831             // Only one header of this sort
00832             } else {
00833                 $output .= Mail_mimeDecode::_getXML_helper($hdr_name, $hdr_value, $indent);
00834             }
00835         }
00836 
00837         if (!empty($input->parts)) {
00838             for ($i = 0; $i < count($input->parts); $i++) {
00839                 $output .= $crlf . str_repeat($htab, $indent) . '<mimepart>' . $crlf .
00840                            Mail_mimeDecode::_getXML($input->parts[$i], $indent+1) .
00841                            str_repeat($htab, $indent) . '</mimepart>' . $crlf;
00842             }
00843         } elseif (isset($input->body)) {
00844             $output .= $crlf . str_repeat($htab, $indent) . '<body><![CDATA[' .
00845                        $input->body . ']]></body>' . $crlf;
00846         }
00847 
00848         return $output;
00849     }
00850 
00860     function _getXML_helper($hdr_name, $hdr_value, $indent)
00861     {
00862         $htab   = "\t";
00863         $crlf   = "\r\n";
00864         $return = '';
00865 
00866         $new_hdr_value = ($hdr_name != 'received') ? Mail_mimeDecode::_parseHeaderValue($hdr_value) : array('value' => $hdr_value);
00867         $new_hdr_name  = str_replace(' ', '-', ucwords(str_replace('-', ' ', $hdr_name)));
00868 
00869         // Sort out any parameters
00870         if (!empty($new_hdr_value['other'])) {
00871             foreach ($new_hdr_value['other'] as $paramname => $paramvalue) {
00872                 $params[] = str_repeat($htab, $indent) . $htab . '<parameter>' . $crlf .
00873                             str_repeat($htab, $indent) . $htab . $htab . '<paramname>' . htmlspecialchars($paramname) . '</paramname>' . $crlf .
00874                             str_repeat($htab, $indent) . $htab . $htab . '<paramvalue>' . htmlspecialchars($paramvalue) . '</paramvalue>' . $crlf .
00875                             str_repeat($htab, $indent) . $htab . '</parameter>' . $crlf;
00876             }
00877 
00878             $params = implode('', $params);
00879         } else {
00880             $params = '';
00881         }
00882 
00883         $return = str_repeat($htab, $indent) . '<header>' . $crlf .
00884                   str_repeat($htab, $indent) . $htab . '<headername>' . htmlspecialchars($new_hdr_name) . '</headername>' . $crlf .
00885                   str_repeat($htab, $indent) . $htab . '<headervalue>' . htmlspecialchars($new_hdr_value['value']) . '</headervalue>' . $crlf .
00886                   $params .
00887                   str_repeat($htab, $indent) . '</header>' . $crlf;
00888 
00889         return $return;
00890     }
00891 
00900     function _fromCharset($charset, $input) {
00901         if($charset == '' || (strtolower($charset) == $this->_charset))
00902             return $input;
00903 
00904         // all ISO-8859-1 are converted as if they were Windows-1252 - see Mantis #456
00905         if (strtolower($charset) == 'iso-8859-1')
00906             $charset = 'Windows-1252';
00907 
00908         return @iconv($charset, $this->_charset. "//TRANSLIT", $input);
00909     }
00910 
00919     function raiseError($message) {
00920         ZLog::Write(LOGLEVEL_ERROR, "mimeDecode error: ". $message);
00921         return false;
00922     }
00923 } // End of class