Back to index

d-push  2.0
z_RFC822.php
Go to the documentation of this file.
00001 <?php
00002 // +-----------------------------------------------------------------------+
00003 // | Copyright (c) 2001-2002, Richard Heyes                                |
00004 // | All rights reserved.                                                  |
00005 // |                                                                       |
00006 // | Redistribution and use in source and binary forms, with or without    |
00007 // | modification, are permitted provided that the following conditions    |
00008 // | are met:                                                              |
00009 // |                                                                       |
00010 // | o Redistributions of source code must retain the above copyright      |
00011 // |   notice, this list of conditions and the following disclaimer.       |
00012 // | o Redistributions in binary form must reproduce the above copyright   |
00013 // |   notice, this list of conditions and the following disclaimer in the |
00014 // |   documentation and/or other materials provided with the distribution.|
00015 // | o The names of the authors may not be used to endorse or promote      |
00016 // |   products derived from this software without specific prior written  |
00017 // |   permission.                                                         |
00018 // |                                                                       |
00019 // | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS   |
00020 // | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT     |
00021 // | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
00022 // | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT  |
00023 // | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
00024 // | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT      |
00025 // | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
00026 // | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
00027 // | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT   |
00028 // | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
00029 // | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  |
00030 // |                                                                       |
00031 // +-----------------------------------------------------------------------+
00032 // | Authors: Richard Heyes <richard@phpguru.org>                          |
00033 // |          Chuck Hagenbuch <chuck@horde.org>                            |
00034 // +-----------------------------------------------------------------------+
00035 
/**
 * Parser and validator for RFC 822 style address lists.
 *
 * An address list is split into individual addresses and groups
 * ("name: addr, addr;"), each mailbox is validated and converted into a
 * stdClass object with the properties personal, comment, mailbox, host and,
 * when a source route is present, adl.
 *
 * Failures are reported through raiseError(), which logs the message and
 * yields false.
 */
class Mail_RFC822 {

    /**
     * The address list that is being (or will be) parsed.
     * @var string
     */
    public $address = '';

    /**
     * Domain assumed for addresses that contain no "@".
     * @var string
     */
    public $default_domain = 'localhost';

    /**
     * When true, a group is returned as one object (groupname + addresses);
     * when false, the group's mailboxes are merged into the flat result.
     * @var boolean
     */
    public $nestGroups = true;

    /**
     * When false, _validateAtom() accepts every atom without checking it.
     * @var boolean
     */
    public $validate = true;

    /**
     * Raw addresses/groups split off by _splitAddresses(); each entry is
     * array('address' => string, 'group' => boolean).
     * @var array
     */
    public $addresses = array();

    /**
     * The parsed result returned by parseAddressList().
     * @var array
     */
    public $structure = array();

    /**
     * Message of the last error, or null when no error occurred.
     * @var string|null
     */
    public $error = null;

    /**
     * Index (into the exploded parts) of the last part consumed by the most
     * recent successful _splitCheck() call.
     * @var integer|null
     */
    public $index = null;

    /**
     * Number of groups seen by _splitAddresses(). It is not reset between
     * parseAddressList() calls.
     * @var integer
     */
    public $num_groups = 0;

    /**
     * Marker property; parseAddressList() tests for it to find out whether
     * it is running on a real instance.
     * @var boolean
     */
    public $mailRFC822 = true;

    /**
     * When non-empty, splitting stops once this many addresses/groups have
     * been collected.
     * @var integer|null
     */
    public $limit = null;

    /**
     * Stores the given settings. Arguments left at null keep the defaults.
     *
     * @param string  $address        Address list to parse.
     * @param string  $default_domain Domain for addresses without "@".
     * @param boolean $nest_groups    Whether groups are returned nested.
     * @param boolean $validate       Whether atoms are validated.
     * @param integer $limit          Maximum number of addresses to split off.
     */
    public function __construct($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
    {
        if (isset($address))        $this->address        = $address;
        if (isset($default_domain)) $this->default_domain = $default_domain;
        if (isset($nest_groups))    $this->nestGroups     = $nest_groups;
        if (isset($validate))       $this->validate       = $validate;
        if (isset($limit))          $this->limit          = $limit;
    }

    /**
     * Legacy (PHP 4 style) constructor name, kept so that existing callers
     * such as parent::Mail_RFC822(...) continue to work. PHP 8 no longer
     * treats this method as a constructor, hence the real work lives in
     * __construct().
     *
     * @param string  $address        See __construct().
     * @param string  $default_domain See __construct().
     * @param boolean $nest_groups    See __construct().
     * @param boolean $validate       See __construct().
     * @param integer $limit          See __construct().
     */
    public function Mail_RFC822($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
    {
        $this->__construct($address, $default_domain, $nest_groups, $validate, $limit);
    }

    /**
     * Parses an address list and returns the resulting structure.
     *
     * Arguments left at null keep the values currently stored on the object.
     * If the (trimmed) address list is empty an empty array is returned
     * before any of the other settings are applied.
     *
     * @param string  $address        Address list to parse.
     * @param string  $default_domain Domain for addresses without "@".
     * @param boolean $nest_groups    Whether groups are returned nested.
     * @param boolean $validate       Whether atoms are validated.
     * @param integer $limit          Maximum number of addresses to split off.
     *
     * @return array|false The parsed structure, or the result of
     *                     raiseError() (false) on failure.
     */
    public function parseAddressList($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
    {
        // Fallback for static invocation on PHP versions that still allow it.
        if (!isset($this) || !isset($this->mailRFC822)) {
            $obj = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit);
            return $obj->parseAddressList();
        }

        if (isset($address))        $this->address        = $address;
        if (strlen(trim($this->address)) == 0) return array();
        if (isset($default_domain)) $this->default_domain = $default_domain;
        if (isset($nest_groups))    $this->nestGroups     = $nest_groups;
        if (isset($validate))       $this->validate       = $validate;
        if (isset($limit))          $this->limit          = $limit;

        $this->structure  = array();
        $this->addresses  = array();
        $this->error      = null;
        $this->index      = null;

        // Unfold any long lines in $this->address.
        $this->address = preg_replace('/\r?\n/', "\r\n", $this->address);
        $this->address = preg_replace('/\r\n(\t| )+/', ' ', $this->address);

        // Peel one address/group off the front per iteration until the
        // remainder is empty ('') or splitting failed (false).
        while ($this->address = $this->_splitAddresses($this->address));
        if ($this->address === false || isset($this->error)) {
            return $this->raiseError($this->error);
        }

        // Validate each address individually.  If we encounter an invalid
        // address, stop iterating and return an error immediately.
        foreach ($this->addresses as $address) {
            $valid = $this->_validateAddress($address);

            if ($valid === false || isset($this->error)) {
                return $this->raiseError($this->error);
            }

            if (!$this->nestGroups) {
                $this->structure = array_merge($this->structure, $valid);
            } else {
                $this->structure[] = $valid;
            }
        }

        return $this->structure;
    }

    /**
     * Splits the first address or group off the front of $address, appends
     * it to $this->addresses and returns what is left.
     *
     * @param string $address Remaining, not yet split address list.
     *
     * @return string|false The remainder ('' when nothing is left or the
     *                      limit was reached), false on error.
     */
    public function _splitAddresses($address)
    {
        if (!empty($this->limit) && count($this->addresses) == $this->limit) {
            return '';
        }

        $is_group = $this->_isGroup($address);
        if (isset($this->error)) {
            return false;
        }

        // Groups are terminated by ';', plain addresses by ','.
        $split_char = $is_group ? ';' : ',';

        $parts  = explode($split_char, $address);
        $string = $this->_splitCheck($parts, $split_char);

        // _splitCheck() has already recorded the error; stop here instead
        // of storing a bogus empty address.
        if ($string === false) {
            return false;
        }

        if ($is_group) {
            // If $string does not contain a colon outside of
            // brackets/quotes etc then something's fubar.

            // First check there's a colon at all:
            if (strpos($string, ':') === false) {
                $this->error = 'Invalid address: ' . $string;
                return false;
            }

            // Now check it's outside of brackets/quotes:
            if (!$this->_splitCheck(explode(':', $string), ':')) {
                return false;
            }

            // We must have a group at this point, so increase the counter:
            $this->num_groups++;
        }

        // $string now contains the first full address/group.
        $this->addresses[] = array(
                                   'address' => trim($string),
                                   'group'   => $is_group
                                   );

        // Remove the now stored address from the initial line, the +1
        // is to account for the explode character.
        $address = trim(substr($address, strlen($string) + 1));

        // A group may be followed by a comma separating it from the next
        // address; drop that comma.
        if ($is_group && substr($address, 0, 1) == ',') {
            $address = trim(substr($address, 1));
        }

        // Either more input to process or the empty string.
        return $address;
    }

    /**
     * Tells whether the first address in $address is a group, i.e. whether
     * it contains a colon that _splitCheck() does not consider enclosed in
     * quotes/brackets or escaped.
     *
     * @param string $address Address list to inspect.
     *
     * @return boolean True if the first address is a group.
     */
    public function _isGroup($address)
    {
        // First comma not in quotes, angles or escaped:
        $parts  = explode(',', $address);
        $string = $this->_splitCheck($parts, ',');

        // Now we have the first address, we can reliably check for a
        // group by searching for a colon that's not escaped or in
        // quotes or angle brackets.
        $parts = explode(':', $string);
        if (count($parts) > 1) {
            $string2 = $this->_splitCheck($parts, ':');
            return ($string2 !== $string);
        }

        return false;
    }

    /**
     * Re-joins exploded parts until the joined string has no unclosed
     * quotes or brackets and does not end in a backslash.
     *
     * On success $this->index holds the index of the last part used.
     *
     * @param array  $parts Result of explode($char, ...).
     * @param string $char  The character the string was exploded on.
     *
     * @return string|false The joined string, or false (with $this->error
     *                      set) when the parts run out while still unclosed.
     */
    public function _splitCheck($parts, $char)
    {
        $string    = $parts[0];
        $num_parts = count($parts);

        for ($i = 0; $i < $num_parts; $i++) {
            if ($this->_hasUnclosedQuotes($string)
                || $this->_hasUnclosedBrackets($string, '<>')
                || $this->_hasUnclosedBrackets($string, '[]')
                || $this->_hasUnclosedBrackets($string, '()')
                || substr($string, -1) == '\\') {
                if (isset($parts[$i + 1])) {
                    $string = $string . $char . $parts[$i + 1];
                } else {
                    $this->error = 'Invalid address spec. Unclosed bracket or quotes';
                    return false;
                }
            } else {
                $this->index = $i;
                break;
            }
        }

        return $string;
    }

    /**
     * Checks whether a string contains an odd number of double quotes,
     * ignoring quotes that are directly preceded by a backslash.
     *
     * @param string $string String to check.
     *
     * @return boolean True if there is an unclosed quote.
     */
    public function _hasUnclosedQuotes($string)
    {
        $string     = explode('"', $string);
        $string_cnt = count($string);

        // A piece ending in a backslash means the quote after it is escaped.
        for ($i = 0; $i < (count($string) - 1); $i++) {
            if (substr($string[$i], -1) == '\\') {
                $string_cnt--;
            }
        }

        // n quotes produce n + 1 pieces, so an even piece count means an
        // odd number of (unescaped) quotes.
        return ($string_cnt % 2 === 0);
    }

    /**
     * Checks whether a string has more opening than closing brackets of the
     * given kind; occurrences discounted by _hasUnclosedBracketsSub() are
     * not counted.
     *
     * When there are more closing than opening brackets $this->error is set
     * and false is returned.
     *
     * @param string $string String to check.
     * @param string $chars  Two characters: opening and closing bracket.
     *
     * @return boolean True if there are unclosed brackets.
     */
    public function _hasUnclosedBrackets($string, $chars)
    {
        $num_angle_start = substr_count($string, $chars[0]);
        $num_angle_end   = substr_count($string, $chars[1]);

        $this->_hasUnclosedBracketsSub($string, $num_angle_start, $chars[0]);
        $this->_hasUnclosedBracketsSub($string, $num_angle_end, $chars[1]);

        if ($num_angle_start < $num_angle_end) {
            $this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')';
            return false;
        }

        return ($num_angle_start > $num_angle_end);
    }

    /**
     * Helper for _hasUnclosedBrackets(): walks over the growing prefixes of
     * $string that end right before each occurrence of $char (the last
     * iteration sees the whole string) and decrements $num for every prefix
     * that ends in a backslash or has unclosed quotes.
     *
     * @param string  $string String to inspect.
     * @param integer &$num   Occurrence counter, adjusted in place.
     * @param string  $char   The bracket character being counted.
     *
     * @return integer The adjusted counter.
     */
    public function _hasUnclosedBracketsSub($string, &$num, $char)
    {
        $parts = explode($char, $string);
        for ($i = 0; $i < count($parts); $i++) {
            if (substr($parts[$i], -1) == '\\' || $this->_hasUnclosedQuotes($parts[$i])) {
                $num--;
            }
            // Turn the next part into the full prefix up to that point.
            if (isset($parts[$i + 1])) {
                $parts[$i + 1] = $parts[$i] . $char . $parts[$i + 1];
            }
        }

        return $num;
    }

    /**
     * Validates one entry produced by _splitAddresses() (a single address
     * or a whole group) and converts its mailboxes into objects.
     *
     * @param array $address array('address' => string, 'group' => boolean).
     *
     * @return mixed False on failure. Otherwise, with nested groups: a
     *               mailbox object, or for a group an object with
     *               groupname and addresses; without nesting: an array of
     *               mailbox objects.
     */
    public function _validateAddress($address)
    {
        $is_group  = false;
        $addresses = array();
        $structure = array();

        if ($address['group']) {
            $is_group = true;

            // Get the group part of the name
            $parts     = explode(':', $address['address']);
            $groupname = $this->_splitCheck($parts, ':');

            // And validate the group part of the name.
            if (!$this->_validatePhrase($groupname)) {
                $this->error = 'Group name did not validate.';
                return false;
            }

            // Don't include groups if we are not nesting
            // them. This avoids returning invalid addresses.
            if ($this->nestGroups) {
                $structure = new stdClass;
                $structure->groupname = $groupname;
            }

            $address['address'] = ltrim(substr($address['address'], strlen($groupname . ':')));
        }

        // If a group then split on comma and put into an array.
        // Otherwise, just put the whole address in an array.
        if ($is_group) {
            while (strlen($address['address']) > 0) {
                $parts       = explode(',', $address['address']);
                $addresses[] = $this->_splitCheck($parts, ',');
                $address['address'] = trim(substr($address['address'], strlen(end($addresses) . ',')));
            }
        } else {
            $addresses[] = $address['address'];
        }

        // A group without members ("Groupname:;") leaves $addresses empty.
        if (!count($addresses)) {
            $this->error = 'Empty group.';
            return false;
        }

        // Trim the whitespace from all of the address strings. The result
        // has to be assigned; array_map() does not modify its argument.
        $addresses = array_map('trim', $addresses);

        // Validate each mailbox.
        // Format could be one of: name <geezer@domain.com>
        //                         geezer@domain.com
        //                         geezer
        // ... or any other format valid by RFC 822.
        // validateMailbox() takes its argument by reference and replaces
        // the string with the parsed mailbox object.
        for ($i = 0; $i < count($addresses); $i++) {
            if (!$this->validateMailbox($addresses[$i])) {
                if (empty($this->error)) {
                    $this->error = 'Validation failed for: ' . $addresses[$i];
                }
                return false;
            }
        }

        if ($this->nestGroups) {
            // Nested format
            if ($is_group) {
                $structure->addresses = $addresses;
            } else {
                $structure = $addresses[0];
            }
        } else {
            // Flat format
            if ($is_group) {
                $structure = array_merge($structure, $addresses);
            } else {
                $structure = $addresses;
            }
        }

        return $structure;
    }

    /**
     * Validates a phrase: a whitespace separated sequence of quoted strings
     * and atoms.
     *
     * @param string $phrase The phrase to check.
     *
     * @return boolean True if every part validates.
     */
    public function _validatePhrase($phrase)
    {
        // Splits on one or more Tab or space.
        $parts = preg_split('/[ \\x09]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);
        $phrase_parts = array();
        while (count($parts) > 0) {
            // Re-join pieces that belong to one quoted string and drop the
            // consumed pieces from the front of $parts.
            $phrase_parts[] = $this->_splitCheck($parts, ' ');
            for ($i = 0; $i < $this->index + 1; $i++) {
                array_shift($parts);
            }
        }

        foreach ($phrase_parts as $part) {
            // If quoted string:
            if (substr($part, 0, 1) == '"') {
                if (!$this->_validateQuotedString($part)) {
                    return false;
                }
                continue;
            }

            // Otherwise it's an atom:
            if (!$this->_validateAtom($part)) return false;
        }

        return true;
    }

    /**
     * Validates an atom. Always succeeds when $this->validate is false.
     *
     * @param string $atom The string to check.
     *
     * @return boolean True if the atom is acceptable.
     */
    public function _validateAtom($atom)
    {
        if (!$this->validate) {
            // Validation has been turned off; assume the atom is okay.
            return true;
        }

        // Must be non-empty and consist only of bytes 0x00 - 0x7E.
        if (!preg_match('/^[\\x00-\\x7E]+$/i', $atom)) {
            return false;
        }

        // Check for specials:
        if (preg_match('/[][()<>@,;\\:". ]/', $atom)) {
            return false;
        }

        // Check for control characters (ASCII 0-31):
        if (preg_match('/[\\x00-\\x1F]+/', $atom)) {
            return false;
        }

        return true;
    }

    /**
     * Validates a quoted string. After removing backslash escaped
     * characters the content must not contain a carriage return, a
     * backslash or a double quote.
     *
     * @param string $qstring The string including its surrounding quotes.
     *
     * @return boolean True if the quoted string is acceptable.
     */
    public function _validateQuotedString($qstring)
    {
        // Leading and trailing "
        $qstring = substr($qstring, 1, -1);

        // Perform check, removing quoted characters first.
        return !preg_match('/[\x0D\\\\"]/', preg_replace('/\\\\./', '', $qstring));
    }

    /**
     * Validates a single mailbox and converts it into an object.
     *
     * Comments in parentheses are collected and removed first. On success
     * $mailbox is replaced by a stdClass with the properties personal,
     * comment, mailbox and host, plus adl when a route was given.
     *
     * @param string &$mailbox The mailbox string; overwritten on success.
     *
     * @return boolean True on success, false otherwise.
     */
    public function validateMailbox(&$mailbox)
    {
        // A couple of defaults.
        $phrase     = '';
        $comment    = '';
        $comments   = array();
        $route_addr = null;
        $addr_spec  = null;

        // Catch any RFC822 comments and store them separately.
        $_mailbox = $mailbox;
        while (strlen(trim($_mailbox)) > 0) {
            $parts = explode('(', $_mailbox);
            $before_comment = $this->_splitCheck($parts, '(');
            if ($before_comment == $_mailbox) {
                // No further comment in the remainder.
                break;
            }

            // First char should be a (.
            $comment    = substr(str_replace($before_comment, '', $_mailbox), 1);
            $parts      = explode(')', $comment);
            $comment    = $this->_splitCheck($parts, ')');
            $comments[] = $comment;

            // +1 is for the trailing )
            $_mailbox   = substr($_mailbox, strpos($_mailbox, $comment)+strlen($comment)+1);
        }

        foreach ($comments as $comment) {
            $mailbox = str_replace("($comment)", '', $mailbox);
        }

        $mailbox = trim($mailbox);

        if (substr($mailbox, -1) == '>' && substr($mailbox, 0, 1) != '<') {
            // name + route-addr
            $parts  = explode('<', $mailbox);
            $name   = $this->_splitCheck($parts, '<');

            $phrase     = trim($name);
            $route_addr = trim(substr($mailbox, strlen($name.'<'), -1));

            //z-push fix for umlauts and other special chars
            if (substr($phrase, 0, 1) != '"' && substr($phrase, -1) != '"') {
                $phrase = '"'.$phrase.'"';
            }

            if ($this->_validatePhrase($phrase) === false || ($route_addr = $this->_validateRouteAddr($route_addr)) === false) {
                return false;
            }
        } else {
            // Only got addr-spec; first snip angle brackets if present.
            if (substr($mailbox, 0, 1) == '<' && substr($mailbox, -1) == '>') {
                $addr_spec = substr($mailbox, 1, -1);
            } else {
                $addr_spec = $mailbox;
            }

            if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
                return false;
            }
        }

        // Construct the object that will be returned.
        $mbox = new stdClass();

        // Add the phrase (even if empty) and comments
        $mbox->personal = $phrase;
        $mbox->comment  = $comments;

        if (isset($route_addr)) {
            $mbox->mailbox = $route_addr['local_part'];
            $mbox->host    = $route_addr['domain'];
            // adl is only exposed when a route was actually present.
            if ($route_addr['adl'] !== '') {
                $mbox->adl = $route_addr['adl'];
            }
        } else {
            $mbox->mailbox = $addr_spec['local_part'];
            $mbox->host    = $addr_spec['domain'];
        }

        $mailbox = $mbox;
        return true;
    }

    /**
     * Validates the content of angle brackets: an optional route followed
     * by a colon, then an addr-spec.
     *
     * @param string $route_addr The string to check (without < and >).
     *
     * @return array|false array('adl' => route or '', 'local_part' => ...,
     *                     'domain' => ...) or false on failure.
     */
    public function _validateRouteAddr($route_addr)
    {
        // Check for colon.
        if (strpos($route_addr, ':') !== false) {
            $parts = explode(':', $route_addr);
            $route = $this->_splitCheck($parts, ':');
        } else {
            $route = $route_addr;
        }

        $return = array();

        // If $route is same as $route_addr then the colon was in
        // quotes or brackets or, of course, non existent.
        if ($route === $route_addr) {
            $return['adl'] = '';
            $addr_spec     = $route_addr;
        } else {
            // Validate route part.
            if (($route = $this->_validateRoute($route)) === false) {
                return false;
            }

            $return['adl'] = $route;
            $addr_spec     = substr($route_addr, strlen($route . ':'));
        }

        // Validate addr-spec part.
        if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
            return false;
        }

        return array_merge($return, $addr_spec);
    }

    /**
     * Validates a route: a comma separated list of domains, each of which
     * is checked with any "@" removed.
     *
     * @param string $route The string to check.
     *
     * @return string|false The unmodified route, or false on failure.
     */
    public function _validateRoute($route)
    {
        // Split on comma.
        $domains = explode(',', trim($route));

        foreach ($domains as $domain) {
            $domain = str_replace('@', '', trim($domain));
            if (!$this->_validateDomain($domain)) return false;
        }

        return $route;
    }

    /**
     * Validates a domain by splitting it on dots (keeping bracketed or
     * quoted sections together) and validating every sub-domain.
     *
     * @param string $domain The string to check.
     *
     * @return string|false The unmodified domain, or false on failure.
     */
    public function _validateDomain($domain)
    {
        // Note the different use of $subdomains and $sub_domains
        $subdomains  = explode('.', $domain);
        $sub_domains = array();

        while (count($subdomains) > 0) {
            $sub_domains[] = $this->_splitCheck($subdomains, '.');
            for ($i = 0; $i < $this->index + 1; $i++) {
                array_shift($subdomains);
            }
        }

        foreach ($sub_domains as $sub_domain) {
            if (!$this->_validateSubdomain(trim($sub_domain))) {
                return false;
            }
        }

        // Managed to get here, so return input.
        return $domain;
    }

    /**
     * Validates a sub-domain: either a domain literal in square brackets
     * or an atom.
     *
     * @param string $subdomain The string to check.
     *
     * @return boolean True if the sub-domain validates.
     */
    public function _validateSubdomain($subdomain)
    {
        if (preg_match('|^\[(.*)]$|', $subdomain, $arr)) {
            return (bool) $this->_validateDliteral($arr[1]);
        }

        return (bool) $this->_validateAtom($subdomain);
    }

    /**
     * Validates the content of a domain literal. It is rejected when any
     * character is followed by "[", "]", a carriage return or a backslash.
     *
     * @param string $dliteral The content between the square brackets.
     *
     * @return boolean True if the literal validates.
     */
    public function _validateDliteral($dliteral)
    {
        // The capture group can only be populated when the pattern matches,
        // in which case the literal is rejected anyway; there is nothing
        // further to inspect when it does not match.
        return !preg_match('/(.)[][\x0D\\\\]/', $dliteral);
    }

    /**
     * Validates an addr-spec (local-part@domain). Without an "@" the whole
     * string is the local part and $this->default_domain is used.
     *
     * @param string $addr_spec The string to check.
     *
     * @return array|false array('local_part' => ..., 'domain' => ...) or
     *                     false on failure.
     */
    public function _validateAddrSpec($addr_spec)
    {
        $addr_spec = trim($addr_spec);

        if (strpos($addr_spec, '@') !== false) {
            // Split on the first @ that is not quoted/bracketed/escaped.
            $parts      = explode('@', $addr_spec);
            $local_part = $this->_splitCheck($parts, '@');
            $domain     = substr($addr_spec, strlen($local_part . '@'));
        } else {
            // No @ sign so assume the default domain.
            $local_part = $addr_spec;
            $domain     = $this->default_domain;
        }

        if (($local_part = $this->_validateLocalPart($local_part)) === false) return false;
        if (($domain     = $this->_validateDomain($domain)) === false) return false;

        // Got here so return successful.
        return array('local_part' => $local_part, 'domain' => $domain);
    }

    /**
     * Validates a local part: dot separated words, each of which must be a
     * valid phrase and must not contain a space unless it starts with a
     * double quote.
     *
     * @param string $local_part The string to check.
     *
     * @return string|false The unmodified local part, or false on failure.
     */
    public function _validateLocalPart($local_part)
    {
        $parts = explode('.', $local_part);
        $words = array();

        // Split the local_part into words.
        while (count($parts) > 0) {
            $words[] = $this->_splitCheck($parts, '.');
            for ($i = 0; $i < $this->index + 1; $i++) {
                array_shift($parts);
            }
        }

        // Validate each word.
        foreach ($words as $word) {
            // If this word contains an unquoted space, it is invalid. (6.2.4)
            // strpos() returns 0 for a leading space, so compare against
            // false explicitly.
            if (strpos($word, ' ') !== false && $word[0] !== '"') {
                return false;
            }

            if ($this->_validatePhrase(trim($word)) === false) return false;
        }

        // Managed to get here, so return the input.
        return $local_part;
    }

    /**
     * Returns an approximate count of the addresses in $data: the number of
     * pieces obtained by splitting on commas that are not preceded by a
     * backslash. Commas inside quotes or comments are counted as well.
     *
     * @param string $data Address list.
     *
     * @return integer Approximate number of addresses.
     */
    public function approximateCount($data)
    {
        return count(preg_split('/(?<!\\\\),/', $data));
    }

    /**
     * Simple regular expression check for a "local@domain.tld" address.
     *
     * @param string  $data   Address to check (it is trimmed first).
     * @param boolean $strict Use the narrower character set for the local
     *                        part.
     *
     * @return array|false array(local part, domain) on match, else false.
     */
    public function isValidInetAddress($data, $strict = false)
    {
        $regex = $strict ? '/^([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i' : '/^([*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i';
        if (preg_match($regex, trim($data), $matches)) {
            return array($matches[1], $matches[2]);
        }

        return false;
    }

    /**
     * Logs an error through ZLog and returns false.
     *
     * @param string $message Error message.
     *
     * @return boolean Always false.
     */
    public function raiseError($message) {
        ZLog::Write(LOGLEVEL_ERROR, "z_RFC822 error: ". $message);
        return false;
    }
}