Back to index

php5  5.3.10
soundex.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no>       |
00016    +----------------------------------------------------------------------+
00017  */
00018 /* $Id: soundex.c 321634 2012-01-01 13:15:04Z felipe $ */
00019 
00020 #include "php.h"
00021 #include <stdlib.h>
00022 #include <errno.h>
00023 #include <ctype.h>
00024 #include "php_string.h"
00025 
00026 /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
00027 /* {{{ proto string soundex(string str)
00028    Calculate the soundex key of a string */
00029 PHP_FUNCTION(soundex)
00030 {
00031        char   *str;
00032        int    i, _small, str_len, code, last;
00033        char   soundex[4 + 1];
00034 
00035        static char soundex_table[26] =
00036        {0,                                              /* A */
00037         '1',                                     /* B */
00038         '2',                                     /* C */
00039         '3',                                     /* D */
00040         0,                                              /* E */
00041         '1',                                     /* F */
00042         '2',                                     /* G */
00043         0,                                              /* H */
00044         0,                                              /* I */
00045         '2',                                     /* J */
00046         '2',                                     /* K */
00047         '4',                                     /* L */
00048         '5',                                     /* M */
00049         '5',                                     /* N */
00050         0,                                              /* O */
00051         '1',                                     /* P */
00052         '2',                                     /* Q */
00053         '6',                                     /* R */
00054         '2',                                     /* S */
00055         '3',                                     /* T */
00056         0,                                              /* U */
00057         '1',                                     /* V */
00058         0,                                              /* W */
00059         '2',                                     /* X */
00060         0,                                              /* Y */
00061         '2'};                                    /* Z */
00062 
00063        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
00064               return;
00065        }
00066        if (str_len == 0) {
00067               RETURN_FALSE;
00068        }
00069 
00070        /* build soundex string */
00071        last = -1;
00072        for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
00073               /* convert chars to upper case and strip non-letter chars */
00074               /* BUG: should also map here accented letters used in non */
00075               /* English words or names (also found in English text!): */
00076               /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
00077               code = toupper((int)(unsigned char)str[i]);
00078               if (code >= 'A' && code <= 'Z') {
00079                      if (_small == 0) {
00080                             /* remember first valid char */
00081                             soundex[_small++] = code;
00082                             last = soundex_table[code - 'A'];
00083                      }
00084                      else {
00085                             /* ignore sequences of consonants with same soundex */
00086                             /* code in trail, and vowels unless they separate */
00087                             /* consonant letters */
00088                             code = soundex_table[code - 'A'];
00089                             if (code != last) {
00090                                    if (code != 0) {
00091                                           soundex[_small++] = code;
00092                                    }
00093                                    last = code;
00094                             }
00095                      }
00096               }
00097        }
00098        /* pad with '0' and terminate with 0 ;-) */
00099        while (_small < 4) {
00100               soundex[_small++] = '0';
00101        }
00102        soundex[_small] = '\0';
00103 
00104        RETURN_STRINGL(soundex, _small, 1);
00105 }
00106 /* }}} */
00107 
00108 /*
00109  * Local variables:
00110  * tab-width: 4
00111  * c-basic-offset: 4
00112  * End:
00113  * vim600: sw=4 ts=4 fdm=marker
00114  * vim<600: sw=4 ts=4
00115  */