Back to index

php5  5.3.10
Defines | Functions | Variables
metaphone.c File Reference
#include "php.h"
#include "php_metaphone.h"

Go to the source code of this file.

Defines

#define SH   'X' /* phoneme emitted for the "sh" sound */
#define TH   '0' /* phoneme emitted for the "th" sound */
#define ENCODE(c)   (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)
#define isvowel(c)   (ENCODE(c) & 1) /* AEIOU */
#define NOCHANGE(c)   (ENCODE(c) & 2) /* FJLMNR */
#define AFFECTH(c)   (ENCODE(c) & 4) /* CGPST */
#define MAKESOFT(c)   (ENCODE(c) & 8) /* EIY */
#define NOGHTOF(c)   (ENCODE(c) & 16) /* BDH */
#define Next_Letter   (toupper(word[w_idx+1]))
#define Curr_Letter   (toupper(word[w_idx]))
#define Look_Back_Letter(n)   (w_idx >= n ? toupper(word[w_idx-n]) : '\0')
#define Prev_Letter   (Look_Back_Letter(1))
#define After_Next_Letter
#define Look_Ahead_Letter(n)   (toupper(Lookahead(word+w_idx, n)))
#define Phonize(c)
#define End_Phoned_Word
#define Phone_Len   (p_idx)
#define Isbreak(c)   (!isalpha(c))

Functions

static int metaphone (unsigned char *word, int word_len, long max_phonemes, char **phoned_word, int traditional)
 PHP_FUNCTION (metaphone)
static char Lookahead (char *word, int how_far)

Variables

char _codes [26]

Define Documentation

#define AFFECTH (   c)    (ENCODE(c) & 4) /* CGPST */

Definition at line 96 of file metaphone.c.

#define After_Next_Letter
Value:
(Next_Letter != '\0' ? toupper(word[w_idx+2]) \
                                                                                  : '\0')

Definition at line 120 of file metaphone.c.

#define Curr_Letter   (toupper(word[w_idx]))

Definition at line 114 of file metaphone.c.

#define ENCODE (   c)    (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)

Definition at line 88 of file metaphone.c.

/* NUL-terminates *phoned_word at position p_idx.
 * When the key fills the buffer exactly (p_idx == max_buffer_len) the buffer
 * is first passed through safe_erealloc(..., 1, sizeof(char), max_buffer_len),
 * presumably to gain one extra byte for the terminator -- confirm against the
 * Zend allocator API. max_buffer_len itself is not updated here.
 * Relies on p_idx, max_buffer_len and phoned_word from the enclosing scope.
 * Expands to a bare braced block, so metaphone() uses it both with and
 * without a trailing ';'. */
#define End_Phoned_Word
Value:
{ \
                                                 if (p_idx == max_buffer_len) { \
                                                        *phoned_word = safe_erealloc(*phoned_word, 1, sizeof(char), max_buffer_len); \
                                                 } \
                                                 (*phoned_word)[p_idx] = '\0'; \
                                          }

Definition at line 153 of file metaphone.c.

#define Isbreak (   c)    (!isalpha(c))

Definition at line 163 of file metaphone.c.

#define isvowel (   c)    (ENCODE(c) & 1) /* AEIOU */

Definition at line 90 of file metaphone.c.

#define Look_Ahead_Letter (   n)    (toupper(Lookahead(word+w_idx, n)))

Definition at line 122 of file metaphone.c.

#define Look_Back_Letter (   n)    (w_idx >= n ? toupper(word[w_idx-n]) : '\0')

Definition at line 116 of file metaphone.c.

#define MAKESOFT (   c)    (ENCODE(c) & 8) /* EIY */

Definition at line 99 of file metaphone.c.

#define Next_Letter   (toupper(word[w_idx+1]))

Definition at line 112 of file metaphone.c.

#define NOCHANGE (   c)    (ENCODE(c) & 2) /* FJLMNR */

Definition at line 93 of file metaphone.c.

#define NOGHTOF (   c)    (ENCODE(c) & 16) /* BDH */

Definition at line 102 of file metaphone.c.

#define Phone_Len   (p_idx)

Definition at line 160 of file metaphone.c.

/* Appends the phoneme c to *phoned_word at p_idx and advances p_idx.
 * When p_idx has reached max_buffer_len the buffer is re-allocated first
 * (safe_erealloc with nmemb 2, size sizeof(char), offset max_buffer_len) and
 * max_buffer_len is raised by 2.
 * Relies on p_idx, max_buffer_len and phoned_word from the enclosing scope.
 * Expands to a bare braced block rather than do { } while (0), so
 * "if (x) Phonize(c); else ..." would not compile; every visible use is a
 * plain statement or the last statement of a branch. */
#define Phonize (   c)
Value:
{ \
                                          if (p_idx >= max_buffer_len) { \
                                                 *phoned_word = safe_erealloc(*phoned_word, 2, sizeof(char), max_buffer_len); \
                                                 max_buffer_len += 2; \
                                          } \
                                          (*phoned_word)[p_idx++] = c; \
                                   }

Definition at line 145 of file metaphone.c.

#define Prev_Letter   (Look_Back_Letter(1))

Definition at line 118 of file metaphone.c.

#define SH   'X'

Definition at line 67 of file metaphone.c.

#define TH   '0'

Definition at line 68 of file metaphone.c.


Function Documentation

static char Lookahead ( char *  word,
int  how_far 
) [static]

Definition at line 127 of file metaphone.c.

{
       int steps = 0;

       /* Walk forward at most how_far characters, never stepping past the
        * terminating NUL. */
       while (word[steps] != '\0' && steps < how_far) {
              steps++;
       }

       /* Either the character how_far positions ahead of word, or '\0' when
        * the string ends before that position is reached. */
       return word[steps];
}
static int metaphone ( unsigned char *  word,
int  word_len,
long  max_phonemes,
char **  phoned_word,
int  traditional 
) [static]

Definition at line 167 of file metaphone.c.

{
       /*
        * Computes the metaphone key of `word` into a freshly allocated,
        * NUL-terminated buffer that is returned through *phoned_word.
        *
        * word          input text; it is scanned up to its first '\0'
        *               (word_len is not used to bound the scan).
        * word_len      only sizes the initial buffer when max_phonemes == 0.
        * max_phonemes  0 means "no limit"; a negative value is rejected.
        *               The limit is tested before each input letter, so a
        *               letter that emits two phonemes (X -> "KS") can
        *               exceed it by one.
        * phoned_word   out parameter; left untouched on the -1 paths.
        * traditional   when zero, enables the extra CHR/SCH -> K and
        *               SCHW -> 'sh' rules; the PHP_FUNCTION caller passes 1.
        *
        * Returns 0 on success (the early exit returns SUCCESS, presumably
        * also 0 -- confirm) and -1 for a negative max_phonemes or NULL word.
        */
       int w_idx = 0;                            /* point in the phonization we're at. */
       int p_idx = 0;                            /* end of the phoned phrase */
       int max_buffer_len = 0;            /* maximum length of the destination buffer */

/*-- Parameter checks --*/
       /* Negative phoneme length is meaningless */

       if (max_phonemes < 0)
              return -1;

       /* Empty/null string is meaningless */
       /* Overly paranoid */
       /* assert(word != NULL && word[0] != '\0'); */

       if (word == NULL)
              return -1;

/*-- Allocate memory for our phoned_phrase --*/
       /* Both branches pass 1 as the last safe_emalloc argument, presumably
        * the extra byte for the terminating NUL -- confirm against the Zend
        * allocator API. */
       if (max_phonemes == 0) {    /* Assume largest possible */
              max_buffer_len = word_len;
              *phoned_word = safe_emalloc(sizeof(char), word_len, 1);
       } else {
              max_buffer_len = max_phonemes;   /* NOTE(review): long narrowed to int */
              *phoned_word = safe_emalloc(sizeof(char), max_phonemes, 1);
       }


/*-- The first phoneme has to be processed specially. --*/
       /* Find our first letter */
       for (; !isalpha(Curr_Letter); w_idx++) {
              /* The input held no letters at all: return an empty key. */
              if (Curr_Letter == '\0') {
                     /* End_Phoned_Word expands to a braced block, so the
                      * missing ';' is harmless. */
                     End_Phoned_Word
                            return SUCCESS;      /* For testing */
              }
       }

       switch (Curr_Letter) {
              /* AE becomes E */
       case 'A':
              if (Next_Letter == 'E') {
                     Phonize('E');
                     w_idx += 2;
              }
              /* Remember, preserve vowels at the beginning */
              else {
                     Phonize('A');
                     w_idx++;
              }
              break;
              /* [GKP]N becomes N.
               * Otherwise nothing is emitted and w_idx is not advanced, so
               * the main loop below handles this letter. */
       case 'G':
       case 'K':
       case 'P':
              if (Next_Letter == 'N') {
                     Phonize('N');
                     w_idx += 2;
              }
              break;
              /* WH becomes W,
                 WR becomes R,
                 W if followed by a vowel.
                 In all three cases the following letter is consumed too. */
       case 'W':
              if (Next_Letter == 'R') {
                     Phonize(Next_Letter);
                     w_idx += 2;
              } else if (Next_Letter == 'H' || isvowel(Next_Letter)) {
                     Phonize('W');
                     w_idx += 2;
              }
              /* else ignore: w_idx is unchanged and the main loop's W rule
               * drops the letter */
              break;
              /* X becomes S */
       case 'X':
              Phonize('S');
              w_idx++;
              break;
              /* Vowels are kept */
              /* We did A already
                 case 'A':
                 case 'a':
               */
       case 'E':
       case 'I':
       case 'O':
       case 'U':
              Phonize(Curr_Letter);
              w_idx++;
              break;
       default:
              /* do nothing */
              break;
       }



       /* On to the metaphoning: one input letter per iteration, until the
        * end of the string or until the phoneme limit has been reached. */
       for (; Curr_Letter != '\0' &&
               (max_phonemes == 0 || Phone_Len < max_phonemes);
               w_idx++) {
              /* How many letters to skip because an earlier encoding handled
               * multiple letters */
              unsigned short int skip_letter = 0;


              /* THOUGHT:  It would be nice if, rather than having things like...
               * well, SCI.  For SCI you encode the S, then have to remember
               * to skip the C.  So the phoneme SCI invades both S and C.  It would
               * be better, IMHO, to skip the C from the S part of the encoding.
               * Hell, I'm trying it.
               */

              /* Ignore non-alphas */
              if (!isalpha(Curr_Letter))
                     continue;

              /* Drop duplicates, except CC (the comparison is done on the
               * upper-cased letters, so it is case-insensitive) */
              if (Curr_Letter == Prev_Letter &&
                     Curr_Letter != 'C')
                     continue;

              switch (Curr_Letter) {
                     /* B -> B unless in MB */
              case 'B':
                     if (Prev_Letter != 'M')
                            Phonize('B');
                     break;
                     /* 'sh' if -CIA- or -CH, but not SCH, except SCHW.
                      * (SCHW is handled in S)
                      *  S if -CI-, -CE- or -CY-
                      *  dropped if -SCI-, SCE-, -SCY- (handled in S)
                      *  else K
                      */
              case 'C':
                     if (MAKESOFT(Next_Letter)) {       /* C[IEY] */
                            if (After_Next_Letter == 'A' &&
                                   Next_Letter == 'I') {       /* CIA */
                                   Phonize(SH);
                            }
                            /* SC[IEY] */
                            else if (Prev_Letter == 'S') {
                                   /* Dropped */
                            } else {
                                   Phonize('S');
                            }
                     } else if (Next_Letter == 'H') {
                            if ((!traditional) && (After_Next_Letter == 'R' || Prev_Letter == 'S')) {    /* Christ, School */
                                   Phonize('K');
                            } else {
                                   Phonize(SH);
                            }
                            skip_letter++;       /* consume the H */
                     } else {
                            Phonize('K');
                     }
                     break;
                     /* J if in -DGE-, -DGI- or -DGY-
                      * else T
                      */
              case 'D':
                     if (Next_Letter == 'G' &&
                            MAKESOFT(After_Next_Letter)) {
                            Phonize('J');
                            skip_letter++;       /* consume the G */
                     } else
                            Phonize('T');
                     break;
                     /* F if in -GH and not B--GH, D--GH, -H--GH, -H---GH
                      * else dropped if -GNED, -GN,
                      * else dropped if -DGE-, -DGI- or -DGY- (handled in D)
                      * else J if in -GE-, -GI, -GY and not GG
                      * else K
                      */
              case 'G':
                     if (Next_Letter == 'H') {
                            if (!(NOGHTOF(Look_Back_Letter(3)) ||
                                     Look_Back_Letter(4) == 'H')) {
                                   Phonize('F');
                                   skip_letter++;       /* consume the H */
                            } else {
                                   /* silent; the H is not skipped here but the
                                    * H rule drops it because G is an AFFECTH
                                    * letter */
                            }
                     } else if (Next_Letter == 'N') {
                            if (Isbreak(After_Next_Letter) ||
                                   (After_Next_Letter == 'E' &&
                                    Look_Ahead_Letter(3) == 'D')) {
                                   /* dropped */
                            } else
                                   Phonize('K');
                     } else if (MAKESOFT(Next_Letter) &&
                                      Prev_Letter != 'G') {
                            Phonize('J');
                     } else {
                            Phonize('K');
                     }
                     break;
                     /* H if before a vowel and not after C,G,P,S,T */
              case 'H':
                     if (isvowel(Next_Letter) &&
                            !AFFECTH(Prev_Letter))
                            Phonize('H');
                     break;
                     /* dropped if after C
                      * else K
                      */
              case 'K':
                     if (Prev_Letter != 'C')
                            Phonize('K');
                     break;
                     /* F if before H
                      * else P
                      * (the H is not skipped here; the H rule drops it
                      * because P is an AFFECTH letter)
                      */
              case 'P':
                     if (Next_Letter == 'H') {
                            Phonize('F');
                     } else {
                            Phonize('P');
                     }
                     break;
                     /* K
                      */
              case 'Q':
                     Phonize('K');
                     break;
                     /* 'sh' in -SH-, -SIO- or -SIA- or -SCHW-
                      * else S
                      */
              case 'S':
                     if (Next_Letter == 'I' &&
                            (After_Next_Letter == 'O' ||
                             After_Next_Letter == 'A')) {
                            Phonize(SH);
                     } else if (Next_Letter == 'H') {
                            Phonize(SH);
                            skip_letter++;       /* consume the H */
                     } else if ((!traditional) && (Next_Letter == 'C' && Look_Ahead_Letter(2) == 'H' && Look_Ahead_Letter(3) == 'W')) {
                            Phonize(SH);
                            skip_letter += 2;    /* consume the C and the H */
                     } else {
                            Phonize('S');
                     }
                     break;
                     /* 'sh' in -TIA- or -TIO-
                      * else 'th' before H
                      * else dropped in -TCH-
                      * else T
                      */
              case 'T':
                     if (Next_Letter == 'I' &&
                            (After_Next_Letter == 'O' ||
                             After_Next_Letter == 'A')) {
                            Phonize(SH);
                     } else if (Next_Letter == 'H') {
                            Phonize(TH);
                            skip_letter++;       /* consume the H */
                     } else if (!(Next_Letter == 'C' && After_Next_Letter == 'H')) {
                            Phonize('T');
                     }
                     break;
                     /* F */
              case 'V':
                     Phonize('F');
                     break;
                     /* W before a vowel, else dropped */
              case 'W':
                     if (isvowel(Next_Letter))
                            Phonize('W');
                     break;
                     /* KS: two phonemes for one letter, so the key may end up
                      * one longer than max_phonemes */
              case 'X':
                     Phonize('K');
                     Phonize('S');
                     break;
                     /* Y if followed by a vowel */
              case 'Y':
                     if (isvowel(Next_Letter))
                            Phonize('Y');
                     break;
                     /* S */
              case 'Z':
                     Phonize('S');
                     break;
                     /* No transformation */
              case 'F':
              case 'J':
              case 'L':
              case 'M':
              case 'N':
              case 'R':
                     Phonize(Curr_Letter);
                     break;
              default:
                     /* nothing: the remaining letters (the vowels) are not
                      * encoded past the first position */
                     break;
              }                                         /* END SWITCH */

              /* Step over the letters already consumed by a multi-letter
               * rule; the loop increment then moves past the current one. */
              w_idx += skip_letter;
       }                                                /* END FOR */

       End_Phoned_Word;

       return 0;
}                                                       /* END metaphone */

Here is the caller graph for this function:

PHP_FUNCTION ( metaphone  )

Definition at line 32 of file metaphone.c.

{
       /* Userland entry point: parses (string [, long phones]), runs
        * metaphone() on the string and returns the key, or FALSE when
        * metaphone() reports an error. */
       char *str;
       char *result = 0;           /* set by metaphone() once it has allocated the key */
       int str_len;
       long phones = 0;            /* 0 = no phoneme limit (see metaphone()) */

       /* "s|l" should mean a required string followed by an optional long
        * -- confirm against the zend_parse_parameters type specifiers. */
       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &str_len,
                                                   &phones) == FAILURE) {
              return;
       }

       /* The last argument (traditional) is always 1 here. */
       if (metaphone((unsigned char *)str, str_len, phones, &result, 1) == 0) {
              /* NOTE(review): the 0 flag presumably hands `result` over
               * without duplicating it -- confirm against the Zend API. */
              RETVAL_STRING(result, 0);
       } else {
              /* metaphone() returns -1 only before it allocates, so result
               * is still 0 on this path today; the check is defensive. */
              if (result) {
                     efree(result);
              }
              RETURN_FALSE;
       }
}

Here is the call graph for this function:


Variable Documentation

char _codes[26]
Initial value:
{
       /* Per-letter flag bits, indexed by (upper-case letter - 'A') via ENCODE():
        *    1 = isvowel (AEIOU)       2 = NOCHANGE (FJLMNR)
        *    4 = AFFECTH (CGPST)       8 = MAKESOFT (EIY)
        *   16 = NOGHTOF (BDH)
        * E and I carry 9 = 1 | 8 (vowel and MAKESOFT). */
    /* A  B   C  D   E  F  G  H   I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
       1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0

}

Definition at line 81 of file metaphone.c.