Back to index

php5  5.3.10
levenshtein.c
Go to the documentation of this file.
00001 /*
00002    +----------------------------------------------------------------------+
00003    | PHP Version 5                                                        |
00004    +----------------------------------------------------------------------+
00005    | Copyright (c) 1997-2012 The PHP Group                                |
00006    +----------------------------------------------------------------------+
00007    | This source file is subject to version 3.01 of the PHP license,      |
00008    | that is bundled with this package in the file LICENSE, and is        |
00009    | available through the world-wide-web at the following url:           |
00010    | http://www.php.net/license/3_01.txt                                  |
00011    | If you did not receive a copy of the PHP license and are unable to   |
00012    | obtain it through the world-wide-web, please send a note to          |
00013    | license@php.net so we can mail you a copy immediately.               |
00014    +----------------------------------------------------------------------+
00015    | Author: Hartmut Holzgraefe <hholzgra@php.net>                        |
00016    +----------------------------------------------------------------------+
00017  */
00018 /* $Id: levenshtein.c 321634 2012-01-01 13:15:04Z felipe $ */
00019 
00020 #include "php.h"
00021 #include <stdlib.h>
00022 #include <errno.h>
00023 #include <ctype.h>
00024 #include "php_string.h"
00025 
00026 #define LEVENSHTEIN_MAX_LENGTH 255
00027 
00028 /* {{{ reference_levdist
00029  * reference implementation, only optimized for memory usage, not speed */
00030 static int reference_levdist(const char *s1, int l1, const char *s2, int l2, int cost_ins, int cost_rep, int cost_del )
00031 {
00032        int *p1, *p2, *tmp;
00033        int i1, i2, c0, c1, c2;
00034 
00035        if (l1 == 0) {
00036               return l2 * cost_ins;
00037        }
00038        if (l2 == 0) {
00039               return l1 * cost_del;
00040        }
00041 
00042        if ((l1 > LEVENSHTEIN_MAX_LENGTH) || (l2 > LEVENSHTEIN_MAX_LENGTH)) {
00043               return -1;
00044        }
00045        p1 = safe_emalloc((l2 + 1), sizeof(int), 0);
00046        p2 = safe_emalloc((l2 + 1), sizeof(int), 0);
00047 
00048        for (i2 = 0; i2 <= l2; i2++) {
00049               p1[i2] = i2 * cost_ins;
00050        }
00051        for (i1 = 0; i1 < l1 ; i1++) {
00052               p2[0] = p1[0] + cost_del;
00053 
00054               for (i2 = 0; i2 < l2; i2++) {
00055                      c0 = p1[i2] + ((s1[i1] == s2[i2]) ? 0 : cost_rep);
00056                      c1 = p1[i2 + 1] + cost_del;
00057                      if (c1 < c0) {
00058                             c0 = c1;
00059                      }
00060                      c2 = p2[i2] + cost_ins;
00061                      if (c2 < c0) {
00062                             c0 = c2;
00063                      }
00064                      p2[i2 + 1] = c0;
00065               }
00066               tmp = p1;
00067               p1 = p2;
00068               p2 = tmp;
00069        }
00070        c0 = p1[l2];
00071 
00072        efree(p1);
00073        efree(p2);
00074 
00075        return c0;
00076 }
00077 /* }}} */
00078 
00079 /* {{{ custom_levdist
00080  */
00081 static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC)
00082 {
00083        php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet");
00084        /* not there yet */
00085 
00086        return -1;
00087 }
00088 /* }}} */
00089 
00090 /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del])
00091    Calculate Levenshtein distance between two strings */
00092 PHP_FUNCTION(levenshtein)
00093 {
00094        int argc = ZEND_NUM_ARGS();
00095        char *str1, *str2;
00096        char *callback_name;
00097        int str1_len, str2_len, callback_len;
00098        long cost_ins, cost_rep, cost_del;
00099        int distance = -1;
00100 
00101        switch (argc) {
00102               case 2: /* just two strings: use maximum performance version */
00103                      if (zend_parse_parameters(2 TSRMLS_CC, "ss", &str1, &str1_len, &str2, &str2_len) == FAILURE) {
00104                             return;
00105                      }
00106                      distance = reference_levdist(str1, str1_len, str2, str2_len, 1, 1, 1);
00107                      break;
00108 
00109               case 5: /* more general version: calc cost by ins/rep/del weights */
00110                      if (zend_parse_parameters(5 TSRMLS_CC, "sslll", &str1, &str1_len, &str2, &str2_len, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
00111                             return;
00112                      }
00113                      distance = reference_levdist(str1, str1_len, str2, str2_len, cost_ins, cost_rep, cost_del);
00114                      break;
00115 
00116               case 3: /* most general version: calc cost by user-supplied function */
00117                      if (zend_parse_parameters(3 TSRMLS_CC, "sss", &str1, &str1_len, &str2, &str2_len, &callback_name, &callback_len) == FAILURE) {
00118                             return;
00119                      }
00120                      distance = custom_levdist(str1, str2, callback_name TSRMLS_CC);
00121                      break;
00122 
00123               default:
00124                      WRONG_PARAM_COUNT;
00125        }
00126 
00127        if (distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) {
00128               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long");
00129        }
00130 
00131        RETURN_LONG(distance);
00132 }
00133 /* }}} */
00134 
00135 /*
00136  * Local variables:
00137  * tab-width: 4
00138  * c-basic-offset: 4
00139  * End:
00140  * vim600: sw=4 ts=4 fdm=marker
00141  * vim<600: sw=4 ts=4
00142  */