1999-04-17 00:37:12 +00:00
|
|
|
|
/*
|
|
|
|
|
+----------------------------------------------------------------------+
|
1999-07-16 13:13:16 +00:00
|
|
|
|
| PHP version 4.0 |
|
1999-04-17 00:37:12 +00:00
|
|
|
|
+----------------------------------------------------------------------+
|
2001-02-26 06:11:02 +00:00
|
|
|
|
| Copyright (c) 1997-2001 The PHP Group |
|
1999-04-17 00:37:12 +00:00
|
|
|
|
+----------------------------------------------------------------------+
|
2000-05-18 15:34:45 +00:00
|
|
|
|
| This source file is subject to version 2.02 of the PHP license, |
|
1999-07-16 13:13:16 +00:00
|
|
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
|
|
|
| available through the world-wide-web at |
|
2000-05-18 15:34:45 +00:00
|
|
|
|
| http://www.php.net/license/2_02.txt. |
|
1999-07-16 13:13:16 +00:00
|
|
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
|
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
|
|
|
| license@php.net so we can mail you a copy immediately. |
|
1999-04-17 00:37:12 +00:00
|
|
|
|
+----------------------------------------------------------------------+
|
1999-07-16 13:13:16 +00:00
|
|
|
|
| Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
|
1999-04-17 00:37:12 +00:00
|
|
|
|
+----------------------------------------------------------------------+
|
|
|
|
|
*/
|
|
|
|
|
/* $Id$ */
|
|
|
|
|
|
|
|
|
|
#include "php.h"
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#include <ctype.h>
|
1999-12-04 19:19:57 +00:00
|
|
|
|
#include "php_string.h"
|
1999-04-17 00:37:12 +00:00
|
|
|
|
|
|
|
|
|
/* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
|
|
|
|
|
/* {{{ proto string soundex(string str)
|
|
|
|
|
Calculate the soundex key of a string */
|
1999-07-24 22:16:54 +00:00
|
|
|
|
PHP_FUNCTION(soundex)
|
1999-04-17 00:37:12 +00:00
|
|
|
|
{
|
1999-07-28 07:04:26 +00:00
|
|
|
|
char *somestring;
|
1999-07-28 17:58:38 +00:00
|
|
|
|
int i, _small, len, code, last;
|
1999-09-25 12:09:50 +00:00
|
|
|
|
pval *arg, **parg;
|
1999-07-28 07:04:26 +00:00
|
|
|
|
char soundex[4 + 1];
|
1999-04-17 00:37:12 +00:00
|
|
|
|
|
|
|
|
|
static char soundex_table[26] =
|
|
|
|
|
{0, /* A */
|
|
|
|
|
'1', /* B */
|
|
|
|
|
'2', /* C */
|
|
|
|
|
'3', /* D */
|
|
|
|
|
0, /* E */
|
|
|
|
|
'1', /* F */
|
|
|
|
|
'2', /* G */
|
|
|
|
|
0, /* H */
|
|
|
|
|
0, /* I */
|
|
|
|
|
'2', /* J */
|
|
|
|
|
'2', /* K */
|
|
|
|
|
'4', /* L */
|
|
|
|
|
'5', /* M */
|
|
|
|
|
'5', /* N */
|
|
|
|
|
0, /* O */
|
|
|
|
|
'1', /* P */
|
|
|
|
|
'2', /* Q */
|
|
|
|
|
'6', /* R */
|
|
|
|
|
'2', /* S */
|
|
|
|
|
'3', /* T */
|
|
|
|
|
0, /* U */
|
|
|
|
|
'1', /* V */
|
|
|
|
|
0, /* W */
|
|
|
|
|
'2', /* X */
|
|
|
|
|
0, /* Y */
|
|
|
|
|
'2'}; /* Z */
|
1999-07-28 07:04:26 +00:00
|
|
|
|
|
2000-06-05 19:47:54 +00:00
|
|
|
|
if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &parg) == FAILURE) {
|
1999-04-17 00:37:12 +00:00
|
|
|
|
WRONG_PARAM_COUNT;
|
|
|
|
|
}
|
1999-09-25 12:09:50 +00:00
|
|
|
|
convert_to_string_ex(parg);
|
|
|
|
|
arg = *parg;
|
1999-04-17 00:37:12 +00:00
|
|
|
|
if (arg->value.str.len==0) {
|
|
|
|
|
RETURN_FALSE;
|
|
|
|
|
}
|
|
|
|
|
somestring = arg->value.str.val;
|
1999-07-28 07:04:26 +00:00
|
|
|
|
len = arg->value.str.len;
|
1999-04-17 00:37:12 +00:00
|
|
|
|
|
|
|
|
|
/* build soundex string */
|
1999-07-28 11:26:58 +00:00
|
|
|
|
last = -1;
|
1999-07-28 17:58:38 +00:00
|
|
|
|
for (i = 0, _small = 0; i < len && _small < 4; i++) {
|
1999-07-28 07:04:26 +00:00
|
|
|
|
/* convert chars to upper case and strip non-letter chars */
|
|
|
|
|
/* BUG: should also map here accented letters used in non */
|
|
|
|
|
/* English words or names (also found in English text!): */
|
|
|
|
|
/* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
|
|
|
|
|
code = toupper(somestring[i]);
|
|
|
|
|
if (code >= 'A' && code <= 'Z') {
|
1999-07-28 17:58:38 +00:00
|
|
|
|
if (_small == 0) {
|
1999-07-28 07:04:26 +00:00
|
|
|
|
/* remember first valid char */
|
1999-07-28 17:58:38 +00:00
|
|
|
|
soundex[_small++] = code;
|
1999-07-28 07:04:26 +00:00
|
|
|
|
last = soundex_table[code - 'A'];
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* ignore sequences of consonants with same soundex */
|
|
|
|
|
/* code in trail, and vowels unless they separate */
|
|
|
|
|
/* consonant letters */
|
|
|
|
|
code = soundex_table[code - 'A'];
|
|
|
|
|
if (code != last) {
|
|
|
|
|
if (code != 0) {
|
1999-07-28 17:58:38 +00:00
|
|
|
|
soundex[_small++] = code;
|
1999-07-28 07:04:26 +00:00
|
|
|
|
}
|
|
|
|
|
last = code;
|
|
|
|
|
}
|
1999-04-17 00:37:12 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-07-28 07:04:26 +00:00
|
|
|
|
/* pad with '0' and terminate with 0 ;-) */
|
1999-07-28 17:58:38 +00:00
|
|
|
|
while (_small < 4) {
|
|
|
|
|
soundex[_small++] = '0';
|
1999-07-28 07:04:26 +00:00
|
|
|
|
}
|
1999-07-28 17:58:38 +00:00
|
|
|
|
soundex[_small] = '\0';
|
1999-04-17 00:37:12 +00:00
|
|
|
|
|
1999-07-28 17:58:38 +00:00
|
|
|
|
return_value->value.str.val = estrndup(soundex, _small);
|
|
|
|
|
return_value->value.str.len = _small;
|
1999-04-17 00:37:12 +00:00
|
|
|
|
return_value->type = IS_STRING;
|
|
|
|
|
}
|
|
|
|
|
/* }}} */
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Local variables:
|
|
|
|
|
* tab-width: 4
|
|
|
|
|
* c-basic-offset: 4
|
|
|
|
|
* End:
|
2001-06-06 13:06:12 +00:00
|
|
|
|
* vim600: sw=4 ts=4 tw=78 fdm=marker
|
|
|
|
|
* vim<600: sw=4 ts=4 tw=78
|
1999-04-17 00:37:12 +00:00
|
|
|
|
*/
|