php-src/ext/standard/html.c

/*
   +----------------------------------------------------------------------+
   | PHP version 4.0                                                      |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997-2001 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.02 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available at through the world-wide-web at                           |
   | http://www.php.net/license/2_02.txt.                                 |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Rasmus Lerdorf <rasmus@lerdorf.on.ca>                       |
   |          Jaakko Hyvätti <jaakko.hyvatti@iki.fi>                      |
   |          Wez Furlong <wez@thebrainroom.com>                          |
   +----------------------------------------------------------------------+
*/

/* $Id$ */

#include "php.h"
#include "reg.h"
#include "html.h"

#if HAVE_LOCALE_H
#include <locale.h>
#endif
#if HAVE_LANGINFO_H
#include <langinfo.h>
#endif

/* This must be fixed to handle the input string according to LC_CTYPE.
   Defaults to ISO-8859-1 for now. */

/* Character sets the entity encoder can tell apart.  cs_terminator has to
 * stay first so that it keeps the value 0: it is the end marker of the
 * lookup tables in this file. */
enum entity_charset {
	cs_terminator,	/* end-of-table sentinel, not a real charset */
	cs_8859_1,
	cs_cp1252,
	cs_8859_15,
	cs_utf_8,
	cs_big5,
	cs_gb2312,
	cs_big5hkscs,
	cs_sjis,
	cs_eucjp
};

/* Element type of the entity tables: an entity name without the leading
 * '&' and trailing ';', or NULL when a position has no named entity. */
typedef const char *entity_table_t;

/* codepage 1252 is a Windows extension to iso-8859-1. */
static entity_table_t ent_cp_1252[] = {
	NULL, NULL, "sbquo", "fnof", "bdquo", "hellip", "dagger",
	"Dagger", "circ", "permil", "Scaron", "lsaquo", "OElig",
	NULL, NULL, NULL, NULL, "lsquo", "rsquo", "ldquo", "rdquo",
	"bull", "ndash", "mdash", "tilde", "trade", "scaron", "rsaquo",
	"oelig", NULL, NULL, "Yuml" 
};

/* Entity names for ISO-8859-1 bytes 0xa0-0xff, indexed by (byte - 0xa0).
 * Eight names per row; the comment gives the code of the first one. */
static entity_table_t ent_iso_8859_1[] = {
	/* 0xa0 */ "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
	/* 0xa8 */ "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
	/* 0xb0 */ "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
	/* 0xb8 */ "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
	/* 0xc0 */ "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
	/* 0xc8 */ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
	/* 0xd0 */ "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
	/* 0xd8 */ "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
	/* 0xe0 */ "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
	/* 0xe8 */ "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
	/* 0xf0 */ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
	/* 0xf8 */ "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
};

/* Entity names for ISO-8859-15 bytes 0xa0-0xff, indexed by (byte - 0xa0).
 * Eight names per row; the comment gives the code of the first one.
 * NULL marks a character that has no named entity. */
static entity_table_t ent_iso_8859_15[] = {
	/* 0xa0 */ "nbsp", "iexcl", "cent", "pound", "euro", "yen", "Scaron", "sect",
	/* 0xa8 */ "scaron", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
	/* 0xb0 */ "deg", "plusmn", "sup2", "sup3", NULL /* Zcaron */, "micro", "para", "middot",
	/* 0xb8 */ NULL /* zcaron */, "sup1", "ordm", "raquo", "OElig", "oelig", "Yuml", "iquest",
	/* 0xc0 */ "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
	/* 0xc8 */ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
	/* 0xd0 */ "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
	/* 0xd8 */ "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
	/* 0xe0 */ "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
	/* 0xe8 */ "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
	/* 0xf0 */ "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
	/* 0xf8 */ "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
};

/* One row of the entity lookup: for `charset`, the character codes
 * basechar..endchar (both inclusive) take their entity name from
 * table[code - basechar].  The field order is relied upon by the positional
 * initializers of entity_map. */
struct html_entity_map {
	enum entity_charset charset;	/* charset this row applies to */
	unsigned short basechar;			/* char code of table[0] */
	unsigned short endchar;			/* last char code covered by the table (inclusive) */
	entity_table_t * table;			/* entity names; a NULL entry means "no entity" */
};

/* Which entity table covers which code range, per charset.  Readers walk the
 * rows front to back up to the cs_terminator row and use the first row whose
 * charset and range both match. */
static const struct html_entity_map entity_map[] = {
	/* charset          first  last   names            */
	{ cs_cp1252,        0x80,  0x9f,  ent_cp_1252     },
	{ cs_cp1252,        0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_8859_1,        0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_8859_15,       0xa0,  0xff,  ent_iso_8859_15 },
	{ cs_utf_8,         0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_big5,          0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_gb2312,        0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_big5hkscs,     0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_sjis,          0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_eucjp,         0xa0,  0xff,  ent_iso_8859_1  },
	{ cs_terminator,    0,     0,     NULL            }
};

static const struct {
	const char * codeset;
	enum entity_charset charset;
} charset_map[] = {
	{ "ISO-8859-1", 	cs_8859_1 },
	{ "ISO-8859-15", 	cs_8859_15 },
	{ "utf-8", 			cs_utf_8 },
	{ "cp1252", 		cs_cp1252 },
	{ "BIG5",			cs_big5 },
	{ "GB2312",			cs_gb2312 },
	{ "BIG5-HKSCS",		cs_big5hkscs },
 	{ "Shift_JIS",		cs_sjis },
 	{ "SJIS",   		cs_sjis },
 	{ "EUCJP",   		cs_eucjp },
 	{ "EUC-JP",   		cs_eucjp },
	{ NULL }
};

/* {{{ get_next_char
 * Decodes one character of str, starting at byte offset *newpos, according
 * to charset.
 *
 *   charset   encoding of str; a charset without a case below is read one
 *             byte at a time
 *   str       input bytes.  No length is passed in: multibyte decoding looks
 *             at the bytes that follow *newpos, so the buffer must stay
 *             readable there (a terminating NUL byte ends every sequence)
 *   newpos    in:  offset of the first byte to decode
 *             out: offset of the first byte after the decoded character
 *   mbseq     out: the raw bytes that were consumed, NUL terminated; at most
 *             6 bytes plus the terminator are written
 *   mbseqlen  out: number of bytes stored in mbseq, terminator not counted
 *
 * Returns the decoded value.  Single bytes are returned as they are.  Two and
 * three byte sequences of the Asian charsets are packed by shifting, which
 * keeps only the low 16 bits but always yields a value above 0xff.  UTF-8
 * code points above 0xffff are reported as 0xffff, because the truncated low
 * 16 bits could otherwise be mistaken for '&', '<' or a Latin-1 character.
 */
inline static unsigned short get_next_char(enum entity_charset charset,
		unsigned char * str,
		int * newpos,
		unsigned char * mbseq,
		int * mbseqlen
)
{
	int pos = *newpos;
	int mbpos = 0;
	unsigned short this_char = str[pos++];
	
	mbseq[mbpos++] = (unsigned char)this_char;
	
	switch(charset)	{
		case cs_utf_8:
			{
				unsigned long utf = 0;
				/* high nibble: length of the sequence minus one,
				 * low nibble: continuation bytes seen so far;
				 * 0 means no lead byte has been seen yet */
				int stat = 0;
				int more = 1;

				/* unpack utf-8 encoding into a wide char.
				 * Code stolen from the mbstring extension */

				do {
					if (this_char < 0x80)	{
						/* plain ASCII ends the sequence, complete or not */
						more = 0;
						break;
					}
					else if (this_char < 0xc0)	{
						/* continuation byte */
						switch(stat)	{
							case 0x10:	/* 2, 2nd */
							case 0x21:	/* 3, 3rd */
							case 0x32:	/* 4, 4th */
							case 0x43:	/* 5, 5th */
							case 0x54:	/* 6, 6th */
								/* last byte in sequence */
								more = 0;
								utf |= (this_char & 0x3f);
								/* clamp instead of truncating, see the
								 * function comment */
								this_char = (utf > 0xffff) ? 0xffff : (unsigned short)utf;
								break;
							case 0x20:	/* 3, 2nd */
							case 0x31:	/* 4, 3rd */
							case 0x42:	/* 5, 4th */
							case 0x53:	/* 6, 5th */
								/* penultimate char */
								utf |= ((this_char & 0x3f) << 6);
								stat++;
								break;
							case 0x30:	/* 4, 2nd */
							case 0x41:	/* 5, 3rd */
							case 0x52:	/* 6, 4th */
								utf |= ((this_char & 0x3f) << 12);
								stat++;
								break;
							case 0x40:	/* 5, 2nd */
							case 0x51:	/* 6, 3rd */
								utf |= ((this_char & 0x3f) << 18);
								stat++;
								break;
							case 0x50:	/* 6, 2nd */
								utf |= ((this_char & 0x3f) << 24);
								stat++;
								/* this break was missing, which made every
								 * 6 byte sequence end here as invalid */
								break;
							default:
								/* continuation byte without a lead byte */
								more = 0;
						}
					}
					else if (stat != 0)	{
						/* a lead (or illegal) byte in the middle of a
						 * sequence.  Restarting here, as the code used to
						 * do, lets a run of lead bytes grow mbseq without
						 * bound.  Instead hand the first byte back on its
						 * own and leave everything after it for the next
						 * call. */
						pos = *newpos + 1;
						mbpos = 1;
						this_char = mbseq[0];
						more = 0;
						break;
					}
					/* lead byte */
					else if (this_char < 0xe0) {
						stat = 0x10;	/* 2 byte */
						utf = (this_char & 0x1f) << 6;
					} else if (this_char < 0xf0)	{
						stat = 0x20;	/* 3 byte */
						utf = (this_char & 0xf) << 12;
					} else if (this_char < 0xf8) {
						stat = 0x30;	/* 4 byte */
						utf = (this_char & 0x7) << 18;
					} else if (this_char < 0xfc)	{
						stat = 0x40;	/* 5 byte */
						utf = (this_char & 0x3) << 24;
					} else if (this_char < 0xfe)	{
						stat = 0x50;	/* 6 byte */
						utf = (this_char & 0x1) << 30;
					}
					else	{
						/* invalid; bail */
						more = 0;
						break;
					}
					if (more)
					{
						this_char = str[pos++];
						mbseq[mbpos++] = (unsigned char)this_char;
					}
				} while(more);
			}
			break;
		case cs_big5:
		case cs_gb2312:
		case cs_big5hkscs:
			{
				/* check if this is the first of a 2-byte sequence */
				if (this_char >= 0xa1 && this_char <= 0xf9)	{
					/* peek at the next char */
					unsigned char next_char = str[pos];
					if ((next_char >= 0x40 && next_char <= 0x73) ||
							(next_char >= 0xa1 && next_char <= 0xfe))
					{
						/* yes, this a wide char */
						this_char <<= 8;
						mbseq[mbpos++] = next_char;
						this_char |= next_char;
						pos++;
					}
					
				}
				break;
			}
		case cs_sjis:
			{
				/* check if this is the first of a 2-byte sequence */
				if ( (this_char >= 0x81 && this_char <= 0x9f) ||
					 (this_char >= 0xe0 && this_char <= 0xef)
					)	{
					/* peek at the next char */
					unsigned char next_char = str[pos];
					if ((next_char >= 0x40 && next_char <= 0x7e) ||
						(next_char >= 0x80 && next_char <= 0xfc))
					{
						/* yes, this a wide char */
						this_char <<= 8;
						mbseq[mbpos++] = next_char;
						this_char |= next_char;
						pos++;
					}
					
				}
				break;
			}
		case cs_eucjp:
			{
				/* check if this is the first of a multi-byte sequence */
				if (this_char >= 0xa1 && this_char <= 0xfe)	{
					/* peek at the next char */
					unsigned char next_char = str[pos];
					if (next_char >= 0xa1 && next_char <= 0xfe)
					{
						/* yes, this a jis kanji char */
						this_char <<= 8;
						mbseq[mbpos++] = next_char;
						this_char |= next_char;
						pos++;
					}
					
				} else if (this_char == 0x8e)	{
					/* peek at the next char */
					unsigned char next_char = str[pos];
					if (next_char >= 0xa1 && next_char <= 0xdf)
					{
						/* JIS X 0201 kana */
						this_char <<= 8;
						mbseq[mbpos++] = next_char;
						this_char |= next_char;
						pos++;
					}
					
				} else if (this_char == 0x8f)	{
					/* peek at the next char */
					unsigned char next_char = str[pos];
					if (next_char >= 0xa1 && next_char <= 0xfe)
					{
						/* only look at the third byte once the second one
						 * is known to be valid; otherwise str[pos] may be
						 * the terminator and str[pos+1] lies behind it */
						unsigned char next2_char = str[pos+1];
						if (next2_char >= 0xa1 && next2_char <= 0xfe)
						{
							/* JIS X 0212 hojo-kanji */
							this_char <<= 8;
							mbseq[mbpos++] = next_char;
							this_char |= next_char;
							this_char <<= 8;
							mbseq[mbpos++] = next2_char;
							this_char |= next2_char;
							pos+=2;
						}
					}
					
				}
				break;
			}
		default:
			/* single byte charset: the byte read above is the character */
			break;
	}
	*newpos = pos;
	mbseq[mbpos] = '\0';
	*mbseqlen = mbpos;
	return this_char;
}
/* }}} */

/* {{{ entity_charset determine_charset
 * returns the charset identifier based on current locale or a hint.
 * defaults to iso-8859-1
 *
 *   charset_hint  NULL      -> always ISO-8859-1 (backwards compatibility)
 *                 ""        -> ask the locale: nl_langinfo(CODESET) where
 *                              available, otherwise the codeset part of the
 *                              LC_CTYPE locale name
 *                 otherwise -> the codeset name to look up
 *
 * The name is compared case-insensitively against charset_map and has to
 * match an entry in full; comparing only a prefix would let e.g. the locale
 * name "C" select "cp1252".  An unknown name yields ISO-8859-1.
 */
static enum entity_charset determine_charset(char * charset_hint)
{
	int i;
	enum entity_charset charset = cs_8859_1;
	size_t len;

	/* Guarantee default behaviour for backwards compatibility */
	if (charset_hint == NULL)
		return cs_8859_1;

	/* always known from here on; it used to stay uninitialised whenever a
	 * non-empty hint was passed */
	len = strlen(charset_hint);

	if (len == 0)	{
		/* try to detect the charset for the locale */
#if HAVE_NL_LANGINFO && HAVE_LOCALE_H && defined(CODESET)
		charset_hint = nl_langinfo(CODESET);
		if (charset_hint != NULL)
			len = strlen(charset_hint);
#endif
#if HAVE_LOCALE_H
		if (charset_hint == NULL || len == 0)
		{
			/* try to figure out the charset from the locale */
			char * localename;
			char * dot, * at;

			/* lang[_territory][.codeset][@modifier] */
			localename = setlocale(LC_CTYPE, NULL);

			if (localename != NULL)	{
				dot = strchr(localename, '.');
				if (dot)	{
					dot++;
					/* locale specifies a codeset */
					at = strchr(dot, '@');
					if (at)
						len = (size_t)(at - dot);
					else
						len = strlen(dot);
					charset_hint = dot;
				}
				else	{
					/* no explicit name; see if the name itself
					 * is the charset */
					charset_hint = localename;
					len = strlen(charset_hint);
				}
			}
		}
#endif
	}
	if (charset_hint != NULL && len != 0)	{
		/* now walk the charset map and look for the codeset */
		for (i = 0; charset_map[i].codeset; i++)	{
			if (len == strlen(charset_map[i].codeset)
					&& strncasecmp(charset_hint, charset_map[i].codeset, len) == 0)	{
				charset = charset_map[i].charset;
				break;
			}
		}
	}
	return charset;
}
/* }}} */

/* {{{ php_escape_html_entities
 * Returns a copy of old[0..oldlen) with HTML special characters replaced by
 * entities.
 *
 *   old, oldlen   input bytes and their count
 *   newlen        out: length of the result, terminator not counted
 *   all           non-zero: characters found in the entity tables of the
 *                 charset are replaced by "&name;" as well
 *   quote_style   '"' is escaped unless ENT_NOQUOTES is set,
 *                 '\'' is escaped only when ENT_QUOTES is set
 *   hint_charset  passed to determine_charset() to pick the input encoding
 *
 * '&', '<' and '>' are always escaped.  A multibyte character without an
 * entity is copied through byte for byte.  The result is NUL terminated and
 * allocated with emalloc()/erealloc().
 */
PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char * hint_charset)
{
	int i, maxlen, len;
	char *out;
	enum entity_charset charset = determine_charset(hint_charset);

	maxlen = 2 * oldlen;
	if (maxlen < 128)
		maxlen = 128;
	out = emalloc (maxlen);
	len = 0;

	i = 0;
	while (i < oldlen) {
		int start = i;
		int mbseqlen;
		int rep_len = 0;
		const char *rep = NULL;			/* entity name for this character, if any */
		unsigned char mbsequence[16];	/* allow up to 15 characters in a multibyte sequence */
		unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);

		if (i > oldlen)	{
			/* the sequence ran past the end of the input, so what was
			 * decoded includes the terminator.  Copy the leftover input
			 * bytes unchanged; they are all >= 0x80, because an ASCII byte
			 * would have ended the sequence inside the input. */
			int tail = oldlen - start;

			if (len + tail + 1 > maxlen)	{
				maxlen = len + tail + 1;
				out = erealloc (out, maxlen);
			}
			memcpy (out + len, old + start, tail);
			len += tail;
			break;
		}

		if (all)	{
			/* look for a match in the maps for this charset */
			int j;

			for (j=0; entity_map[j].charset != cs_terminator; j++)	{
				if (entity_map[j].charset == charset
						&& this_char >= entity_map[j].basechar
						&& this_char <= entity_map[j].endchar)
				{
					/* may be NULL: there is no entity for this position and
					 * the character is handled like any other below */
					rep = entity_map[j].table[this_char - entity_map[j].basechar];
					break;
				}
			}
			if (rep != NULL)
				rep_len = (int)strlen(rep);
		}

		/* make room for the longest thing this round can append (an
		 * entity, a fixed escape or the raw sequence) plus the final NUL */
		if (len + rep_len + mbseqlen + 9 > maxlen)	{
			maxlen += 128 + rep_len + mbseqlen;
			out = erealloc (out, maxlen);
		}

		if (rep != NULL)	{
			out[len++] = '&';
			memcpy (out + len, rep, rep_len);
			len += rep_len;
			out[len++] = ';';
		} else if (38 == this_char) {
			memcpy (out + len, "&amp;", 5);
			len += 5;
		} else if (34 == this_char && !(quote_style&ENT_NOQUOTES)) {
			memcpy (out + len, "&quot;", 6);
			len += 6;
		} else if (39 == this_char && (quote_style&ENT_QUOTES)) {
			memcpy (out + len, "&#039;", 6);
			len += 6;
		} else if (60 == this_char) {
			memcpy (out + len, "&lt;", 4);
			len += 4;
		} else if (62 == this_char) {
			memcpy (out + len, "&gt;", 4);
			len += 4;
		} else if (this_char > 0xff || mbseqlen > 1)	{
			/* a multibyte char without a named entity; pass through the
			 * original sequence.  Testing the value alone is not enough:
			 * UTF-8 encodes U+0080..U+00FF in two bytes, and writing the
			 * decoded value as one byte would corrupt the output. */
			memcpy (out + len, mbsequence, mbseqlen);
			len += mbseqlen;
		} else {
			out[len++] = (unsigned char)this_char;
		}
	}
	out[len] = '\0';
	*newlen = len;

	return out;
}
/* }}} */

/* {{{ php_html_entities
 * Shared implementation of htmlspecialchars() and htmlentities().
 * Script arguments: string [, int quote_style [, string charset]].
 * `all` is handed on to php_escape_html_entities() and selects whether the
 * entity tables are used in addition to the basic escapes.  The escaped
 * string becomes the return value without being copied again.
 */
static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
{
	zval **arg, **quotes, **charset;
	int len, quote_style = ENT_COMPAT;
	int ac = ZEND_NUM_ARGS();
	char *hint_charset = NULL;
	char *new;

	if (ac < 1 || ac > 3 || zend_get_parameters_ex(ac, &arg, &quotes, &charset) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	convert_to_string_ex(arg);
	/* ">= 2", not "== 2": the quote style must also be honoured when the
	 * charset is passed as third argument */
	if (ac >= 2) {
		convert_to_long_ex(quotes);
		quote_style = Z_LVAL_PP(quotes);
	}
	if (ac == 3)	{
		convert_to_string_ex(charset);
		hint_charset = Z_STRVAL_PP(charset);
	}

	new = php_escape_html_entities((unsigned char *)Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), &len, all, quote_style, hint_charset);
	RETVAL_STRINGL(new, len, 0);
}
/* }}} */

/* Values of the `table` argument of get_html_translation_table(): the basic
 * escapes only, or the entity tables plus the basic escapes. */
#define HTML_SPECIALCHARS 	0
#define HTML_ENTITIES	 	1

/* {{{ register_html_constants
 * Registers the two table selectors above and the ENT_* quote-style flags
 * through REGISTER_LONG_CONSTANT, each with CONST_PERSISTENT|CONST_CS.
 * NOTE(review): the ENT_* values are not defined in this file; presumably
 * they come from html.h.
 */
void register_html_constants(INIT_FUNC_ARGS)
{
	REGISTER_LONG_CONSTANT("HTML_SPECIALCHARS", HTML_SPECIALCHARS, CONST_PERSISTENT|CONST_CS);
	REGISTER_LONG_CONSTANT("HTML_ENTITIES", HTML_ENTITIES, CONST_PERSISTENT|CONST_CS);
	REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
	REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
	REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
}
/* }}} */

/* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset])
   Convert special characters to HTML entities */
PHP_FUNCTION(htmlspecialchars)
{
	/* all = 0: the entity tables are not consulted, so only &, < and > and,
	 * depending on quote_style, the quote characters are escaped */
	php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */

/* {{{ proto string htmlentities(string string [, int quote_style][, string charset])
   Convert all applicable characters to HTML entities */
PHP_FUNCTION(htmlentities)
{
	/* all = 1: characters listed in the entity tables of the charset are
	 * replaced by their named entity in addition to the basic escapes */
	php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */

/* {{{ proto array get_html_translation_table([int table [, int quote_style]])
   Returns the internal translation table used by htmlspecialchars and htmlentities */
/* The result maps one-character strings to the entity that replaces them.
 * table HTML_SPECIALCHARS (the default) gives the basic escapes selected by
 * quote_style; HTML_ENTITIES gives the named entities as well.  The charset
 * is always ISO-8859-1 because determine_charset() is called with NULL.  Any
 * other table value produces an empty array. */
PHP_FUNCTION(get_html_translation_table)
{
	zval **whichone, **quotes;
	int which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT;
	int ac = ZEND_NUM_ARGS();
	int i, j;
	char ind[ 2 ];
	enum entity_charset charset = determine_charset(NULL);

	if (ac < 0 || ac > 2 || zend_get_parameters_ex(ac, &whichone, &quotes) == FAILURE) {
		WRONG_PARAM_COUNT;
	}

	if (ac > 0) {
		convert_to_long_ex(whichone);
		which = Z_LVAL_PP(whichone);
	} 
	if (ac == 2) {
		convert_to_long_ex(quotes);
		quote_style = Z_LVAL_PP(quotes);
	}

	array_init(return_value);

	/* ind is the array key: a single character followed by NUL */
	ind[1] = 0;

	switch (which) {
		case HTML_ENTITIES:
			for (j=0; entity_map[j].charset != cs_terminator; j++)	{
				if (entity_map[j].charset != charset)
					continue;
				/* endchar is inclusive, hence "<="; with "<" the last entry
				 * of every table (e.g. yuml) was left out */
				for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++)
				{
					/* large enough for '&', a name from the tables in this
					 * file (at most 6 characters), ';' and the NUL */
					char buffer[16];

					if (entity_map[j].table[i] == NULL)
						continue;
					/* what about wide chars here ?? */
					ind[0] = (char)(i + entity_map[j].basechar);
					sprintf(buffer, "&%s;", entity_map[j].table[i]);
					add_assoc_string(return_value, ind, buffer, 1);

				}
			}
			/* fall through: the entity table includes the basic escapes */

		case HTML_SPECIALCHARS:
			ind[0]=38; add_assoc_string(return_value, ind, "&amp;", 1);
			if(quote_style&ENT_QUOTES) {
				ind[0]=39; add_assoc_string(return_value, ind, "&#039;", 1);
			}
			if(!(quote_style&ENT_NOQUOTES)) {
				ind[0]=34; add_assoc_string(return_value, ind, "&quot;", 1); 
			}
			ind[0]=60; add_assoc_string(return_value, ind, "&lt;", 1);
			ind[0]=62; add_assoc_string(return_value, ind, "&gt;", 1);
			break;

		default:
			/* unknown table: return the empty array */
			break;
	}
}
/* }}} */

/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: sw=4 ts=4 fdm=marker
 * vim<600: sw=4 ts=4
 */
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								/*
 								   +----------------------------------------------------------------------+
-												License update


											
										
										
											1999-07-16 13:13:16 +00:00
+								   | PHP version 4.0                                                      |
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   +----------------------------------------------------------------------+
-												- Fix copyright notices with 2001


											
										
										
											2001-02-26 06:11:02 +00:00
+								   | Copyright (c) 1997-2001 The PHP Group                                |
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   +----------------------------------------------------------------------+
-												Update the license with the new clause 6


											
										
										
											2000-05-18 15:34:45 +00:00
+								   | This source file is subject to version 2.02 of the PHP license,      |
-												License update


											
										
										
											1999-07-16 13:13:16 +00:00
+								   | that is bundled with this package in the file LICENSE, and is        |
 								   | available at through the world-wide-web at                           |
-												Update the license with the new clause 6


											
										
										
											2000-05-18 15:34:45 +00:00
+								   | http://www.php.net/license/2_02.txt.                                 |
-												License update


											
										
										
											1999-07-16 13:13:16 +00:00
+								   | If you did not receive a copy of the PHP license and are unable to   |
 								   | obtain it through the world-wide-web, please send a note to          |
 								   | license@php.net so we can mail you a copy immediately.               |
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   +----------------------------------------------------------------------+
 								   | Authors: Rasmus Lerdorf <rasmus@lerdorf.on.ca>                       |
 								   |          Jaakko Hyv<EFBFBD>tti <jaakko.hyvatti@iki.fi>                      |
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+								   |          Wez Furlong <wez@thebrainroom.com>                          |
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   +----------------------------------------------------------------------+
-												Changed lots of PHP 3 licence headers to PHP 4, mainly in .h files.
Added a few RCS $Id$ tags.

# Note: I have avoided changing any .h files if the corresponding .c file
# had not already been changed as I am not sure if there are any legal
# issues here. So some extensions still have PHP 3 headers.


											
										
										
											2000-07-24 01:40:02 +00:00
+								*/
 								/* $Id$ */
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
 								#include "php.h"
 								#include "reg.h"
 								#include "html.h"
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								#if HAVE_LOCALE_H
 								#include <locale.h>
 								#endif
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+								#if HAVE_LANGINFO_H
 								#include <langinfo.h>
 								#endif
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								/* This must be fixed to handle the input string according to LC_CTYPE.
 								   Defaults to ISO-8859-1 for now. */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
 								enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252,
-												 Added support for Japanese encoding to htmlentities() and htmlspecialchars(). @ Added support for Japanese encoding to htmlentities() and htmlspecialchars(). (Rui)


											
										
										
											2001-09-15 04:48:48 +00:00
+													  cs_8859_15, cs_utf_8, cs_big5, cs_gb2312,
 								 					  cs_big5hkscs, cs_sjis, cs_eucjp};
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								typedef const char * entity_table_t;
 								/* codepage 1252 is a Windows extension to iso-8859-1. */
 								/* Entity names for the bytes that cp1252 adds on top of iso-8859-1.
 								 * The table has 32 slots; entity_map pairs it with the byte range
 								 * 0x80-0x9f, so slot i presumably corresponds to byte 0x80 + i (the
 								 * lookup code is outside this block -- confirm).  A NULL slot means
 								 * this table supplies no entity name for that byte. */
 								static entity_table_t ent_cp_1252[] = {
 									NULL, NULL, "sbquo", "fnof", "bdquo", "hellip", "dagger",
 									"Dagger", "circ", "permil", "Scaron", "lsaquo", "OElig",
 									NULL, NULL, NULL, NULL, "lsquo", "rsquo", "ldquo", "rdquo",
 									"bull", "ndash", "mdash", "tilde", "trade", "scaron", "rsaquo",
 									"oelig", NULL, NULL, "Yuml"
 								};
 								static entity_table_t ent_iso_8859_1[] = {
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+									"nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar",
 									"sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg",
 									"macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro",
 									"para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14",
 									"frac12", "frac34", "iquest", "Agrave", "Aacute", "Acirc",
 									"Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave",
 									"Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc",
 									"Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde",
 									"Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml",
 									"Yacute", "THORN", "szlig", "agrave", "aacute", "acirc",
 									"atilde", "auml", "aring", "aelig", "ccedil", "egrave",
 									"eacute", "ecirc", "euml", "igrave", "iacute", "icirc",
 									"iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde",
 									"ouml", "divide", "oslash", "ugrave", "uacute", "ucirc",
 									"uuml", "yacute", "thorn", "yuml"
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								};
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								static entity_table_t ent_iso_8859_15[] = {
 									"nbsp", "iexcl", "cent", "pound", "euro", "yen", "Scaron",
 									"sect", "scaron", "copy", "ordf", "laquo", "not", "shy", "reg",
 									"macr", "deg", "plusmn", "sup2", "sup3", NULL, /* Zcaron */
 									"micro", "para", "middot", NULL, /* zcaron */ "sup1", "ordm",
 									"raquo", "OElig", "oelig", "Yuml", "iquest", "Agrave", "Aacute",
 									"Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave",
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+									"Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc",
 									"Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde",
 									"Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml",
 									"Yacute", "THORN", "szlig", "agrave", "aacute", "acirc",
 									"atilde", "auml", "aring", "aelig", "ccedil", "egrave",
 									"eacute", "ecirc", "euml", "igrave", "iacute", "icirc",
 									"iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde",
 									"ouml", "divide", "oslash", "ugrave", "uacute", "ucirc",
 									"uuml", "yacute", "thorn", "yuml"
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								};
 								/* One row of the charset -> entity-table mapping: for a given charset,
 								 * the character codes basechar..endchar are covered by `table`.
 								 * The fields are initialised positionally in entity_map[], so their
 								 * order must not change. */
 								struct html_entity_map {
 									enum entity_charset charset;	/* charset identifier */
 									unsigned short basechar;			/* char code at start of table */
 									unsigned short endchar;			/* last char code in the table */
 									entity_table_t * table;			/* the table of mappings */
 								};
 								static const struct html_entity_map entity_map[] = {
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+									{ cs_cp1252, 		0x80, 0x9f, ent_cp_1252 },
 									{ cs_cp1252, 		0xa0, 0xff, ent_iso_8859_1 },
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									{ cs_8859_1, 		0xa0, 0xff, ent_iso_8859_1 },
 									{ cs_8859_15, 		0xa0, 0xff, ent_iso_8859_15 },
 									{ cs_utf_8, 		0xa0, 0xff, ent_iso_8859_1 },
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+									{ cs_big5, 			0xa0, 0xff, ent_iso_8859_1 },
 									{ cs_gb2312, 		0xa0, 0xff, ent_iso_8859_1 },
 									{ cs_big5hkscs, 	0xa0, 0xff, ent_iso_8859_1 },
-												 Added support for Japanese encoding to htmlentities() and htmlspecialchars(). @ Added support for Japanese encoding to htmlentities() and htmlspecialchars(). (Rui)


											
										
										
											2001-09-15 04:48:48 +00:00
+								 	{ cs_sjis,			0xa0, 0xff, ent_iso_8859_1 },
 								 	{ cs_eucjp,			0xa0, 0xff, ent_iso_8859_1 },
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									{ cs_terminator }
 								};
 								static const struct {
 									const char * codeset;
 									enum entity_charset charset;
 								} charset_map[] = {
 									{ "ISO-8859-1", 	cs_8859_1 },
 									{ "ISO-8859-15", 	cs_8859_15 },
 									{ "utf-8", 			cs_utf_8 },
 									{ "cp1252", 		cs_cp1252 },
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+									{ "BIG5",			cs_big5 },
 									{ "GB2312",			cs_gb2312 },
 									{ "BIG5-HKSCS",		cs_big5hkscs },
-												 Added support for Japanese encoding to htmlentities() and htmlspecialchars(). @ Added support for Japanese encoding to htmlentities() and htmlspecialchars(). (Rui)


											
										
										
											2001-09-15 04:48:48 +00:00
+								 	{ "Shift_JIS",		cs_sjis },
 								 	{ "SJIS",   		cs_sjis },
 								 	{ "EUCJP",   		cs_eucjp },
 								 	{ "EUC-JP",   		cs_eucjp },
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									{ NULL }
 								};
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* {{{ get_next_char
 								 */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								inline static unsigned short get_next_char(enum entity_charset charset,
 										unsigned char * str,
 										int * newpos,
 										unsigned char * mbseq,
 										int * mbseqlen
 								)
 								{
 									int pos = *newpos;
 									int mbpos = 0;
 									unsigned short this_char = str[pos++];
-												Fix compile warning


											
										
										
											2001-05-29 10:14:46 +00:00
+									mbseq[mbpos++] = (unsigned char)this_char;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+									switch(charset)	{
 										case cs_utf_8:
 											{
 												unsigned long utf = 0;
 												int stat = 0;
 												int more = 1;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+												/* unpack utf-8 encoding into a wide char.
 												 * Code stolen from the mbstring extension */
 												do {
 													if (this_char < 0x80)	{
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+														more = 0;
 														break;
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+													}
 													else if (this_char < 0xc0)	{
 														switch(stat)	{
 															case 0x10:	/* 2, 2nd */
 															case 0x21:	/* 3, 3rd */
 															case 0x32:	/* 4, 4th */
 															case 0x43:	/* 5, 5th */
 															case 0x54:	/* 6, 6th */
 																/* last byte in sequence */
 																more = 0;
 																utf |= (this_char & 0x3f);
 																this_char = (unsigned short)utf;
 																break;
 															case 0x20:	/* 3, 2nd */
 															case 0x31:	/* 4, 3rd */
 															case 0x42:	/* 5, 4th */
 															case 0x53:	/* 6, 5th */
 																/* penultimate char */
 																utf |= ((this_char & 0x3f) << 6);
 																stat++;
 																break;
 															case 0x30:	/* 4, 2nd */
 															case 0x41:	/* 5, 3rd */
 															case 0x52:	/* 6, 4th */
 																utf |= ((this_char & 0x3f) << 12);
 																stat++;
 																break;
 															case 0x40:	/* 5, 2nd */
 															case 0x51:
 																utf |= ((this_char & 0x3f) << 18);
 																stat++;
 																break;
 															case 0x50:	/* 6, 2nd */
 																utf |= ((this_char & 0x3f) << 24);
 																stat++;
 															default:
 																/* invalid */
 																more = 0;
 														}
 													}
 													/* lead byte */
 													else if (this_char < 0xe0) {
 														stat = 0x10;	/* 2 byte */
 														utf = (this_char & 0x1f) << 6;
 													} else if (this_char < 0xf0)	{
 														stat = 0x20;	/* 3 byte */
 														utf = (this_char & 0xf) << 12;
 													} else if (this_char < 0xf8) {
 														stat = 0x30;	/* 4 byte */
 														utf = (this_char & 0x7) << 18;
 													} else if (this_char < 0xfc)	{
 														stat = 0x40;	/* 5 byte */
 														utf = (this_char & 0x3) << 24;
 													} else if (this_char < 0xfe)	{
 														stat = 0x50;	/* 6 byte */
 														utf = (this_char & 0x1) << 30;
 													}
 													else	{
 														/* invalid; bail */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+														more = 0;
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+														break;
 													}
 													if (more)
 													{
 														this_char = str[pos++];
 														mbseq[mbpos++] = (unsigned char)this_char;
 													}
 												} while(more);
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											}
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+											break;
 										case cs_big5:
 										case cs_gb2312:
 										case cs_big5hkscs:
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											{
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+												/* check if this is the first of a 2-byte sequence */
 												if (this_char >= 0xa1 && this_char <= 0xf9)	{
 													/* peek at the next char */
 													unsigned char next_char = str[pos];
 													if ((next_char >= 0x40 && next_char <= 0x73) ||
 															(next_char >= 0xa1 && next_char <= 0xfe))
 													{
 														/* yes, this a wide char */
 														this_char <<= 8;
 														mbseq[mbpos++] = next_char;
 														this_char |= next_char;
 														pos++;
 													}
-												 Added support for japanese encoding to htmlentites() and htmlspecialchars(). @ Added support for japanese encoding to htmlentites() and htmlspecialchars(). (Rui)


											
										
										
											2001-09-15 04:48:48 +00:00
+												}
 												break;
 											}
 										case cs_sjis:
 											{
 												/* check if this is the first of a 2-byte sequence */
 												if ( (this_char >= 0x81 && this_char <= 0x9f) ||
 													 (this_char >= 0xe0 && this_char <= 0xef)
 													)	{
 													/* peek at the next char */
 													unsigned char next_char = str[pos];
 													if ((next_char >= 0x40 && next_char <= 0x7e) ||
 														(next_char >= 0x80 && next_char <= 0xfc))
 													{
 														/* yes, this a wide char */
 														this_char <<= 8;
 														mbseq[mbpos++] = next_char;
 														this_char |= next_char;
 														pos++;
 													}
 												}
 												break;
 											}
 										case cs_eucjp:
 											{
 												/* check if this is the first of a multi-byte sequence */
 												if (this_char >= 0xa1 && this_char <= 0xfe)	{
 													/* peek at the next char */
 													unsigned char next_char = str[pos];
 													if (next_char >= 0xa1 && next_char <= 0xfe)
 													{
 														/* yes, this a jis kanji char */
 														this_char <<= 8;
 														mbseq[mbpos++] = next_char;
 														this_char |= next_char;
 														pos++;
 													}
 												} else if (this_char == 0x8e)	{
 													/* peek at the next char */
 													unsigned char next_char = str[pos];
 													if (next_char >= 0xa1 && next_char <= 0xdf)
 													{
 														/* JIS X 0201 kana */
 														this_char <<= 8;
 														mbseq[mbpos++] = next_char;
 														this_char |= next_char;
 														pos++;
 													}
 												} else if (this_char == 0x8f)	{
 													/* peek at the next two char */
 													unsigned char next_char = str[pos];
 													unsigned char next2_char = str[pos+1];
 													if ((next_char >= 0xa1 && next_char <= 0xfe) &&
 														(next2_char >= 0xa1 && next2_char <= 0xfe))
 													{
 														/* JIS X 0212 hojo-kanji */
 														this_char <<= 8;
 														mbseq[mbpos++] = next_char;
 														this_char |= next_char;
 														this_char <<= 8;
 														mbseq[mbpos++] = next2_char;
 														this_char |= next2_char;
 														pos+=2;
 													}
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+												}
 												break;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											}
 									}
 									*newpos = pos;
 									mbseq[mbpos] = '\0';
 									*mbseqlen = mbpos;
 									return this_char;
 								}
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* }}} */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* {{{ entity_charset determine_charset
 								 * returns the charset identifier based on current locale or a hint.
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								 * defaults to iso-8859-1 */
 								static enum entity_charset determine_charset(char * charset_hint)
 								{
 									int i;
 									enum entity_charset charset = cs_8859_1;
 									int len;
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+									/* Guarantee default behaviour for backwards compatibility */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									if (charset_hint == NULL)
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+										return cs_8859_1;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+									if (strlen(charset_hint) == 0)	{
 										/* try to detect the charset for the locale */
-												Fix probs on OpenBSD-current and RedHat 6.1


											
										
										
											2001-08-08 20:00:09 +00:00
+								#if HAVE_NL_LANGINFO && HAVE_LOCALE_H && defined(CODESET)
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+										charset_hint = nl_langinfo(CODESET);
 								#endif
 								#if HAVE_LOCALE_H
 										if (charset_hint == NULL)
 										{
 											/* try to figure out the charset from the locale */
 											char * localename;
 											char * dot, * at;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+											/* lang[_territory][.codeset][@modifier] */
 											localename = setlocale(LC_CTYPE, NULL);
 											dot = strchr(localename, '.');
 											if (dot)	{
 												dot++;
 												/* locale specifies a codeset */
 												at = strchr(dot, '@');
 												if (at)
 													len = at - dot;
 												else
 													len = strlen(dot);
 												charset_hint = dot;
 											}
 											else	{
 												/* no explicit name; see if the name itself
 												 * is the charset */
 												charset_hint = localename;
 												len = strlen(charset_hint);
 											}
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+										}
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+										else
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											len = strlen(charset_hint);
 								#else
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+										if (charset_hint)
 											len = strlen(charset_hint);
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								#endif
-												(PHP nl_langinfo) Added function when provided by OS
(PHP htmlentities, htmlspecialchars) Uses nl_langinfo to determine charset
@- Added nl_langinfo() (when OS provides it) that returns locale
   information. (Wez Furlong)
# There are a lot of constants used by nl_langinfo; should we do something
# along the lines of what we do for syslog?


											
										
										
											2001-07-04 10:10:30 +00:00
+									}
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									if (charset_hint)	{
 										/* now walk the charset map and look for the codeset */
 										for (i = 0; charset_map[i].codeset; i++)	{
 											if (strncasecmp(charset_hint, charset_map[i].codeset, len) == 0)	{
 												charset = charset_map[i].charset;
 												break;
 											}
 										}
 									}
 									return charset;
 								}
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* }}} */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* {{{ php_escape_html_entities
 								 */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char * hint_charset)
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								{
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+									int i, maxlen, len;
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+									char *new;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									enum entity_charset charset = determine_charset(hint_charset);
-												Fix htmlspecialchars problem.


											
										
										
											2001-08-24 08:58:10 +00:00
+									int matches_map;
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+									maxlen = 2 * oldlen;
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+									if (maxlen < 128)
 										maxlen = 128;
 									new = emalloc (maxlen);
 									len = 0;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									i = 0;
 									while (i < oldlen) {
 										int mbseqlen;
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
+										unsigned char mbsequence[16];	/* allow up to 15 characters in a multibyte sequence */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+										unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
-												Fix htmlspecialchars problem.


											
										
										
											2001-08-24 08:58:10 +00:00
 										matches_map = 0;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+										if (len + 9 > maxlen)
 											new = erealloc (new, maxlen += 128);
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
 										if (all)	{
 											/* look for a match in the maps for this charset */
 											int j;
 											unsigned char * rep;
-												- Add support for chinese encodings to htmlentities/htmlspecialchars
  (patch from Alan Knowles <alan_k@hklc.com>)


											
										
										
											2001-08-23 10:43:15 +00:00
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											for (j=0; entity_map[j].charset != cs_terminator; j++)	{
 												if (entity_map[j].charset == charset
 														&& this_char >= entity_map[j].basechar
 														&& this_char <= entity_map[j].endchar)
 												{
 													rep = (unsigned char*)entity_map[j].table[this_char - entity_map[j].basechar];
 													if (rep == NULL)	{
 														/* there is no entity for this position; fall through and
 														 * just output the character itself */
 														break;
 													}
 													matches_map = 1;
 													break;
 												}
 											}
 											if (matches_map)	{
 												new[len++] = '&';
 												strcpy(new + len, rep);
 												len += strlen(rep);
 												new[len++] = ';';
 											}
 										}
 										if (!matches_map)	{
 											if (38 == this_char) {
 												memcpy (new + len, "&amp;", 5);
 												len += 5;
 											} else if (34 == this_char && !(quote_style&ENT_NOQUOTES)) {
 												memcpy (new + len, "&quot;", 6);
 												len += 6;
 											} else if (39 == this_char && (quote_style&ENT_QUOTES)) {
 												memcpy (new + len, "&#039;", 6);
 												len += 6;
 											} else if (60 == this_char) {
 												memcpy (new + len, "&lt;", 4);
 												len += 4;
 											} else if (62 == this_char) {
 												memcpy (new + len, "&gt;", 4);
 												len += 4;
 											} else if (this_char > 0xff)	{
 												/* a wide char without a named entity; pass through the original sequence */
 												memcpy(new + len, mbsequence, mbseqlen);
 												len += mbseqlen;
 											} else {
-												Fix compile warning


											
										
										
											2001-05-29 10:14:46 +00:00
+												new [len++] = (unsigned char)this_char;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											}
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+										}
 									}
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+									new [len] = '\0';
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+									*newlen = len;
 									return new;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+								}
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* }}} */
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* {{{ php_html_entities
 								 */
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+								static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
 								{
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									zval **arg, **quotes, **charset;
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+									int len, quote_style = ENT_COMPAT;
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									int ac = ZEND_NUM_ARGS();
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									char *hint_charset = NULL;
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
+									char *new;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									if (ac < 1 || ac > 3 || zend_get_parameters_ex(ac, &arg, &quotes, &charset) == FAILURE) {
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+										WRONG_PARAM_COUNT;
 									}
-												Made php_escape_html_entities() as a separate function for export.


											
										
										
											2000-02-29 04:38:14 +00:00
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+									convert_to_string_ex(arg);
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									if(ac==2) {
 										convert_to_long_ex(quotes);
-												Back-substitute for Z_* macro's. If it breaks some extension (the script isn't optimal, it parses for example var->zval.value incorrect) please let me know.


											
										
										
											2001-09-25 21:58:48 +00:00
+										quote_style = Z_LVAL_PP(quotes);
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									}
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									if (ac == 3)	{
 										convert_to_string_ex(charset);
 										hint_charset = Z_STRVAL_PP(charset);
 									}
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
-												Back-substitute for Z_* macro's. If it breaks some extension (the script isn't optimal, it parses for example var->zval.value incorrect) please let me know.


											
										
										
											2001-09-25 21:58:48 +00:00
+									new = php_escape_html_entities(Z_STRVAL_PP(arg), Z_STRLEN_PP(arg), &len, all, quote_style, hint_charset);
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+									RETVAL_STRINGL(new, len, 0);
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								}
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* }}} */
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+								#define HTML_SPECIALCHARS 	0
 								#define HTML_ENTITIES	 	1
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* {{{ register_html_constants
 								 */
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+								void register_html_constants(INIT_FUNC_ARGS)
 								{
 									REGISTER_LONG_CONSTANT("HTML_SPECIALCHARS", HTML_SPECIALCHARS, CONST_PERSISTENT|CONST_CS);
 									REGISTER_LONG_CONSTANT("HTML_ENTITIES", HTML_ENTITIES, CONST_PERSISTENT|CONST_CS);
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
 									REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
 									REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+								}
-												Fix folding and clean up some extensions


											
										
										
											2001-06-06 13:06:12 +00:00
+								/* }}} */
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								/* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset])
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   Convert special characters to HTML entities */
-												conv_proto *.[ch]


											
										
										
											1999-05-16 11:19:26 +00:00
+								PHP_FUNCTION(htmlspecialchars)
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								{
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+									php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								}
 								/* }}} */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								/* {{{ proto string htmlentities(string string [, int quote_style][, string charset])
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								   Convert all applicable characters to HTML entities */
-												conv_proto *.[ch]


											
										
										
											1999-05-16 11:19:26 +00:00
+								PHP_FUNCTION(htmlentities)
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								{
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+									php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								}
 								/* }}} */
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+								/* {{{ proto array get_html_translation_table([int table [, int quote_style][, string charset]])
-												Maybe later on today. Must buy some beer first.


											
										
										
											1999-11-21 14:06:30 +00:00
+								   Returns the internal translation table used by htmlspecialchars and htmlentities */
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+								PHP_FUNCTION(get_html_translation_table)
 								{
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									zval **whichone, **quotes;
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									int which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT;
-												- ARG_COUNT(ht) -> ZEND_NUM_ARGS() mega patch


											
										
										
											2000-06-05 19:47:54 +00:00
+									int ac = ZEND_NUM_ARGS();
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									int i, j;
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+									char ind[ 2 ];
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+									enum entity_charset charset = determine_charset(NULL);
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									if (ac < 0 || ac > 2 || zend_get_parameters_ex(ac, &whichone, &quotes) == FAILURE) {
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+										WRONG_PARAM_COUNT;
 									}
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									if (ac > 0) {
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+										convert_to_long_ex(whichone);
-												Back-substitute for Z_* macro's. If it breaks some extension (the script isn't optimal, it parses for example var->zval.value incorrect) please let me know.


											
										
										
											2001-09-25 21:58:48 +00:00
+										which = Z_LVAL_PP(whichone);
-												Clean up htmlspecialchars/htmlentities inconsistencies.
@Clean up htmlspecialchars/htmlentities inconsistencies. (Rasmus)


											
										
										
											2000-09-12 17:22:37 +00:00
+									}
 									if (ac == 2) {
 										convert_to_long_ex(quotes);
-												Back-substitute for Z_* macros. If it breaks some extension (the script isn't optimal; for example, it parses var->zval.value incorrectly), please let me know.


											
										
										
											2001-09-25 21:58:48 +00:00
+										quote_style = Z_LVAL_PP(quotes);
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+									}
 									array_init(return_value);
 									ind[1] = 0;
 									switch (which) {
 										case HTML_ENTITIES:
-												Added charset awareness to htmlentities() and htmlspecialchars(); use an
optional third parameter to specify the charset; otherwise tries to determine
it from the LC_CTYPE locale setting.


											
										
										
											2001-05-28 11:00:06 +00:00
+											for (j=0; entity_map[j].charset != cs_terminator; j++)	{
 												if (entity_map[j].charset != charset)
 													continue;
 												for (i = 0; i < entity_map[j].endchar - entity_map[j].basechar; i++)
 												{
 													char buffer[16];
 													if (entity_map[j].table[i] == NULL)
 														continue;
 													/* what about wide chars here ?? */
 													ind[0] = i + entity_map[j].basechar;
 													sprintf(buffer, "&%s;", entity_map[j].table[i]);
 													add_assoc_string(return_value, ind, buffer, 1);
 												}
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+											}
 											/* break thru */
 										case HTML_SPECIALCHARS:
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+											ind[0]=38; add_assoc_string(return_value, ind, "&amp;", 1);
-												fixed bug 7961


											
										
										
											2000-11-24 16:17:58 +00:00
+											if(quote_style&ENT_QUOTES) {
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+												ind[0]=39; add_assoc_string(return_value, ind, "&#039;", 1);
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+											}
-												fixed bug 7961


											
										
										
											2000-11-24 16:17:58 +00:00
+											if(!(quote_style&ENT_NOQUOTES)) {
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+												ind[0]=34; add_assoc_string(return_value, ind, "&quot;", 1);
-												ws fix


											
										
										
											2001-03-02 00:15:49 +00:00
+											}
-												Whitespace


											
										
										
											2001-08-11 17:03:37 +00:00
+											ind[0]=60; add_assoc_string(return_value, ind, "&lt;", 1);
 											ind[0]=62; add_assoc_string(return_value, ind, "&gt;", 1);
-												@- Implemented get_html_translation_table() function. (Thies)
(PHP get_html_translation_table) new function.


											
										
										
											1999-11-21 13:25:04 +00:00
+											break;
 									}
 								}
 								/* }}} */
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								/*
 								 * Local variables:
 								 * tab-width: 4
 								 * c-basic-offset: 4
 								 * End:
-												- Don't wrap lines... this is annoying while coding.


											
										
										
											2001-09-09 13:29:31 +00:00
+								 * vim600: sw=4 ts=4 fdm=marker
 								 * vim<600: sw=4 ts=4
-												First commit of re-structuring phase one.  We have started using automake in
sub-directories and started to move extension code into ext/<name>.  For now,
I have moved the "standard" extension (which is quite a mix of everything
right now) and the GD extension into their own subdirs in ext/.
The configure script now also runs configure in the libzend directory
automatically and makes sure php4 and libzend use the same config.cache file.
To avoid running configure in libzend, use the --no-recursion option.
"make" in php4 also builds libzend now.
The Apache module doesn't compile right now, but a fix for that is
coming up.


											
										
										
											1999-04-17 00:37:12 +00:00
+								 */