php-src/ext/xml/compat.c
2006-01-01 12:51:34 +00:00

722 lines
19 KiB
C

/*
+----------------------------------------------------------------------+
| PHP Version 5 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2006 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Sterling Hughes <sterling@php.net> |
+----------------------------------------------------------------------+
*/
#include "php.h"
#if defined(HAVE_LIBXML) && (defined(HAVE_XML) || defined(HAVE_XMLRPC)) && !defined(HAVE_LIBEXPAT)
#include "expat_compat.h"
/* Node wrapping a libxml2 namespace pointer together with a counter and
 * untyped links to neighbouring nodes.
 * NOTE(review): nothing in the visible part of this file uses this type;
 * ref_count/next/prev semantics are presumed from their names - confirm. */
typedef struct _php_xml_ns {
xmlNsPtr nsptr; /* the wrapped libxml2 namespace */
int ref_count; /* presumably a reference counter - TODO confirm */
void *next; /* untyped link, presumably to the following node */
void *prev; /* untyped link, presumably to the preceding node */
} php_xml_ns;
#ifdef LIBXML_EXPAT_COMPAT
/* Non-zero when __ns is a non-NULL string that is exactly "xmlns". */
#define IS_NS_DECL(__ns) \
	((__ns) != NULL && strcmp((__ns), "xmlns") == 0)
/* Build the element/attribute name that is handed to expat-style handlers.
 *
 * With a namespace URI the result is URI, followed by the first byte of the
 * parser's namespace separator, followed by name; without a URI it is a
 * plain copy of name.
 *
 * *qualified receives a string allocated by libxml2 string functions; the
 * callers in this file release it with xmlFree(). */
static void
_qualify_namespace(XML_Parser parser, const xmlChar *name, const xmlChar *URI, xmlChar **qualified)
{
	if (URI) {
		/* Stay with libxml allocation functions: the result is freed with xmlFree() */
		*qualified = xmlStrdup(URI);
		*qualified = xmlStrncat(*qualified, parser->_ns_seperator, 1);
		/* xmlStrlen() takes xmlChar * directly (strlen() expects char *) */
		*qualified = xmlStrncat(*qualified, name, xmlStrlen(name));
	} else {
		*qualified = xmlStrdup(name);
	}
}
static void
_start_element_handler(void *user, const xmlChar *name, const xmlChar **attributes)
{
XML_Parser parser = (XML_Parser) user;
xmlChar *qualified_name = NULL;
if (parser->h_start_element == NULL) {
if (parser->h_default) {
int attno = 0;
qualified_name = xmlStrncatNew((xmlChar *)"<", name, xmlStrlen(name));
if (attributes) {
while (attributes[attno] != NULL) {
int att_len;
char *att_string, *att_name, *att_value;
att_name = (char *)attributes[attno++];
att_value = (char *)attributes[attno++];
att_len = spprintf(&att_string, 0, " %s=\"%s\"", att_name, att_value);
qualified_name = xmlStrncat(qualified_name, (xmlChar *)att_string, att_len);
efree(att_string);
}
}
qualified_name = xmlStrncat(qualified_name, (xmlChar *)">", 1);
parser->h_default(parser->user, (const XML_Char *) qualified_name, xmlStrlen(qualified_name));
xmlFree(qualified_name);
}
return;
}
qualified_name = xmlStrdup(name);
parser->h_start_element(parser->user, (const XML_Char *) qualified_name, (const XML_Char **) attributes);
xmlFree(qualified_name);
}
static void
_start_element_handler_ns(void *user, const xmlChar *name, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar ** namespaces, int nb_attributes, int nb_defaulted, const xmlChar ** attributes)
{
XML_Parser parser = (XML_Parser) user;
xmlChar *qualified_name = NULL;
xmlChar **attrs = NULL;
int i;
int z = 0;
int y = 0;
if (nb_namespaces > 0 && parser->h_start_ns != NULL) {
for (i = 0; i < nb_namespaces; i += 1) {
parser->h_start_ns(parser->user, (const XML_Char *) namespaces[y], (const XML_Char *) namespaces[y+1]);
y += 2;
}
y = 0;
}
if (parser->h_start_element == NULL && parser->h_default == NULL) {
return;
}
_qualify_namespace(parser, name, URI, &qualified_name);
if (attributes != NULL) {
xmlChar *qualified_name_attr = NULL;
attrs = safe_emalloc((nb_attributes * 2) + 1, sizeof(int *), 0);
for (i = 0; i < nb_attributes; i += 1) {
if (attributes[y+1] != NULL) {
_qualify_namespace(parser, attributes[y] , attributes[y + 2], &qualified_name_attr);
} else {
qualified_name_attr = xmlStrdup(attributes[y]);
}
attrs[z] = qualified_name_attr;
attrs[z + 1] = xmlStrndup(attributes[y + 3] , (int) (attributes[y + 4] - attributes[y + 3]));
z += 2;
y += 5;
}
attrs[z] = NULL;
}
parser->h_start_element(parser->user, (const XML_Char *) qualified_name, (const XML_Char **) attrs);
if (attrs) {
for (i = 0; i < z; i++) {
xmlFree(attrs[i]);
}
efree(attrs);
}
xmlFree(qualified_name);
}
/* Report the end of every namespace in the list starting at nsptr to the
 * end-namespace handler.  The recursion descends to the end of the list
 * first, so the handler sees the entries in reverse list order.
 * Nothing is reported when no end-namespace handler is registered. */
static void
_namespace_handler(XML_Parser parser, xmlNsPtr nsptr)
{
	if (nsptr == NULL) {
		return;
	}

	_namespace_handler(parser, nsptr->next);

	/* Like the other callbacks in this file, never call through a NULL handler */
	if (parser->h_end_ns != NULL) {
		parser->h_end_ns(parser->user, nsptr->prefix);
	}
}
/* endElement callback for the non-namespace case.
 *
 * A registered end-element handler receives a copy of the name.  Without
 * one, a registered default handler receives the text "</name>".  With
 * neither handler nothing happens. */
static void
_end_element_handler(void *user, const xmlChar *name)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_end_element != NULL) {
		xmlChar *name_copy = xmlStrdup(name);

		parser->h_end_element(parser->user, (const XML_Char *) name_copy);
		xmlFree(name_copy);
	} else if (parser->h_default) {
		char *closing_tag;

		spprintf(&closing_tag, 0, "</%s>", (char *)name);
		parser->h_default(parser->user, (const XML_Char *) closing_tag, strlen(closing_tag));
		efree(closing_tag);
	}
}
/* endElementNs callback for the namespace-aware case.
 *
 * A registered end-element handler receives the name qualified by
 * _qualify_namespace().  Without one, a registered default handler
 * receives the closing tag text ("</prefix:name>" or "</name>"), matching
 * what _end_element_handler does for the non-namespace case. */
static void
_end_element_handler_ns(void *user, const xmlChar *name, const xmlChar * prefix, const xmlChar *URI)
{
	xmlChar *qualified_name;
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_end_element == NULL) {
		if (parser->h_default) {
			char *end_element;
			int end_element_len;

			if (prefix) {
				end_element_len = spprintf(&end_element, 0, "</%s:%s>", (char *) prefix, (char *) name);
			} else {
				end_element_len = spprintf(&end_element, 0, "</%s>", (char *) name);
			}
			parser->h_default(parser->user, (const XML_Char *) end_element, end_element_len);
			efree(end_element);
		}
		return;
	}

	_qualify_namespace(parser, name, URI, &qualified_name);

	parser->h_end_element(parser->user, (const XML_Char *) qualified_name);

	xmlFree(qualified_name);
}
/* Character-data callback: forwards the data and its length to the
 * character-data handler when one is registered, otherwise to the default
 * handler when one is registered. */
static void
_cdata_handler(void *user, const xmlChar *cdata, int cdata_len)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_cdata != NULL) {
		parser->h_cdata(parser->user, (const XML_Char *) cdata, cdata_len);
	} else if (parser->h_default) {
		parser->h_default(parser->user, (const XML_Char *) cdata, cdata_len);
	}
}
/* Processing-instruction callback.
 *
 * A registered PI handler receives target and data unchanged.  Without
 * one, a registered default handler receives the instruction re-serialised
 * as "<?target data?>" (or "<?target?>" when data is NULL). */
static void
_pi_handler(void *user, const xmlChar *target, const xmlChar *data)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_pi == NULL) {
		if (parser->h_default) {
			char *full_pi;
			int full_pi_len;

			/* Do not push a NULL pointer through %s */
			if (data != NULL) {
				full_pi_len = spprintf(&full_pi, 0, "<?%s %s?>", (char *)target, (char *)data);
			} else {
				full_pi_len = spprintf(&full_pi, 0, "<?%s?>", (char *)target);
			}
			/* spprintf() already returns the length; no need to re-measure a char * with xmlStrlen() */
			parser->h_default(parser->user, (const XML_Char *) full_pi, full_pi_len);
			efree(full_pi);
		}
		return;
	}

	parser->h_pi(parser->user, (const XML_Char *) target, (const XML_Char *) data);
}
/* Unparsed-entity declaration callback: forwards to the registered handler,
 * if any.  The handler is called with (user, name, NULL, sys_id, pub_id,
 * notation), i.e. the system identifier precedes the public one. */
static void
_unparsed_entity_decl_handler(void *user,
                              const xmlChar *name,
                              const xmlChar *pub_id,
                              const xmlChar *sys_id,
                              const xmlChar *notation)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_unparsed_entity_decl != NULL) {
		parser->h_unparsed_entity_decl(parser->user, name, NULL, sys_id, pub_id, notation);
	}
}
/* Notation declaration callback: forwards to the registered handler, if
 * any, as (user, notation, NULL, sys_id, pub_id). */
static void
_notation_decl_handler(void *user, const xmlChar *notation, const xmlChar *pub_id, const xmlChar *sys_id)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_notation_decl != NULL) {
		parser->h_notation_decl(parser->user, notation, NULL, sys_id, pub_id);
	}
}
/* Wrap data_len bytes of data in comment delimiters.
 *
 * *comment receives a NUL-terminated buffer obtained from xmlMalloc()
 * holding "<!--" + data + "-->"; *comment_len receives its length without
 * the terminator (data_len + 7). */
static void
_build_comment(const xmlChar *data, int data_len, xmlChar **comment, int *comment_len)
{
	xmlChar *cursor;

	*comment_len = 4 + data_len + 3;
	*comment = xmlMalloc(*comment_len + 1);

	cursor = *comment;
	memcpy(cursor, "<!--", 4);
	cursor += 4;
	memcpy(cursor, data, data_len);
	cursor += data_len;
	memcpy(cursor, "-->", 3);
	cursor += 3;
	*cursor = '\0';
}
/* Comment callback: when a default handler is registered, hands it the
 * comment wrapped in "<!--" / "-->" (see _build_comment).  Nothing happens
 * without a default handler. */
static void
_comment_handler(void *user, const xmlChar *comment)
{
	XML_Parser parser = (XML_Parser) user;
	xmlChar *wrapped;
	int wrapped_len;

	if (!parser->h_default) {
		return;
	}

	_build_comment(comment, xmlStrlen(comment), &wrapped, &wrapped_len);
	parser->h_default(parser->user, wrapped, wrapped_len);
	xmlFree(wrapped);
}
/* Turn an entity name into its reference form.
 *
 * *entity receives a NUL-terminated buffer obtained from xmlMalloc()
 * holding '&' + name (len bytes) + ';'; *entity_len receives its length
 * without the terminator (len + 2). */
static void
_build_entity(const xmlChar *name, int len, xmlChar **entity, int *entity_len)
{
	xmlChar *cursor;

	*entity_len = len + 2;
	*entity = xmlMalloc(*entity_len + 1);

	cursor = *entity;
	*cursor++ = '&';
	memcpy(cursor, name, len);
	cursor += len;
	*cursor++ = ';';
	*cursor = '\0';
}
/* Forward an external entity reference to the registered handler, if any.
 * The handler is called with the parser itself (not parser->user) as first
 * argument, followed by names, an empty string, sys_id and pub_id.
 * type and content are not used. */
static void
_external_entity_ref_handler(void *user, const xmlChar *names, int type, const xmlChar *sys_id, const xmlChar *pub_id, xmlChar *content)
{
	XML_Parser parser = (XML_Parser) user;

	if (parser->h_external_entity_ref != NULL) {
		parser->h_external_entity_ref(parser, names, "", sys_id, pub_id);
	}
}
/* getEntity callback: looks up the entity called name and, as a side
 * effect, notifies the default / character-data / external-entity handlers.
 *
 * Returns the predefined or document entity found for name, or NULL when
 * none exists or when the parser context reports inSubset != 0. */
static xmlEntityPtr
_get_entity(void *user, const xmlChar *name)
{
XML_Parser parser = (XML_Parser) user;
xmlEntityPtr ret = NULL;
/* No lookup and no notification while inSubset is non-zero */
if (parser->parser->inSubset == 0) {
/* Predefined entities take precedence over entities declared in the document */
ret = xmlGetPredefinedEntity(name);
if (ret == NULL)
ret = xmlGetDocEntity(parser->parser->myDoc, name);
/* Notify handlers for unknown entities, and for known entities unless the
   parser is currently inside an entity value or an attribute value */
if (ret == NULL || (parser->parser->instate != XML_PARSER_ENTITY_VALUE && parser->parser->instate != XML_PARSER_ATTRIBUTE_VALUE)) {
/* Unknown, internal general, internal parameter or predefined entity */
if (ret == NULL || ret->etype == XML_INTERNAL_GENERAL_ENTITY || ret->etype == XML_INTERNAL_PARAMETER_ENTITY || ret->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
/* With a default handler the unexpanded reference ("&name;") is reported to it,
   except for a predefined entity while a character-data handler is registered */
if (parser->h_default && ! (ret && ret->etype == XML_INTERNAL_PREDEFINED_ENTITY && parser->h_cdata)) {
xmlChar *entity;
int len;
_build_entity(name, xmlStrlen(name), &entity, &len);
parser->h_default(parser->user, (const xmlChar *) entity, len);
xmlFree(entity);
} else {
/* expat does not expand internal entities when a default handler is present;
   otherwise it expands them and passes the content to the cdata handler */
if (parser->h_cdata && ret) {
parser->h_cdata(parser->user, ret->content, xmlStrlen(ret->content));
}
}
} else {
/* Of the remaining entity types only external general parsed entities are reported */
if (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
_external_entity_ref_handler(user, ret->name, ret->etype, ret->SystemID, ret->ExternalID, NULL);
}
}
}
}
return ret;
}
/* SAX callback table handed to xmlCreatePushParserCtxt() by
 * XML_ParserCreate_MM().  The initializer is positional, so the order of
 * the entries must follow the field order of libxml2's xmlSAXHandler. */
static xmlSAXHandler
php_xml_compat_handlers = {
NULL, /* internalSubset */
NULL, /* isStandalone */
NULL, /* hasInternalSubset */
NULL, /* hasExternalSubset */
NULL, /* resolveEntity */
_get_entity, /* getEntity */
NULL, /* entityDecl */
_notation_decl_handler, /* notationDecl */
NULL, /* attributeDecl */
NULL, /* elementDecl */
_unparsed_entity_decl_handler, /* unparsedEntity */
NULL, /* setDocumentLocator */
NULL, /* startDocument */
NULL, /* endDocument */
_start_element_handler, /* startElement */
_end_element_handler, /* endElement */
NULL, /* reference */
_cdata_handler, /* characters */
NULL, /* ignorableWhitespace */
_pi_handler, /* processingInstruction */
_comment_handler, /* comment */
NULL, /* warning */
NULL, /* error */
NULL, /* fatalError */
NULL, /* getParameterEntity */
_cdata_handler, /* cdataBlock */
NULL, /* externalSubset */
XML_SAX2_MAGIC, /* initialized */
NULL, /* _private */
_start_element_handler_ns, /* startElementNs */
_end_element_handler_ns, /* endElementNs */
NULL /* serror */
};
/* Create a parser without namespace processing: delegates to
 * XML_ParserCreate_MM() with no memory suite and no namespace separator.
 * Returns whatever XML_ParserCreate_MM() returns (NULL on failure). */
PHPAPI XML_Parser
XML_ParserCreate(const XML_Char *encoding)
{
return XML_ParserCreate_MM(encoding, NULL, NULL);
}
/* Create a namespace-aware parser.  The single separator character is
 * turned into a one-character string and passed to XML_ParserCreate_MM(),
 * which keeps its own copy of it. */
PHPAPI XML_Parser
XML_ParserCreateNS(const XML_Char *encoding, const XML_Char sep)
{
	XML_Char separator[2];

	separator[1] = '\0';
	separator[0] = sep;

	return XML_ParserCreate_MM(encoding, NULL, separator);
}
/* Allocate and initialise a parser object backed by a libxml2 push parser.
 *
 * encoding and memsuite are accepted for expat compatibility but are not
 * used by this function.  A non-NULL sep enables namespace mode and is
 * copied into the parser.
 *
 * Returns the new parser, or NULL when the libxml2 context cannot be
 * created. */
PHPAPI XML_Parser
XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, const XML_Char *sep)
{
	XML_Parser handle = (XML_Parser) emalloc(sizeof(struct _XML_Parser));

	memset(handle, 0, sizeof(struct _XML_Parser));
	handle->use_namespace = 0;
	handle->_ns_seperator = NULL;

	/* The wrapper itself is the SAX user data, so callbacks can reach the handlers */
	handle->parser = xmlCreatePushParserCtxt((xmlSAXHandlerPtr) &php_xml_compat_handlers, (void *) handle, NULL, 0, NULL);
	if (handle->parser == NULL) {
		efree(handle);
		return NULL;
	}

#if LIBXML_VERSION <= 20617
	/* for older versions of libxml2, allow correct detection of
	 * charset in documents with a BOM: */
	handle->parser->charset = XML_CHAR_ENCODING_NONE;
#endif

	handle->parser->replaceEntities = 1;
	handle->parser->wellFormed = 0;

	if (sep == NULL) {
		/* The handler table must carry XML_SAX2_MAGIC for
		   xmlCreatePushParserCtxt; reset the flag here for the
		   non-namespace case */
		handle->parser->sax->initialized = 1;
	} else {
		handle->use_namespace = 1;
		handle->parser->sax2 = 1;
		handle->_ns_seperator = xmlStrdup(sep);
	}

	return handle;
}
/* Store the opaque pointer that the callbacks in this file pass as first
 * argument to the registered handlers. */
PHPAPI void
XML_SetUserData(XML_Parser parser, void *user)
{
parser->user = user;
}
/* Return the pointer previously stored with XML_SetUserData(). */
PHPAPI void *
XML_GetUserData(XML_Parser parser)
{
return parser->user;
}
/* Register the start-element and end-element handlers.  Passing NULL for
 * either one unregisters it; the element callbacks then fall back to the
 * default handler where they implement such a fallback. */
PHPAPI void
XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end)
{
parser->h_start_element = start;
parser->h_end_element = end;
}
/* Register the character-data handler (used by _cdata_handler and
 * _get_entity). */
PHPAPI void
XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler cdata)
{
parser->h_cdata = cdata;
}
/* Register the processing-instruction handler (used by _pi_handler). */
PHPAPI void
XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler pi)
{
parser->h_pi = pi;
}
/* Store the comment handler.
 * NOTE(review): no code visible in this file ever calls h_comment;
 * _comment_handler only forwards comments to the default handler. */
PHPAPI void
XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler comment)
{
parser->h_comment = comment;
}
/* Register the default handler, which receives re-serialised markup
 * (tags, comments, processing instructions, entity references) or raw
 * character data when no more specific handler is registered. */
PHPAPI void
XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler d)
{
parser->h_default = d;
}
/* Register the handler for unparsed entity declarations (used by
 * _unparsed_entity_decl_handler). */
PHPAPI void
XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler unparsed_decl)
{
parser->h_unparsed_entity_decl = unparsed_decl;
}
/* Register the handler for notation declarations (used by
 * _notation_decl_handler). */
PHPAPI void
XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler notation_decl)
{
parser->h_notation_decl = notation_decl;
}
/* Register the handler for external entity references (used by
 * _external_entity_ref_handler, which passes it the parser itself rather
 * than the user data). */
PHPAPI void
XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler ext_entity)
{
parser->h_external_entity_ref = ext_entity;
}
/* Register the handler called by _start_element_handler_ns for each
 * namespace declared on an element. */
PHPAPI void
XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start_ns)
{
parser->h_start_ns = start_ns;
}
/* Register the end-namespace handler (called by _namespace_handler). */
PHPAPI void
XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end_ns)
{
parser->h_end_ns = end_ns;
}
/* Feed data_len bytes of data to the push parser; is_final is passed
 * through to xmlParseChunk() as its terminate flag.
 *
 * Returns 1 when xmlParseChunk() reports no error, or when the last
 * recorded error is no more severe than XML_ERR_WARNING; returns 0
 * otherwise. */
PHPAPI int
XML_Parse(XML_Parser parser, const XML_Char *data, int data_len, int is_final)
{
	int status;

	/* The following is a hack to keep BC with PHP 4 while avoiding
	   the infinite loop in libxml <= 2.6.17 which occurs when no encoding
	   has been defined and none can be detected */
#if LIBXML_VERSION <= 20617
	if (parser->parser->charset == XML_CHAR_ENCODING_NONE
		&& (data_len >= 4 || (parser->parser->input->buf->buffer->use + data_len >= 4))) {
		xmlChar head[4];
		int buffered;

		/* Assemble the first four bytes from what is already buffered plus the new chunk */
		buffered = parser->parser->input->buf->buffer->use;
		if (buffered > 4) {
			buffered = 4;
		}

		memcpy(head, parser->parser->input->buf->buffer->content, (size_t)buffered);
		memcpy(head + buffered, data, (size_t)(4 - buffered));

		/* Nothing detectable: fall back to UTF-8 */
		if (xmlDetectCharEncoding(&head[0], 4) == XML_CHAR_ENCODING_NONE) {
			parser->parser->charset = XML_CHAR_ENCODING_UTF8;
		}
	}
#endif

	status = xmlParseChunk(parser->parser, data, data_len, is_final);

	/* Only errors above warning level count as failure */
	if (status && parser->parser->lastError.level > XML_ERR_WARNING) {
		return 0;
	}
	return 1;
}
/* Return the error number recorded in the underlying libxml2 parser
 * context (errNo). */
PHPAPI int
XML_GetErrorCode(XML_Parser parser)
{
return parser->parser->errNo;
}
/* Messages returned by XML_ErrorString(), indexed by error code.
 * The position of an entry is its code (the trailing numeric comments on
 * the last entries give the index), so entries must not be reordered.
 * NOTE(review): the codes are presumably libxml2 parser error numbers as
 * returned by XML_GetErrorCode() - confirm against the libxml2 enum. */
static const XML_Char *const error_mapping[] = {
"No error",
"Internal error",
"No memory",
"Invalid document start",
"Empty document",
"Invalid document end",
"Invalid hexadecimal character reference",
"Invalid decimal character reference",
"Invalid character reference",
"Invalid character",
"XML_ERR_CHARREF_AT_EOF",
"XML_ERR_CHARREF_IN_PROLOG",
"XML_ERR_CHARREF_IN_EPILOG",
"XML_ERR_CHARREF_IN_DTD",
"XML_ERR_ENTITYREF_AT_EOF",
"XML_ERR_ENTITYREF_IN_PROLOG",
"XML_ERR_ENTITYREF_IN_EPILOG",
"XML_ERR_ENTITYREF_IN_DTD",
"PEReference at end of document",
"PEReference in prolog",
"PEReference in epilog",
"PEReference: forbidden within markup decl in internal subset",
"XML_ERR_ENTITYREF_NO_NAME",
"EntityRef: expecting ';'",
"PEReference: no name",
"PEReference: expecting ';'",
"Undeclared entity error",
"Undeclared entity warning",
"Unparsed Entity",
"XML_ERR_ENTITY_IS_EXTERNAL",
"XML_ERR_ENTITY_IS_PARAMETER",
"Unknown encoding",
"Unsupported encoding",
"String not started expecting ' or \"",
"String not closed expecting \" or '",
"Namespace declaration error",
"EntityValue: \" or ' expected",
"EntityValue: \" or ' expected",
"< in attribute",
"Attribute not started",
"Attribute not finished",
"Attribute without value",
"Attribute redefined",
"SystemLiteral \" or ' expected",
"SystemLiteral \" or ' expected",
/* "XML_ERR_COMMENT_NOT_STARTED", <= eliminated on purpose */
"Comment not finished",
"Processing Instruction not started",
"Processing Instruction not finished",
"NOTATION: Name expected here",
"'>' required to close NOTATION declaration",
"'(' required to start ATTLIST enumeration",
"'(' required to start ATTLIST enumeration",
"MixedContentDecl : '|' or ')*' expected",
"XML_ERR_MIXED_NOT_FINISHED",
"ELEMENT in DTD not started",
"ELEMENT in DTD not finished",
"XML declaration not started",
"XML declaration not finished",
"XML_ERR_CONDSEC_NOT_STARTED",
"XML conditional section not closed",
"Content error in the external subset",
"DOCTYPE not finished",
"Sequence ']]>' not allowed in content",
"CDATA not finished",
"Reserved XML Name",
"Space required",
"XML_ERR_SEPARATOR_REQUIRED",
"NmToken expected in ATTLIST enumeration",
"XML_ERR_NAME_REQUIRED",
"MixedContentDecl : '#PCDATA' expected",
"SYSTEM or PUBLIC, the URI is missing",
"PUBLIC, the Public Identifier is missing",
"< required",
"> required",
"</ required",
"= required",
"Mismatched tag",
"Tag not finished",
"standalone accepts only 'yes' or 'no'",
"Invalid XML encoding name",
"Comment must not contain '--' (double-hyphen)",
"Invalid encoding",
"external parsed entities cannot be standalone",
"XML conditional section '[' expected",
"Entity value required",
"chunk is not well balanced",
"extra content at the end of well balanced chunk",
"XML_ERR_ENTITY_CHAR_ERROR",
"PEReferences forbidden in internal subset",
"Detected an entity reference loop",
"XML_ERR_ENTITY_BOUNDARY",
"Invalid URI",
"Fragment not allowed",
"XML_WAR_CATALOG_PI",
"XML_ERR_NO_DTD",
"conditional section INCLUDE or IGNORE keyword expected", /* 95 */
"Version in XML Declaration missing", /* 96 */
"XML_WAR_UNKNOWN_VERSION", /* 97 */
"XML_WAR_LANG_VALUE", /* 98 */
"XML_WAR_NS_URI", /* 99 */
"XML_WAR_NS_URI_RELATIVE", /* 100 */
"Missing encoding in text declaration" /* 101 */
};
/* Translate an error code into its message from error_mapping.
 * Codes outside the table (negative or past the last entry) yield
 * "Unknown". */
PHPAPI const XML_Char *
XML_ErrorString(int code)
{
	const int known_codes = (int)(sizeof(error_mapping) / sizeof(error_mapping[0]));

	if (code >= 0 && code < known_codes) {
		return error_mapping[code];
	}
	return "Unknown";
}
/* Return the line number recorded on the parser's current input. */
PHPAPI int
XML_GetCurrentLineNumber(XML_Parser parser)
{
return parser->parser->input->line;
}
/* Return the column number recorded on the parser's current input. */
PHPAPI int
XML_GetCurrentColumnNumber(XML_Parser parser)
{
return parser->parser->input->col;
}
/* Return the current input's consumed count plus the distance of the
 * current position (cur) from the start of the buffer (base).
 * NOTE(review): presumably this is the byte offset from the start of the
 * document - confirm against libxml2's definition of `consumed`. */
PHPAPI int
XML_GetCurrentByteIndex(XML_Parser parser)
{
return parser->parser->input->consumed +
(parser->parser->input->cur - parser->parser->input->base);
}
/* Return the same value as XML_GetCurrentByteIndex(): the input's
 * consumed count plus the offset of cur from base. */
PHPAPI int
XML_GetCurrentByteCount(XML_Parser parser)
{
/* WARNING: this computation is identical to XML_GetCurrentByteIndex();
 * the two functions should probably return different values */
return parser->parser->input->consumed +
(parser->parser->input->cur - parser->parser->input->base);
}
/* Return the version string reported by this compatibility layer; it is
 * the fixed literal "1.0". */
PHPAPI const XML_Char *XML_ExpatVersion(void)
{
return "1.0";
}
/* Destroy a parser created by XML_ParserCreate*(): releases the namespace
 * separator copy (namespace mode only), any document attached to the
 * libxml2 context, the context itself and finally the wrapper. */
PHPAPI void
XML_ParserFree(XML_Parser parser)
{
	if (parser->use_namespace && parser->_ns_seperator) {
		xmlFree(parser->_ns_seperator);
	}

	if (parser->parser->myDoc) {
		xmlFreeDoc(parser->parser->myDoc);
		/* Clear the pointer before the context is freed */
		parser->parser->myDoc = NULL;
	}

	xmlFreeParserCtxt(parser->parser);
	efree(parser);
}
#endif /* LIBXML_EXPAT_COMPAT */
#endif
/**
* Local Variables:
* tab-width: 4
* c-basic-offset: 4
* indent-tabs-mode: t
* End:
* vim600: fdm=marker
* vim: ts=4 noet sw=4
*/