From 68aca051a61608e6cbbd2d36263e4470008e64f5 Mon Sep 17 00:00:00 2001 From: "William M. Brack" Date: Sat, 11 Oct 2003 15:22:13 +0000 Subject: [PATCH] new files for a different method for doing range validation of character * genChRange.py, chvalid.def, chvalid.c, include/libxml/chvalid.h: new files for a different method for doing range validation of character data. * Makefile.am, parserInternals.c, include/libxml/Makefile.am, include/libxml/parserInternals.h: modified for new range method. * catalog.c: small enhance for warning message (using one of the new range routines) --- ChangeLog | 10 + Makefile.am | 4 +- catalog.c | 8 +- chvalid.c | 186 ++++++++++++ chvalid.def | 342 +++++++++++++++++++++ chvalid.h | 105 +++++++ genChRanges.py | 465 +++++++++++++++++++++++++++++ include/libxml/Makefile.am | 3 +- include/libxml/parserInternals.h | 23 +- parserInternals.c | 490 +------------------------------ 10 files changed, 1122 insertions(+), 514 deletions(-) create mode 100755 chvalid.c create mode 100755 chvalid.def create mode 100644 chvalid.h create mode 100755 genChRanges.py diff --git a/ChangeLog b/ChangeLog index c4f9a84bd..50dac47d7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Sat Oct 11 23:11:22 HKT 2003 William Brack + + * genChRange.py, chvalid.def, chvalid.c, include/libxml/chvalid.h: + new files for a different method for doing range validation + of character data. + * Makefile.am, parserInternals.c, include/libxml/Makefile.am, + include/libxml/parserInternals.h: modified for new range method. 
+ * catalog.c: small enhance for warning message (using one + of the new range routines) + Sat Oct 11 13:24:57 CEST 2003 Daniel Veillard * valid.c include/libxml/valid.h: adding an serror field to diff --git a/Makefile.am b/Makefile.am index 8fc01b5b7..887234155 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,7 +27,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ catalog.c globals.c threads.c c14n.c \ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \ triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \ - legacy.c walker.c + legacy.c walker.c chvalid.c else libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \ @@ -36,7 +36,7 @@ libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \ catalog.c globals.c threads.c c14n.c \ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \ xmlreader.c relaxng.c dict.c SAX2.c \ - legacy.c xmldwalk.c + legacy.c xmldwalk.c chvalid.c endif DEPS = $(top_builddir)/libxml2.la diff --git a/catalog.c b/catalog.c index 06dbb52e5..4375ebb06 100644 --- a/catalog.c +++ b/catalog.c @@ -2932,11 +2932,11 @@ xmlInitializeCatalog(void) { cur = catalogs; nextent = &catal->xml; while (*cur != '\0') { - while (IS_BLANK(*cur)) + while (xmlIsBlank_ch(*cur)) cur++; if (*cur != 0) { paths = cur; - while ((*cur != 0) && (!IS_BLANK(*cur))) + while ((*cur != 0) && (!xmlIsBlank_ch(*cur))) cur++; path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths); if (path != NULL) { @@ -3015,10 +3015,10 @@ xmlLoadCatalogs(const char *pathss) { cur = pathss; while ((cur != NULL) && (*cur != 0)) { - while (IS_BLANK(*cur)) cur++; + while (xmlIsBlank_ch(*cur)) cur++; if (*cur != 0) { paths = cur; - while ((*cur != 0) && (*cur != ':') && (!IS_BLANK(*cur))) + while ((*cur != 0) && (*cur != ':') && (!xmlIsBlank_ch(*cur))) cur++; path = xmlStrndup((const xmlChar *)paths, cur - paths); if (path != NULL) { diff --git 
a/chvalid.c b/chvalid.c new file mode 100755 index 000000000..fbfd32b46 --- /dev/null +++ b/chvalid.c @@ -0,0 +1,186 @@ +/* + * chvalid.c: this module implements the character range + * validation APIs + * + * This file is automatically generated from the cvs source + * definition files using the genChRanges.py Python script + * + * Generation date: Sat Oct 11 20:57:37 2003 + * Sources: chvalid.def + * William Brack + */ + +#include "chvalid.h" + +/* + * The initial tables ({func_name}_tab) are used to validate whether a + * single-byte character is within the specified group. Each table + * contains 256 bytes, with each byte representing one of the 256 + * possible characters. If the table byte is set, the character is + * allowed. + * + */ +unsigned char xmlIsPubidChar_tab[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01, + 0x01, 0x01, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x00, 0x00, 0x01, + 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 }; + +static xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131}, {0x134, 0x13e}, + {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3}, {0x1cd, 0x1f0}, + {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8}, {0x2bb, 0x2c1}, + {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c}, {0x38e, 0x3a1}, + {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da}, {0x3dc, 0x3dc}, + {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3}, {0x401, 0x40c}, + {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481}, {0x490, 0x4c4}, + {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb}, {0x4ee, 0x4f5}, + {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559}, {0x561, 0x586}, + {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a}, + {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce}, {0x6d0, 0x6d3}, + {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939}, {0x93d, 0x93d}, + {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8}, + {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9dc, 0x9dd}, + {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10}, + {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36}, + {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74}, + {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8}, + {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd}, + {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28}, + {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39}, {0xb3d, 0xb3d}, + {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a}, {0xb8e, 0xb90}, + {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, + {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9}, + {0xc05, 0xc0c}, 
{0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33}, + {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90}, + {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcde, 0xcde}, + {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28}, + {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e}, {0xe30, 0xe30}, + {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82}, {0xe84, 0xe84}, + {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97}, + {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7}, + {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0}, {0xeb2, 0xeb3}, + {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47}, {0xf49, 0xf69}, + {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100}, {0x1102, 0x1103}, + {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c}, {0x110e, 0x1112}, + {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140}, {0x114c, 0x114c}, + {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159}, + {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167}, + {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173}, {0x1175, 0x1175}, + {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab}, {0x11ae, 0x11af}, + {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2}, {0x11eb, 0x11eb}, + {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, + {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, + {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, + {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, + {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, + {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2126, 0x2126}, + {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182}, {0x3041, 0x3094}, + {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}}; +xmlChRangeGroup xmlIsBaseCharGroup = {197, 0, xmlIsBaseChar_srng}; + +static xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff}, {0xe000, 0xfffd}}; +static xmlChLRange xmlIsChar_lrng[] = { 
{0x10000, 0x10ffff}}; +xmlChRangeGroup xmlIsCharGroup = {2, 1, xmlIsChar_srng, xmlIsChar_lrng}; + +static xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345}, + {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, + {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, + {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df}, + {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903}, + {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954}, + {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be}, + {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd}, + {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c}, + {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48}, + {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc}, + {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03}, + {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, + {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, + {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, + {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, + {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6}, + {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d}, + {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e}, + {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd}, + {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39}, + {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b}, + {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7}, + {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f}, + {0x3099, 0x3099}, {0x309a, 0x309a}}; +xmlChRangeGroup xmlIsCombiningGroup = {95, 0, xmlIsCombining_srng}; + +static xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669}, {0x6f0, 0x6f9}, + {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, {0xae6, 0xaef}, + {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, {0xce6, 0xcef}, 
+    {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}};
+xmlChRangeGroup xmlIsDigitGroup = {14, 0, xmlIsDigit_srng};
+
+static xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0}, {0x2d1, 0x2d1},
+    {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46}, {0xec6, 0xec6},
+    {0x3005, 0x3005}, {0x3031, 0x3031}, {0x3032, 0x3032}, {0x3033, 0x3033},
+    {0x3034, 0x3034}, {0x3035, 0x3035}, {0x309d, 0x309d}, {0x309e, 0x309e},
+    {0x30fc, 0x30fc}, {0x30fd, 0x30fd}, {0x30fe, 0x30fe}};
+xmlChRangeGroup xmlIsExtenderGroup = {17, 0, xmlIsExtender_srng};
+
+static xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007},
+    {0x3021, 0x3029}, {0x4300, 0x9fa5}, {0xf900, 0xfa2d}};
+xmlChRangeGroup xmlIsIdeographicGroup = {4, 0, xmlIsIdeographic_srng};
+
+
+/**
+ * xmlCharInRange:
+ * @val:  character value (code point) to be validated
+ * @rptr:  range group describing the set of accepted characters
+ *
+ * Binary-searches the range tables of @rptr for @val.  Values below
+ * 0x10000 are looked up in the 'short' table (16-bit bounds), all other
+ * values in the 'long' table.  Each table must be sorted in ascending
+ * order, which is how the generator script emits them.
+ *
+ * The search is bounded by the index of the last entry (count - 1);
+ * using the count itself as the upper bound would let the probe read
+ * one element past the end of the table.
+ *
+ * NOTE(review): this file is generated by genChRanges.py; the template
+ * for this routine in the generator presumably needs the same bounds -
+ * confirm before regenerating.
+ *
+ * Returns 1 if @val lies inside one of the ranges, 0 otherwise
+ * (including when @rptr is NULL or the relevant table is empty).
+ */
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+    int low, high, mid;
+    xmlChSRangePtr sptr;
+    xmlChLRangePtr lptr;
+
+    if (rptr == NULL)
+        return 0;
+    if (val < 0x10000) { /* is val in 'short' or 'long' array? */
+        if (rptr->nbShortRange == 0)
+            return 0;
+        low = 0;
+        high = rptr->nbShortRange - 1; /* index of the last entry */
+        sptr = rptr->shortRange;
+        while (low <= high) {
+            mid = (low + high) / 2;
+            if ((unsigned short) val < sptr[mid].low)
+                high = mid - 1;
+            else if ((unsigned short) val > sptr[mid].high)
+                low = mid + 1;
+            else
+                return 1;
+        }
+    } else {
+        if (rptr->nbLongRange == 0)
+            return 0;
+        low = 0;
+        high = rptr->nbLongRange - 1; /* index of the last entry */
+        lptr = rptr->longRange;
+        while (low <= high) {
+            mid = (low + high) / 2;
+            if (val < lptr[mid].low)
+                high = mid - 1;
+            else if (val > lptr[mid].high)
+                low = mid + 1;
+            else
+                return 1;
+        }
+    }
+    return 0;
+}
+
diff --git a/chvalid.def b/chvalid.def
new file mode 100755
index 000000000..eed7ab5fb
--- /dev/null
+++ b/chvalid.def
@@ -0,0 +1,342 @@
+name xmlIsChar
+ur 0x09 0x0a 0x0d 0x20..0xff
+ur 0x000100..0x00d7ff
+ur 0x00e000..0x00fffd
+ur 0x010000..0x10ffff
+end xmlIsChar
+
+name xmlIsPubidChar
+ur 0x20 0x0d 0x0a 'a'..'z' 'A'..'Z' '0'..'9'
+ur '-' '\' '(' ')' '+' ',' '.' '/'
+ur ':' '=' '?' ';' '!'
'*' '#' '@' +ur '$' '_' '%' +end + +name xmlIsBlank +ur 0x09 0x0a 0x0d 0x20 +end xmlIsBlank + +name xmlIsBaseChar +ur 0x41..0x5a 0x61..0x7a 0xc0..0xd6 0xd8..0xf6 0xf8..0xff +ur 0x000100..0x000131 +ur 0x000134..0x00013e +ur 0x000141..0x000148 +ur 0x00014a..0x00017e +ur 0x000180..0x0001c3 +ur 0x0001cd..0x0001f0 +ur 0x0001f4..0x0001f5 +ur 0x0001fa..0x000217 +ur 0x000250..0x0002a8 +ur 0x0002bb..0x0002c1 +ur 0x000386..0x000386 +ur 0x000388..0x00038a +ur 0x00038c +ur 0x00038e..0x0003a1 +ur 0x0003a3..0x0003ce +ur 0x0003d0..0x0003d6 +ur 0x0003da +ur 0x0003dc +ur 0x0003de +ur 0x0003e0 +ur 0x0003e2..0x0003f3 +ur 0x000401..0x00040c +ur 0x00040e..0x00044f +ur 0x000451..0x00045c +ur 0x00045e..0x000481 +ur 0x000490..0x0004c4 +ur 0x0004c7..0x0004c8 +ur 0x0004cb..0x0004cc +ur 0x0004d0..0x0004eb +ur 0x0004ee..0x0004f5 +ur 0x0004f8..0x0004f9 +ur 0x000531..0x000556 +ur 0x000559 +ur 0x000561..0x000586 +ur 0x0005d0..0x0005ea +ur 0x0005f0..0x0005f2 +ur 0x000621..0x00063a +ur 0x000641..0x00064a +ur 0x000671..0x0006b7 +ur 0x0006ba..0x0006be +ur 0x0006c0..0x0006ce +ur 0x0006d0..0x0006d3 +ur 0x0006d5 +ur 0x0006e5..0x0006e6 +ur 0x000905..0x000939 +ur 0x00093d +ur 0x000958..0x000961 +ur 0x000985..0x00098c +ur 0x00098f..0x000990 +ur 0x000993..0x0009a8 +ur 0x0009aa..0x0009b0 +ur 0x0009b2 +ur 0x0009b6..0x0009b9 +ur 0x0009dc..0x0009dd +ur 0x0009df..0x0009e1 +ur 0x0009f0..0x0009f1 +ur 0x000a05..0x000a0a +ur 0x000a0f..0x000a10 +ur 0x000a13..0x000a28 +ur 0x000a2a..0x000a30 +ur 0x000a32..0x000a33 +ur 0x000a35..0x000a36 +ur 0x000a38..0x000a39 +ur 0x000a59..0x000a5c +ur 0x000a5e +ur 0x000a72..0x000a74 +ur 0x000a85..0x000a8b +ur 0x000a8d +ur 0x000a8f..0x000a91 +ur 0x000a93..0x000aa8 +ur 0x000aaa..0x000ab0 +ur 0x000ab2..0x000ab3 +ur 0x000ab5..0x000ab9 +ur 0x000abd +ur 0x000ae0 +ur 0x000b05..0x000b0c +ur 0x000b0f..0x000b10 +ur 0x000b13..0x000b28 +ur 0x000b2a..0x000b30 +ur 0x000b32..0x000b33 +ur 0x000b36..0x000b39 +ur 0x000b3d +ur 0x000b5c..0x000b5d +ur 0x000b5f..0x000b61 +ur 0x000b85..0x000b8a +ur 
0x000b8e..0x000b90 +ur 0x000b92..0x000b95 +ur 0x000b99..0x000b9a +ur 0x000b9c +ur 0x000b9e..0x000b9f +ur 0x000ba3..0x000ba4 +ur 0x000ba8..0x000baa +ur 0x000bae..0x000bb5 +ur 0x000bb7..0x000bb9 +ur 0x000c05..0x000c0c +ur 0x000c0e..0x000c10 +ur 0x000c12..0x000c28 +ur 0x000c2a..0x000c33 +ur 0x000c35..0x000c39 +ur 0x000c60..0x000c61 +ur 0x000c85..0x000c8c +ur 0x000c8e..0x000c90 +ur 0x000c92..0x000ca8 +ur 0x000caa..0x000cb3 +ur 0x000cb5..0x000cb9 +ur 0x000cde +ur 0x000ce0..0x000ce1 +ur 0x000d05..0x000d0c +ur 0x000d0e..0x000d10 +ur 0x000d12..0x000d28 +ur 0x000d2a..0x000d39 +ur 0x000d60..0x000d61 +ur 0x000e01..0x000e2e +ur 0x000e30 +ur 0x000e32..0x000e33 +ur 0x000e40..0x000e45 +ur 0x000e81..0x000e82 +ur 0x000e84..0x000e84 +ur 0x000e87..0x000e88 +ur 0x000e8a +ur 0x000e8d +ur 0x000e94..0x000e97 +ur 0x000e99..0x000e9f +ur 0x000ea1..0x000ea3 +ur 0x000ea5 +ur 0x000ea7 +ur 0x000eaa..0x000eab +ur 0x000ead..0x000eae +ur 0x000eb0 +ur 0x000eb2..0x000eb3 +ur 0x000ebd +ur 0x000ec0..0x000ec4 +ur 0x000f40..0x000f47 +ur 0x000f49..0x000f69 +ur 0x0010a0..0x0010c5 +ur 0x0010d0..0x0010f6 +ur 0x001100 +ur 0x001102..0x001103 +ur 0x001105..0x001107 +ur 0x001109 +ur 0x00110b..0x00110c +ur 0x00110e..0x001112 +ur 0x00113c +ur 0x00113e +ur 0x001140 +ur 0x00114c +ur 0x00114e +ur 0x001150 +ur 0x001154..0x001155 +ur 0x001159 +ur 0x00115f..0x001161 +ur 0x001163 +ur 0x001165 +ur 0x001167 +ur 0x001169 +ur 0x00116d..0x00116e +ur 0x001172..0x001173 +ur 0x001175 +ur 0x00119e +ur 0x0011a8 +ur 0x0011ab +ur 0x0011ae..0x0011af +ur 0x0011b7..0x0011b8 +ur 0x0011ba +ur 0x0011bc..0x0011c2 +ur 0x0011eb +ur 0x0011f0 +ur 0x0011f9 +ur 0x001e00..0x001e9b +ur 0x001ea0..0x001ef9 +ur 0x001f00..0x001f15 +ur 0x001f18..0x001f1d +ur 0x001f20..0x001f45 +ur 0x001f48..0x001f4d +ur 0x001f50..0x001f57 +ur 0x001f59 +ur 0x001f5b +ur 0x001f5d +ur 0x001f5f..0x001f7d +ur 0x001f80..0x001fb4 +ur 0x001fb6..0x001fbc +ur 0x001fbe +ur 0x001fc2..0x001fc4 +ur 0x001fc6..0x001fcc +ur 0x001fd0..0x001fd3 +ur 0x001fd6..0x001fdb +ur 
0x001fe0..0x001fec +ur 0x001ff2..0x001ff4 +ur 0x001ff6..0x001ffc +ur 0x002126 +ur 0x00212a..0x00212b +ur 0x00212e +ur 0x002180..0x002182 +ur 0x003041..0x003094 +ur 0x0030a1..0x0030fa +ur 0x003105..0x00312c +ur 0x00ac00..0x00d7a3 +end +name xmlIsDigit +ur 0x30..0x39 +ur 0x660..0x669 +ur 0x6f0..0x6f9 +ur 0x966..0x96f +ur 0x9e6..0x9ef +ur 0xa66..0xa6f +ur 0xae6..0xaef +ur 0xb66..0xb6f +ur 0xbe7..0xbef +ur 0xc66..0xc6f +ur 0xce6..0xcef +ur 0xd66..0xd6f +ur 0xe50..0xe59 +ur 0xed0..0xed9 +ur 0xf20..0xf29 +end +name xmlIsCombining +ur 0x0300..0x0345 +ur 0x0360..0x0361 +ur 0x0483..0x0486 +ur 0x0591..0x05A1 +ur 0x05A3..0x05B9 +ur 0x05BB..0x05BD +ur 0x05BF +ur 0x05C1..0x05C2 +ur 0x05C4 +ur 0x064B..0x0652 +ur 0x0670 +ur 0x06D6..0x06DC +ur 0x06DD..0x06DF +ur 0x06E0..0x06E4 +ur 0x06E7..0x06E8 +ur 0x06EA..0x06ED +ur 0x0901..0x0903 +ur 0x093C +ur 0x093E..0x094C +ur 0x094D +ur 0x0951..0x0954 +ur 0x0962..0x0963 +ur 0x0981..0x0983 +ur 0x09BC +ur 0x09BE +ur 0x09BF +ur 0x09C0..0x09C4 +ur 0x09C7..0x09C8 +ur 0x09CB..0x09CD +ur 0x09D7 +ur 0x09E2..0x09E3 +ur 0x0A02 +ur 0x0A3C +ur 0x0A3E +ur 0x0A3F +ur 0x0A40..0x0A42 +ur 0x0A47..0x0A48 +ur 0x0A4B..0x0A4D +ur 0x0A70..0x0A71 +ur 0x0A81..0x0A83 +ur 0x0ABC +ur 0x0ABE..0x0AC5 +ur 0x0AC7..0x0AC9 +ur 0x0ACB..0x0ACD +ur 0x0B01..0x0B03 +ur 0x0B3C +ur 0x0B3E..0x0B43 +ur 0x0B47..0x0B48 +ur 0x0B4B..0x0B4D +ur 0x0B56..0x0B57 +ur 0x0B82..0x0B83 +ur 0x0BBE..0x0BC2 +ur 0x0BC6..0x0BC8 +ur 0x0BCA..0x0BCD +ur 0x0BD7 +ur 0x0C01..0x0C03 +ur 0x0C3E..0x0C44 +ur 0x0C46..0x0C48 +ur 0x0C4A..0x0C4D +ur 0x0C55..0x0C56 +ur 0x0C82..0x0C83 +ur 0x0CBE..0x0CC4 +ur 0x0CC6..0x0CC8 +ur 0x0CCA..0x0CCD +ur 0x0CD5..0x0CD6 +ur 0x0D02..0x0D03 +ur 0x0D3E..0x0D43 +ur 0x0D46..0x0D48 +ur 0x0D4A..0x0D4D +ur 0x0D57 +ur 0x0E31 +ur 0x0E34..0x0E3A +ur 0x0E47..0x0E4E +ur 0x0EB1 +ur 0x0EB4..0x0EB9 +ur 0x0EBB..0x0EBC +ur 0x0EC8..0x0ECD +ur 0x0F18..0x0F19 +ur 0x0F35 +ur 0x0F37 +ur 0x0F39 +ur 0x0F3E +ur 0x0F3F +ur 0x0F71..0x0F84 +ur 0x0F86..0x0F8B +ur 0x0F90..0x0F95 +ur 0x0F97 +ur 
0x0F99..0x0FAD +ur 0x0FB1..0x0FB7 +ur 0x0FB9 +ur 0x20D0..0x20DC +ur 0x20E1 +ur 0x302A..0x302F +ur 0x3099 +ur 0x309A +end +name xmlIsExtender +ur 0xb7 0x2d0 0x2d1 0x387 0x640 0xe46 0xec6 0x3005 0x3031 0x3032 +ur 0x3033 0x3034 0x3035 0x309d 0x309e 0x30fc 0x30fd 0x30fe +end +name xmlIsIdeographic +ur 0x4300..0x9fa5 +ur 0xf900..0xfa2d +ur 0x3021..0x3029 +ur 0x3007 +end diff --git a/chvalid.h b/chvalid.h new file mode 100644 index 000000000..0b538ebda --- /dev/null +++ b/chvalid.h @@ -0,0 +1,105 @@ +/* + * chvalid.h: this header exports interfaces for the character + * range validation APIs + * + * This file is automatically generated from the cvs source + * definition files using the genChRanges.py Python script + * + * Generation date: Sat Oct 11 20:57:37 2003 + * Sources: chvalid.def + * William Brack + */ + +#ifndef __XML_CHVALID_H__ +#define __XML_CHVALID_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Define our typedefs and structures + * + */ +typedef struct _xmlChSRange xmlChSRange; +typedef xmlChSRange *xmlChSRangePtr; +struct _xmlChSRange { + unsigned short low; + unsigned short high; +}; + +typedef struct _xmlChLRange xmlChLRange; +typedef xmlChLRange *xmlChLRangePtr; +struct _xmlChLRange { + unsigned low; + unsigned high; +}; + +typedef struct _xmlChRangeGroup xmlChRangeGroup; +typedef xmlChRangeGroup *xmlChRangeGroupPtr; +struct _xmlChRangeGroup { + int nbShortRange; + int nbLongRange; + xmlChSRangePtr shortRange; /* points to an array of ranges */ + xmlChLRangePtr longRange; +}; + +/* Range checking routine */ +int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group); + +#define xmlIsBaseChar_ch(c) ( ((0x41<= (c)) && ((c) <= 0x5a)) || \ + ((0x61<= (c)) && ((c) <= 0x7a)) || \ + ((0xc0<= (c)) && ((c) <= 0xd6)) || \ + ((0xd8<= (c)) && ((c) <= 0xf6)) || \ + ((0xf8<= (c)) && ((c) <= 0xff))) +#define xmlIsBaseChar(c) (((c) < 0x100) ? 
\ + xmlIsBaseChar_ch((c)) : \ + xmlCharInRange((c), &xmlIsBaseCharGroup)) + +extern xmlChRangeGroup xmlIsBaseCharGroup; +#define xmlIsBlank_ch(c) ( ((c) == 0x20) || \ + ((0x9<= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd)) +#define xmlIsBlank(c) (((c) < 0x100) ? \ + xmlIsBlank_ch((c)) : 0) + +#define xmlIsChar_ch(c) ( ((0x9<= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd) || \ + ((0x20<= (c)) && ((c) <= 0xff))) +#define xmlIsChar(c) (((c) < 0x100) ? \ + xmlIsChar_ch((c)) : \ + xmlCharInRange((c), &xmlIsCharGroup)) + +extern xmlChRangeGroup xmlIsCharGroup; +#define xmlIsCombining(c) (((c) < 0x100) ? \ + 0 : \ + xmlCharInRange((c), &xmlIsCombiningGroup)) + +extern xmlChRangeGroup xmlIsCombiningGroup; +#define xmlIsDigit_ch(c) ( ((0x30<= (c)) && ((c) <= 0x39))) +#define xmlIsDigit(c) (((c) < 0x100) ? \ + xmlIsDigit_ch((c)) : \ + xmlCharInRange((c), &xmlIsDigitGroup)) + +extern xmlChRangeGroup xmlIsDigitGroup; +#define xmlIsExtender_ch(c) ( ((c) == 0xb7)) +#define xmlIsExtender(c) (((c) < 0x100) ? \ + xmlIsExtender_ch((c)) : \ + xmlCharInRange((c), &xmlIsExtenderGroup)) + +extern xmlChRangeGroup xmlIsExtenderGroup; +#define xmlIsIdeographic(c) (((c) < 0x100) ? \ + 0 : \ + xmlCharInRange((c), &xmlIsIdeographicGroup)) + +extern xmlChRangeGroup xmlIsIdeographicGroup; +extern unsigned char xmlIsPubidChar_tab[256]; +#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)]) +#define xmlIsPubidChar(c) (((c) < 0x100) ? 
\
+                xmlIsPubidChar_ch((c)) : 0)
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
diff --git a/genChRanges.py b/genChRanges.py
new file mode 100755
index 000000000..f35cc5cbd
--- /dev/null
+++ b/genChRanges.py
@@ -0,0 +1,465 @@
+#!/usr/bin/python -u
+#
+# Portions of this script have been (shamelessly) stolen from the
+# prior work of Daniel Veillard (genUnicode.py)
+#
+# I, however, take full credit for any bugs, errors or difficulties :-)
+#
+# William Brack
+# October 2003
+#
+
+import sys
+import string
+import time
+
+#
+# Exception raised while processing the definition file: a 'ur' line
+# outside of a 'name' block, an illegal value, an invalid range, or a
+# value/range defined twice for the same function.  It must be defined
+# here because the processing loop raises it by name.
+#
+class ValidationError(Exception):
+    pass
+
+#
+# A little routine to assign a 'meaningful' name to a range
+# (e.g. the interval (0x41, 0x5a) gives "r41x5a")
+#
+def rangename( intvl ):
+    (start, end) = intvl
+    rname = "r" + hex(start)[2:] + "x" + hex(end)[2:]
+    return rname
+
+#
+# A routine to take a list of yes/no (1, 0) values and turn it
+# into a list of ranges. This will later be used to determine whether
+# to generate single-byte lookup tables, or inline comparisons
+#
+# Each element of the returned list is an inclusive (first, last) tuple
+# of indices covering one run of consecutive 1 values.
+#
+def makeRange(lst):
+    ret = []
+    pos = 0
+    while pos < len(lst):
+        try:                            # index raises ValueError if not present
+            s = lst[pos:].index(1)      # look for start of next range
+        except ValueError:
+            break                       # if no more, finished
+        pos += s                        # pointer to start of possible range
+        try:
+            e = lst[pos:].index(0)      # look for end of range
+            e += pos
+        except ValueError:              # if no end, set to end of list
+            e = len(lst)
+        ret.append((pos, e-1))          # append range tuple to list
+        pos = e + 1                     # ready to check for next range
+    return ret
+
+sources = "chvalid.def"                 # input filename
+
+# minTableSize gives the minimum number of ranges which must be present
+# before a 256-byte lookup table is produced. If there are less than this
+# number, a macro with inline comparisons is generated
+minTableSize = 6
+
+# dictionary of ranges, key=range, element contains list of funcs using it
+Ranges = {}
+
+# dictionary of functions, key=name, element contains char-map and range-list
+Functs = {}
+
+state = 0
+
+try:
+    defines = open("chvalid.def", "r")
+except:
+    print "Missing chvalid.def, aborting ..."
+ sys.exit(1) + +# +# The lines in the .def file have three types:- +# name: Defines a new function block +# ur: Defines individual or ranges of unicode values +# end: Indicates the end of the function block +# +# These lines are processed below. +# +for line in defines.readlines(): + # ignore blank lines, or lines beginning with '#' + if line[0] == '#': + continue + line = string.strip(line) + if line == '': + continue + # split line into space-separated fields, then split on type + try: + fields = string.split(line, ' ') + # + # name line: + # validate any previous function block already ended + # validate this function not already defined + # initialize an entry in the function dicitonary + # including a mask table with no values yet defined + # + if fields[0] == 'name': + name = fields[1] + if state != 0: + print "'name' %s found before previous name" \ + "completed" % (fields[1]) + continue + state = 1 + if Functs.has_key(name): + print "name '%s' already present - may give" \ + " wrong results" % (name) + else: + # dict entry with two list elements (chdata, rangedata) + Functs[name] = [ [], [] ] + for v in range(256): + Functs[name][0].append(0) + # + # end line: + # validate there was a preceding function name line + # set state to show no current function active + # + elif fields[0] == 'end': + if state == 0: + print "'end' found outside of function block" + continue + state = 0 + + # + # ur line: + # validate function has been defined + # process remaining fields on the line, which may be either + # individual unicode values or ranges of values + # + elif fields[0] == 'ur': + if state != 1: + raise ValidationError, "'ur' found outside of 'name' block" + for el in fields[1:]: + pos = string.find(el, '..') + # pos <=0 means not a range, so must be individual value + if pos <= 0: + # cheap handling of hex or decimal values + if el[0:2] == '0x': + value = int(el[2:],16) + elif el[0] == "'": + value = ord(el[1]) + else: + value = int(el) + if ((value < 0) | 
(value > 0x1fffff)): + raise ValidationError, 'Illegal value (%s) in ch for'\ + ' name %s' % (el,name) + # for ur we have only ranges (makes things simpler), + # so convert val to range + currange = (value, value) + # pos > 0 means this is a range, so isolate/validate + # the interval + else: + # split the range into it's first-val, last-val + (first, last) = string.split(el, "..") + # convert values from text into binary + if first[0:2] == '0x': + start = int(first[2:],16) + elif first[0] == "'": + start = ord(first[1]) + else: + start = int(first) + if last[0:2] == '0x': + end = int(last[2:],16) + elif last[0] == "'": + end = ord(last[1]) + else: + end = int(last) + if (start < 0) | (end > 0x1fffff) | (start > end): + raise ValidationError, "Invalid range '%s'" % el + currange = (start, end) + # common path - 'currange' has the range, now take care of it + # We split on single-byte values vs. multibyte + if currange[1] < 0x100: # single-byte + for ch in range(currange[0],currange[1]+1): + # validate that value not previously defined + if Functs[name][0][ch]: + msg = "Duplicate ch value '%s' for name '%s'" % (el, name) + raise ValidationError, msg + Functs[name][0][ch] = 1 + else: # multi-byte + if Ranges.has_key(currange): + Ranges[currange].append(name) + else: + Ranges[currange] = [ name ] + if currange in Functs[name][1]: + raise ValidationError, "range already defined in" \ + " function" + else: + Functs[name][1].append(currange) + + except: + print "Failed to process line: %s" % (line) + raise +# +# At this point, the entire definition file has been processed. Now we +# enter the output phase, where we generate the two files chvalid.c and' +# chvalid.h +# +# To do this, we first output the 'static' data (heading, fixed +# definitions, etc.), then output the 'dynamic' data (the results +# of the above processing), and finally output closing 'static' data +# (e.g. 
the subroutine to process the ranges)
+#
+
+#
+# Generate the headings:
+#
+try:
+    header = open("chvalid.h", "w")
+except IOError:
+    print "Failed to open chvalid.h"
+    sys.exit(1)
+
+try:
+    output = open("chvalid.c", "w")
+except IOError:
+    print "Failed to open chvalid.c"
+    sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * chvalid.h: this header exports interfaces for the character
+ *            range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+    unsigned short low;
+    unsigned short high;
+};
+
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+    unsigned low;
+    unsigned high;
+};
+
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+    int nbShortRange;
+    int nbLongRange;
+    xmlChSRangePtr shortRange; /* points to an array of ranges */
+    xmlChLRangePtr longRange;
+};
+
+/* Range checking routine */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
+
+""" % (date, sources));
+output.write(
+"""/*
+ * chvalid.c: this module implements the character range
+ *            validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack
+ */
+
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group.  Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters.  If the table byte is set, the character is
+ * allowed.
+ *
+ */
+""" % (date, sources));
+
+#
+# Now output the generated data.
+# We try to produce the best execution times.  Tests have shown that validation
+# with direct table lookup is, when there are a "small" number of valid items,
+# still not as fast as a sequence of inline compares.  So, if the single-byte
+# portion of a range has a "small" number of ranges, we output a macro for inline
+# compares, otherwise we output a 256-byte table and a macro to use it.
+#
+
+fkeys = Functs.keys()    # Dictionary of all defined functions
+fkeys.sort()             # Put some order to our output
+
+for f in fkeys:
+
+# First we convert the specified single-byte values into a group of ranges.
+# If the total number of such ranges is less than minTableSize, we generate
+# an inline macro for direct comparisons; otherwise we generate a lookup
+# table.
+    if max(Functs[f][0]) > 0:    # only check if at least one entry
+        rangeTable = makeRange(Functs[f][0])
+        numRanges = len(rangeTable)
+        if numRanges >= minTableSize:    # table is worthwhile
+            header.write("extern unsigned char %s_tab[256];\n" % f)
+            header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
+
+            # write the constant data to the code file
+            output.write("unsigned char %s_tab[256] = {\n" % f)
+            pline = " "
+            for n in range(255):    # entry 255 is written after the loop, without a trailing comma
+                pline += " 0x%02x," % Functs[f][0][n]
+                if len(pline) > 72:    # wrap the emitted C source line
+                    output.write(pline + "\n")
+                    pline = " "
+            output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
+
+        else:    # inline check is used
+            # first another little optimisation - if space is present,
+            # put it at the front of the list so it is checked first
+            try:
+                rangeTable.remove((0x20, 0x20))    # raises ValueError when space is absent
+                rangeTable.insert(0, (0x20, 0x20))
+            except ValueError:
+                pass
+            pline = "#define %s_ch(c)\t( " % f
+            firstFlag = 1
+            for rg in rangeTable:
+                if not firstFlag:    # join successive tests with || on a continuation line
+                    pline += " || \\\n\t\t\t"
+                else:
+                    firstFlag = 0
+                if rg[0] == rg[1]:    # single value - check equal
+                    pline += "((c) == " + hex(rg[0]) + ")"
+                else:    # value range
+                    pline += "((" + hex(rg[0]) + "<= (c)) &&"
+                    pline += " ((c) <= " + hex(rg[1]) + "))"
+            pline += ")\n"
+            header.write(pline)
+
+    header.write("#define %s(c)\t(((c) < 0x100) ? \\\n\t\t\t\t" % f)
+    if max(Functs[f][0]) > 0:
+        header.write("%s_ch((c)) :" % f)
+    else:
+        header.write("0 :")
+
+    # if no ranges defined, value invalid if >= 0x100
+    if len(Functs[f][1]) == 0:
+        header.write(" 0)\n\n")
+    else:
+        header.write(" \\\n\t\t\t\txmlCharInRange((c), &%sGroup))\n\n" % f)
+
+    if len(Functs[f][1]) > 0:
+        header.write("extern xmlChRangeGroup %sGroup;\n" % f)
+
+
+#
+# Next we do the unicode ranges
+#
+
+for f in fkeys:
+    if len(Functs[f][1]) > 0:    # only generate if unicode ranges present
+        rangeTable = Functs[f][1]
+        rangeTable.sort()    # ascending tuple sequence (xmlCharInRange binary-searches it)
+        numShort = 0
+        numLong = 0
+        for rg in rangeTable:
+            if rg[1] < 0x10000:    # if short value
+                if numShort == 0:    # first occurrence
+                    pline = "static xmlChSRange %s_srng[] = { " % f
+                else:
+                    pline += ", "
+                numShort += 1
+                if len(pline) > 60:    # wrap the emitted C source line
+                    output.write(pline + "\n")
+                    pline = " "
+                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+            else:    # if long value
+                if numLong == 0:    # first occurrence
+                    if numShort > 0:    # if there were shorts, finish them off
+                        output.write(pline + "};\n")
+                    pline = "static xmlChLRange %s_lrng[] = { " % f
+                else:
+                    pline += ", "
+                numLong += 1
+                if len(pline) > 60:    # wrap the emitted C source line
+                    output.write(pline + "\n")
+                    pline = " "
+                pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+        output.write(pline + "};\n")    # finish off last group
+
+        pline = "xmlChRangeGroup %sGroup = {%d, %d, " % (f, numShort, numLong)
+        # always emit both pointers: a missing table becomes a typed null, so the initializer stays valid C
+        srng = (numShort > 0) and ("%s_srng" % f) or "(xmlChSRangePtr) 0"
+        lrng = (numLong > 0) and ("%s_lrng" % f) or "(xmlChLRangePtr) 0"
+        pline += "%s, %s" % (srng, lrng)
+
+        output.write(pline + "};\n\n")
+#
+# Run complete - write trailers and close the output files
+#
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
+""");
+
+header.close()
+
+output.write(
+"""/* xmlCharInRange: returns 1 when val lies inside one of the ranges of rptr, 0 otherwise */
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+    int low, high, mid;    /* inclusive search bounds and current probe index */
+    xmlChSRangePtr sptr;
+    xmlChLRangePtr lptr;
+    if (val < 0x10000) {    /* is val in 'short' or 'long' array? */
+        if (rptr->nbShortRange == 0)
+            return 0;
+        low = 0;
+        high = rptr->nbShortRange - 1;    /* last valid index, not the element count */
+        sptr = rptr->shortRange;
+        while (low <= high) {    /* binary search over the ascending range table */
+            mid = (low + high) / 2;
+            if ((unsigned short) val < sptr[mid].low)
+                high = mid - 1;
+            else if ((unsigned short) val > sptr[mid].high)
+                low = mid + 1;
+            else    /* sptr[mid].low <= val <= sptr[mid].high */
+                return 1;
+        }
+    } else {
+        if (rptr->nbLongRange == 0)
+            return 0;
+        low = 0;
+        high = rptr->nbLongRange - 1;    /* last valid index, not the element count */
+        lptr = rptr->longRange;
+        while (low <= high) {    /* same search, on the table of values >= 0x10000 */
+            mid = (low + high) / 2;
+            if (val < lptr[mid].low)
+                high = mid - 1;
+            else if (val > lptr[mid].high)
+                low = mid + 1;
+            else    /* lptr[mid].low <= val <= lptr[mid].high */
+                return 1;
+        }
+    }
+    return 0;    /* no range contains val */
+}
+
+""");
+
+output.close()
diff --git a/include/libxml/Makefile.am b/include/libxml/Makefile.am index b9b7014c5..1bf338cec 100644 --- a/include/libxml/Makefile.am +++ b/include/libxml/Makefile.am @@ -43,7 +43,8 @@ xmlinc_HEADERS = \ dict.h \ SAX2.h \ xmlexports.h \ - xmldwalk.h + xmldwalk.h \ + chvalid.h install-exec-hook: $(mkinstalldirs) $(DESTDIR)$(xmlincdir) diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 800cdef5a..295606455 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -48,8 +49,7 @@ extern "C" { * [2] Char ::= #x9 | #xA | #xD | [#x20...] * any byte character in the accepted range */ -#define IS_BYTE_CHAR(c) \ - (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D)) +#define IS_BYTE_CHAR(c) xmlIsChar_ch(c) /** * IS_CHAR: @@ -61,11 +61,7 @@ extern "C" { * | [#x10000-#x10FFFF] * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/ -#define IS_CHAR(c) \ - ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \ - ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \ - (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \ - (((c) >= 0x10000) && ((c) <= 0x10FFFF))) +#define IS_CHAR(c) xmlIsChar(c) /** * IS_BLANK: @@ -75,8 +71,7 @@ extern "C" { * * [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ -#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \ - ((c) == 0x0D)) +#define IS_BLANK(c) xmlIsBlank(c) /** * IS_BASECHAR: @@ -193,15 +188,7 @@ XMLPUBVAR const xmlChar xmlStringComment[]; /* * Function to finish the work of the macros where needed. */ -XMLPUBFUN int XMLCALL xmlIsBaseChar (int c); -XMLPUBFUN int XMLCALL xmlIsBlank (int c); -XMLPUBFUN int XMLCALL xmlIsPubidChar (int c); -XMLPUBFUN int XMLCALL xmlIsLetter (int c); -XMLPUBFUN int XMLCALL xmlIsDigit (int c); -XMLPUBFUN int XMLCALL xmlIsIdeographic(int c); -XMLPUBFUN int XMLCALL xmlIsExtender (int c); -XMLPUBFUN int XMLCALL xmlIsCombining (int c); -XMLPUBFUN int XMLCALL xmlIsChar (int c); +XMLPUBFUN int XMLCALL xmlIsLetter (int c); /** * Parser context. diff --git a/parserInternals.c b/parserInternals.c index 562c57806..2d1bd697f 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -53,6 +53,7 @@ #include #endif #include +#include /* * Various global defaults for parsing @@ -196,472 +197,6 @@ xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, } } -/************************************************************************ - * * - * Some functions to avoid too large macros * - * * - ************************************************************************/ - -/** - * xmlIsChar: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] - * | [#x10000-#x10FFFF] - * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. 
- * Also available as a macro IS_CHAR() - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsChar(int c) { - return( - ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || - (((c) >= 0x20) && ((c) <= 0xD7FF)) || - (((c) >= 0xE000) && ((c) <= 0xFFFD)) || - (((c) >= 0x10000) && ((c) <= 0x10FFFF))); -} - -/** - * xmlIsBlank: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [3] S ::= (#x20 | #x9 | #xD | #xA)+ - * Also available as a macro IS_BLANK() - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsBlank(int c) { - return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D)); -} - -static int xmlBaseArray[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */ - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */ - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */ -}; - -/** - * xmlIsBaseChar: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [85] BaseChar 
::= ... long list see REC ... - * - * VI is your friend ! - * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/ - * and - * :1,$ s/#x\([0-9A-Z]*\)/ ((c) == 0x\1) ||/ - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsBaseChar(int c) { - if (c < 0x0100) return(xmlBaseArray[c]); - return((((c) >= 0x0100) && ((c) <= 0x0131)) || - (((c) >= 0x0134) && ((c) <= 0x013E)) || - (((c) >= 0x0141) && ((c) <= 0x0148)) || - (((c) >= 0x014A) && ((c) <= 0x017E)) || - (((c) >= 0x0180) && ((c) <= 0x01C3)) || - (((c) >= 0x01CD) && ((c) <= 0x01F0)) || - (((c) >= 0x01F4) && ((c) <= 0x01F5)) || - (((c) >= 0x01FA) && ((c) <= 0x0217)) || - (((c) >= 0x0250) && ((c) <= 0x02A8)) || - (((c) >= 0x02BB) && ((c) <= 0x02C1)) || - ((c) == 0x0386) || - (((c) >= 0x0388) && ((c) <= 0x038A)) || - ((c) == 0x038C) || - (((c) >= 0x038E) && ((c) <= 0x03A1)) || - (((c) >= 0x03A3) && ((c) <= 0x03CE)) || - (((c) >= 0x03D0) && ((c) <= 0x03D6)) || - ((c) == 0x03DA) || - ((c) == 0x03DC) || - ((c) == 0x03DE) || - ((c) == 0x03E0) || - (((c) >= 0x03E2) && ((c) <= 0x03F3)) || - (((c) >= 0x0401) && ((c) <= 0x040C)) || - (((c) >= 0x040E) && ((c) <= 0x044F)) || - (((c) >= 0x0451) && ((c) <= 0x045C)) || - (((c) >= 0x045E) && ((c) <= 0x0481)) || - (((c) >= 0x0490) && ((c) <= 0x04C4)) || - (((c) >= 0x04C7) && ((c) <= 0x04C8)) || - (((c) >= 0x04CB) && ((c) <= 0x04CC)) || - (((c) >= 0x04D0) && ((c) <= 0x04EB)) || - (((c) >= 0x04EE) && ((c) <= 0x04F5)) || - (((c) >= 0x04F8) && ((c) <= 0x04F9)) || - (((c) >= 0x0531) && ((c) <= 0x0556)) || - ((c) == 0x0559) || - (((c) >= 0x0561) && ((c) <= 0x0586)) || - (((c) >= 0x05D0) && ((c) <= 0x05EA)) || - (((c) >= 0x05F0) && ((c) <= 0x05F2)) || - (((c) >= 0x0621) && ((c) <= 0x063A)) || - (((c) >= 0x0641) && ((c) <= 0x064A)) || - (((c) >= 0x0671) && ((c) <= 0x06B7)) || - (((c) >= 0x06BA) && ((c) <= 0x06BE)) || - (((c) >= 0x06C0) && ((c) <= 0x06CE)) || - (((c) >= 0x06D0) && ((c) <= 0x06D3)) || - ((c) == 0x06D5) || - (((c) >= 0x06E5) && ((c) <= 
0x06E6)) || - (((c) >= 0x905) && ( /* accelerator */ - (((c) >= 0x0905) && ((c) <= 0x0939)) || - ((c) == 0x093D) || - (((c) >= 0x0958) && ((c) <= 0x0961)) || - (((c) >= 0x0985) && ((c) <= 0x098C)) || - (((c) >= 0x098F) && ((c) <= 0x0990)) || - (((c) >= 0x0993) && ((c) <= 0x09A8)) || - (((c) >= 0x09AA) && ((c) <= 0x09B0)) || - ((c) == 0x09B2) || - (((c) >= 0x09B6) && ((c) <= 0x09B9)) || - (((c) >= 0x09DC) && ((c) <= 0x09DD)) || - (((c) >= 0x09DF) && ((c) <= 0x09E1)) || - (((c) >= 0x09F0) && ((c) <= 0x09F1)) || - (((c) >= 0x0A05) && ((c) <= 0x0A0A)) || - (((c) >= 0x0A0F) && ((c) <= 0x0A10)) || - (((c) >= 0x0A13) && ((c) <= 0x0A28)) || - (((c) >= 0x0A2A) && ((c) <= 0x0A30)) || - (((c) >= 0x0A32) && ((c) <= 0x0A33)) || - (((c) >= 0x0A35) && ((c) <= 0x0A36)) || - (((c) >= 0x0A38) && ((c) <= 0x0A39)) || - (((c) >= 0x0A59) && ((c) <= 0x0A5C)) || - ((c) == 0x0A5E) || - (((c) >= 0x0A72) && ((c) <= 0x0A74)) || - (((c) >= 0x0A85) && ((c) <= 0x0A8B)) || - ((c) == 0x0A8D) || - (((c) >= 0x0A8F) && ((c) <= 0x0A91)) || - (((c) >= 0x0A93) && ((c) <= 0x0AA8)) || - (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) || - (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) || - (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) || - ((c) == 0x0ABD) || - ((c) == 0x0AE0) || - (((c) >= 0x0B05) && ((c) <= 0x0B0C)) || - (((c) >= 0x0B0F) && ((c) <= 0x0B10)) || - (((c) >= 0x0B13) && ((c) <= 0x0B28)) || - (((c) >= 0x0B2A) && ((c) <= 0x0B30)) || - (((c) >= 0x0B32) && ((c) <= 0x0B33)) || - (((c) >= 0x0B36) && ((c) <= 0x0B39)) || - ((c) == 0x0B3D) || - (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) || - (((c) >= 0x0B5F) && ((c) <= 0x0B61)) || - (((c) >= 0x0B85) && ((c) <= 0x0B8A)) || - (((c) >= 0x0B8E) && ((c) <= 0x0B90)) || - (((c) >= 0x0B92) && ((c) <= 0x0B95)) || - (((c) >= 0x0B99) && ((c) <= 0x0B9A)) || - ((c) == 0x0B9C) || - (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) || - (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) || - (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) || - (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) || - (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) || - (((c) >= 
0x0C05) && ((c) <= 0x0C0C)) || - (((c) >= 0x0C0E) && ((c) <= 0x0C10)) || - (((c) >= 0x0C12) && ((c) <= 0x0C28)) || - (((c) >= 0x0C2A) && ((c) <= 0x0C33)) || - (((c) >= 0x0C35) && ((c) <= 0x0C39)) || - (((c) >= 0x0C60) && ((c) <= 0x0C61)) || - (((c) >= 0x0C85) && ((c) <= 0x0C8C)) || - (((c) >= 0x0C8E) && ((c) <= 0x0C90)) || - (((c) >= 0x0C92) && ((c) <= 0x0CA8)) || - (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) || - (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) || - ((c) == 0x0CDE) || - (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) || - (((c) >= 0x0D05) && ((c) <= 0x0D0C)) || - (((c) >= 0x0D0E) && ((c) <= 0x0D10)) || - (((c) >= 0x0D12) && ((c) <= 0x0D28)) || - (((c) >= 0x0D2A) && ((c) <= 0x0D39)) || - (((c) >= 0x0D60) && ((c) <= 0x0D61)) || - (((c) >= 0x0E01) && ((c) <= 0x0E2E)) || - ((c) == 0x0E30) || - (((c) >= 0x0E32) && ((c) <= 0x0E33)) || - (((c) >= 0x0E40) && ((c) <= 0x0E45)) || - (((c) >= 0x0E81) && ((c) <= 0x0E82)) || - ((c) == 0x0E84) || - (((c) >= 0x0E87) && ((c) <= 0x0E88)) || - ((c) == 0x0E8A) || - ((c) == 0x0E8D) || - (((c) >= 0x0E94) && ((c) <= 0x0E97)) || - (((c) >= 0x0E99) && ((c) <= 0x0E9F)) || - (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) || - ((c) == 0x0EA5) || - ((c) == 0x0EA7) || - (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) || - (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) || - ((c) == 0x0EB0) || - (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) || - ((c) == 0x0EBD) || - (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) || - (((c) >= 0x0F40) && ((c) <= 0x0F47)) || - (((c) >= 0x0F49) && ((c) <= 0x0F69)) || - (((c) >= 0x10A0) && ( /* accelerator */ - (((c) >= 0x10A0) && ((c) <= 0x10C5)) || - (((c) >= 0x10D0) && ((c) <= 0x10F6)) || - ((c) == 0x1100) || - (((c) >= 0x1102) && ((c) <= 0x1103)) || - (((c) >= 0x1105) && ((c) <= 0x1107)) || - ((c) == 0x1109) || - (((c) >= 0x110B) && ((c) <= 0x110C)) || - (((c) >= 0x110E) && ((c) <= 0x1112)) || - ((c) == 0x113C) || - ((c) == 0x113E) || - ((c) == 0x1140) || - ((c) == 0x114C) || - ((c) == 0x114E) || - ((c) == 0x1150) || - (((c) >= 0x1154) && ((c) <= 0x1155)) || - ((c) == 
0x1159) || - (((c) >= 0x115F) && ((c) <= 0x1161)) || - ((c) == 0x1163) || - ((c) == 0x1165) || - ((c) == 0x1167) || - ((c) == 0x1169) || - (((c) >= 0x116D) && ((c) <= 0x116E)) || - (((c) >= 0x1172) && ((c) <= 0x1173)) || - ((c) == 0x1175) || - ((c) == 0x119E) || - ((c) == 0x11A8) || - ((c) == 0x11AB) || - (((c) >= 0x11AE) && ((c) <= 0x11AF)) || - (((c) >= 0x11B7) && ((c) <= 0x11B8)) || - ((c) == 0x11BA) || - (((c) >= 0x11BC) && ((c) <= 0x11C2)) || - ((c) == 0x11EB) || - ((c) == 0x11F0) || - ((c) == 0x11F9) || - (((c) >= 0x1E00) && ((c) <= 0x1E9B)) || - (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) || - (((c) >= 0x1F00) && ((c) <= 0x1F15)) || - (((c) >= 0x1F18) && ((c) <= 0x1F1D)) || - (((c) >= 0x1F20) && ((c) <= 0x1F45)) || - (((c) >= 0x1F48) && ((c) <= 0x1F4D)) || - (((c) >= 0x1F50) && ((c) <= 0x1F57)) || - ((c) == 0x1F59) || - ((c) == 0x1F5B) || - ((c) == 0x1F5D) || - (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) || - (((c) >= 0x1F80) && ((c) <= 0x1FB4)) || - (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) || - ((c) == 0x1FBE) || - (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) || - (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) || - (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) || - (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) || - (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) || - (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) || - (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) || - ((c) == 0x2126) || - (((c) >= 0x212A) && ((c) <= 0x212B)) || - ((c) == 0x212E) || - (((c) >= 0x2180) && ((c) <= 0x2182)) || - (((c) >= 0x3041) && ((c) <= 0x3094)) || - (((c) >= 0x30A1) && ((c) <= 0x30FA)) || - (((c) >= 0x3105) && ((c) <= 0x312C)) || - (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */ - )))); -} - -/** - * xmlIsDigit: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [88] Digit ::= ... long list see REC ... 
- * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsDigit(int c) { - return( - (((c) >= 0x0030) && ((c) <= 0x0039)) || - (((c) >= 0x660) && ( /* accelerator */ - (((c) >= 0x0660) && ((c) <= 0x0669)) || - (((c) >= 0x06F0) && ((c) <= 0x06F9)) || - (((c) >= 0x0966) && ((c) <= 0x096F)) || - (((c) >= 0x09E6) && ((c) <= 0x09EF)) || - (((c) >= 0x0A66) && ((c) <= 0x0A6F)) || - (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) || - (((c) >= 0x0B66) && ((c) <= 0x0B6F)) || - (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) || - (((c) >= 0x0C66) && ((c) <= 0x0C6F)) || - (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) || - (((c) >= 0x0D66) && ((c) <= 0x0D6F)) || - (((c) >= 0x0E50) && ((c) <= 0x0E59)) || - (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) || - (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ )); -} - -/** - * xmlIsCombining: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [87] CombiningChar ::= ... long list see REC ... - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsCombining(int c) { - return( - (((c) >= 0x300) && ( /* accelerator */ - (((c) >= 0x0300) && ((c) <= 0x0345)) || - (((c) >= 0x0360) && ((c) <= 0x0361)) || - (((c) >= 0x0483) && ((c) <= 0x0486)) || - (((c) >= 0x0591) && ((c) <= 0x05A1)) || - (((c) >= 0x05A3) && ((c) <= 0x05B9)) || - (((c) >= 0x05BB) && ((c) <= 0x05BD)) || - ((c) == 0x05BF) || - (((c) >= 0x05C1) && ((c) <= 0x05C2)) || - ((c) == 0x05C4) || - (((c) >= 0x064B) && ((c) <= 0x0652)) || - ((c) == 0x0670) || - (((c) >= 0x06D6) && ((c) <= 0x06DC)) || - (((c) >= 0x06DD) && ((c) <= 0x06DF)) || - (((c) >= 0x06E0) && ((c) <= 0x06E4)) || - (((c) >= 0x06E7) && ((c) <= 0x06E8)) || - (((c) >= 0x06EA) && ((c) <= 0x06ED)) || - (((c) >= 0x0901) && ( /* accelerator */ - (((c) >= 0x0901) && ((c) <= 0x0903)) || - ((c) == 0x093C) || - (((c) >= 0x093E) && ((c) <= 0x094C)) || - ((c) == 0x094D) || - (((c) >= 0x0951) && ((c) <= 0x0954)) || - (((c) >= 0x0962) && ((c) <= 0x0963)) || - (((c) >= 0x0981) && ((c) <= 0x0983)) || - 
((c) == 0x09BC) || - ((c) == 0x09BE) || - ((c) == 0x09BF) || - (((c) >= 0x09C0) && ((c) <= 0x09C4)) || - (((c) >= 0x09C7) && ((c) <= 0x09C8)) || - (((c) >= 0x09CB) && ((c) <= 0x09CD)) || - ((c) == 0x09D7) || - (((c) >= 0x09E2) && ((c) <= 0x09E3)) || - (((c) >= 0x0A02) && ( /* accelerator */ - ((c) == 0x0A02) || - ((c) == 0x0A3C) || - ((c) == 0x0A3E) || - ((c) == 0x0A3F) || - (((c) >= 0x0A40) && ((c) <= 0x0A42)) || - (((c) >= 0x0A47) && ((c) <= 0x0A48)) || - (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) || - (((c) >= 0x0A70) && ((c) <= 0x0A71)) || - (((c) >= 0x0A81) && ((c) <= 0x0A83)) || - ((c) == 0x0ABC) || - (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) || - (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) || - (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) || - (((c) >= 0x0B01) && ((c) <= 0x0B03)) || - ((c) == 0x0B3C) || - (((c) >= 0x0B3E) && ((c) <= 0x0B43)) || - (((c) >= 0x0B47) && ((c) <= 0x0B48)) || - (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) || - (((c) >= 0x0B56) && ((c) <= 0x0B57)) || - (((c) >= 0x0B82) && ((c) <= 0x0B83)) || - (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) || - (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) || - (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) || - ((c) == 0x0BD7) || - (((c) >= 0x0C01) && ((c) <= 0x0C03)) || - (((c) >= 0x0C3E) && ((c) <= 0x0C44)) || - (((c) >= 0x0C46) && ((c) <= 0x0C48)) || - (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) || - (((c) >= 0x0C55) && ((c) <= 0x0C56)) || - (((c) >= 0x0C82) && ((c) <= 0x0C83)) || - (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) || - (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) || - (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) || - (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) || - (((c) >= 0x0D02) && ((c) <= 0x0D03)) || - (((c) >= 0x0D3E) && ((c) <= 0x0D43)) || - (((c) >= 0x0D46) && ((c) <= 0x0D48)) || - (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) || - ((c) == 0x0D57) || - (((c) >= 0x0E31) && ( /* accelerator */ - ((c) == 0x0E31) || - (((c) >= 0x0E34) && ((c) <= 0x0E3A)) || - (((c) >= 0x0E47) && ((c) <= 0x0E4E)) || - ((c) == 0x0EB1) || - (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) || - (((c) >= 0x0EBB) && ((c) 
<= 0x0EBC)) || - (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) || - (((c) >= 0x0F18) && ((c) <= 0x0F19)) || - ((c) == 0x0F35) || - ((c) == 0x0F37) || - ((c) == 0x0F39) || - ((c) == 0x0F3E) || - ((c) == 0x0F3F) || - (((c) >= 0x0F71) && ((c) <= 0x0F84)) || - (((c) >= 0x0F86) && ((c) <= 0x0F8B)) || - (((c) >= 0x0F90) && ((c) <= 0x0F95)) || - ((c) == 0x0F97) || - (((c) >= 0x0F99) && ((c) <= 0x0FAD)) || - (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) || - ((c) == 0x0FB9) || - (((c) >= 0x20D0) && ((c) <= 0x20DC)) || - ((c) == 0x20E1) || - (((c) >= 0x302A) && ((c) <= 0x302F)) || - ((c) == 0x3099) || - ((c) == 0x309A)))))))))); -} - -/** - * xmlIsExtender: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | - * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | - * [#x309D-#x309E] | [#x30FC-#x30FE] - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsExtender(int c) { - switch (c) { - case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: - case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005: - case 0x3031: case 0x3032: case 0x3033: case 0x3034: - case 0x3035: case 0x309D: case 0x309E: case 0x30FC: - case 0x30FD: case 0x30FE: - return 1; - default: - return 0; - } -} - -/** - * xmlIsIdeographic: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsIdeographic(int c) { - return(((c) < 0x0100) ? 
0 : - (((c) >= 0x4e00) && ((c) <= 0x9fa5)) || - (((c) >= 0xf900) && ((c) <= 0xfa2d)) || - (((c) >= 0x3021) && ((c) <= 0x3029)) || - ((c) == 0x3007)); -} - /** * xmlIsLetter: * @c: an unicode character (int) @@ -676,29 +211,6 @@ xmlIsLetter(int c) { return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)); } -/** - * xmlIsPubidChar: - * @c: an unicode character (int) - * - * Check whether the character is allowed by the production - * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] - * - * Returns 0 if not, non-zero otherwise - */ -int -xmlIsPubidChar(int c) { - return( - ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) || - (((c) >= 'a') && ((c) <= 'z')) || - (((c) >= 'A') && ((c) <= 'Z')) || - (((c) >= '0') && ((c) <= '9')) || - ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') || - ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') || - ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') || - ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') || - ((c) == '$') || ((c) == '_') || ((c) == '%')); -} - /************************************************************************ * * * Input handling functions for progressive parsing *