XMLReader.cpp 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. #include "XMLReader.h"
  2. #include "ifc_xmlreadercallback.h"
  3. #include "XMLParameters.h"
  4. #include <memory.h>
  5. #include "../nu/regexp.h"
  6. #include "../nu/strsafe.h"
  7. #include <wctype.h>
  8. #include <vector>
  9. #include <atlconv.h>
  10. /* TODO:
  11. try to remove CharUpper (but towupper doesn't deal with non-english very well)
  12. */
  13. #ifdef __APPLE__
  // Upper-cases a NUL-terminated wide string in place with towupper().
  // A null pointer is accepted and ignored.
  void CharUpper(wchar_t* src)
  {
      if (!src)
          return;

      for (wchar_t* cursor = src; *cursor != L'\0'; ++cursor)
          *cursor = static_cast<wchar_t>(towupper(*cursor));
  }
  // Returns a malloc()-allocated copy of a NUL-terminated wide string.
  // The caller releases the copy with free().
  // Returns 0 when src is null, when the required size cannot be represented,
  // or when the allocation fails.
  wchar_t* _wcsdup(const wchar_t* src)
  {
      if (!src)
          return 0;

      const size_t len = wcslen(src) + 1; // characters including the terminator
      if (len == 0) // wcslen(src) + 1 wrapped around
          return 0;
      if (len > static_cast<size_t>(-1) / sizeof(wchar_t)) // byte count would overflow
          return 0;

      wchar_t* newstr = static_cast<wchar_t*>(malloc(sizeof(wchar_t) * len));
      if (!newstr) // out of memory: do not copy into a null buffer
          return 0;

      wcscpy(newstr, src);
      return newstr;
  }
  35. #endif
  36. //---------------------------------------------------------------------------------------------------
  37. CallbackStruct::CallbackStruct(ifc_xmlreadercallback* _callback, const wchar_t* _match, bool doUpper)
  38. {
  39. match = _wcsdup(_match);
  40. if (doUpper)
  41. CharUpper(match);
  42. callback = _callback;
  43. }
  44. //------------------------------------------------------
  45. CallbackStruct::CallbackStruct()
  46. : callback(0), match(0)
  47. {
  48. }
  49. //-------------------------------
  50. CallbackStruct::~CallbackStruct()
  51. {
  52. if (match)
  53. {
  54. free(match);
  55. match = 0;
  56. }
  57. }
  58. /* --- */
  59. void XMLCALL DStartTag(void* data, const XML_Char* name, const XML_Char** atts) { ((XMLReader*)data)->StartTag(name, atts); }
  60. void XMLCALL DEndTag(void* data, const XML_Char* name) { ((XMLReader*)data)->EndTag(name); }
  61. void XMLCALL DTextHandler(void* data, const XML_Char* s, int len) { ((XMLReader*)data)->TextHandler(s, len); }
  62. int XMLCALL UnknownEncoding(void* data, const XML_Char* name, XML_Encoding* info);
  63. //--------------------
  64. XMLReader::XMLReader()
  65. : parser(0)
  66. {
  67. case_sensitive = false;
  68. }
  69. //---------------------
  70. XMLReader::~XMLReader()
  71. {
  72. for (size_t i = 0; i != callbacks.size(); i++)
  73. {
  74. delete callbacks[i];
  75. callbacks[i] = 0;
  76. }
  77. }
  78. //----------------------------------------------------------------------------------------
  79. void XMLReader::RegisterCallback(const wchar_t* matchstr, ifc_xmlreadercallback* callback)
  80. {
  81. callbacks.push_back(new CallbackStruct(callback, matchstr, !case_sensitive));
  82. }
  83. //-----------------------------------------------------------------
  84. void XMLReader::UnregisterCallback(ifc_xmlreadercallback* callback)
  85. {
  86. for (size_t i = 0; i != callbacks.size(); i++)
  87. {
  88. if (callbacks[i] && callbacks[i]->callback == callback)
  89. {
  90. delete callbacks[i];
  91. callbacks[i] = 0; // we set it to 0 so this can be called during a callback
  92. }
  93. }
  94. }
  95. //-------------------
  96. int XMLReader::Open()
  97. {
  98. parser = XML_ParserCreate(0); // create the expat parser
  99. if (!parser)
  100. return OBJ_XML_FAILURE;
  101. XML_SetUserData(parser, this); // give our object pointer as context
  102. XML_SetElementHandler(parser, DStartTag, DEndTag); // set the tag callbacks
  103. XML_SetCharacterDataHandler(parser, DTextHandler); // set the text callbacks
  104. XML_SetUnknownEncodingHandler(parser, UnknownEncoding, 0); // setup the character set encoding stuff
  105. return OBJ_XML_SUCCESS;
  106. }
  107. //----------------------------
  108. int XMLReader::OpenNamespace()
  109. {
  110. parser = XML_ParserCreateNS(0, L'#'); // create the expat parser, using # to separate namespace URI from element name
  111. if (!parser)
  112. return OBJ_XML_FAILURE;
  113. XML_SetUserData(parser, this); // give our object pointer as context
  114. XML_SetElementHandler(parser, DStartTag, DEndTag); // set the tag callbacks
  115. XML_SetCharacterDataHandler(parser, DTextHandler); // set the text callbacks
  116. XML_SetUnknownEncodingHandler(parser, UnknownEncoding, 0); // setup the character set encoding stuff
  117. return OBJ_XML_SUCCESS;
  118. }
  119. //--------------------------------------------------
  120. void XMLReader::OldFeed(void* data, size_t dataSize)
  121. {
  122. Feed(data, dataSize);
  123. }
  124. //----------------------------------------------
  125. int XMLReader::Feed(void* data, size_t dataSize)
  126. {
  127. XML_Status error;
  128. if (data && dataSize)
  129. {
  130. while (dataSize >= 0x7FFFFFFFU) // handle really really big data sizes (hopefully this won't happen)
  131. {
  132. XML_Parse(parser, reinterpret_cast<const char*>(data), 0x7FFFFFFF, 0);
  133. dataSize -= 0x7FFFFFFFU;
  134. }
  135. error = XML_Parse(parser, reinterpret_cast<const char*>(data), static_cast<int>(dataSize), 0);
  136. }
  137. else
  138. error = XML_Parse(parser, 0, 0, 1); // passing this sequence tells expat that we're done
  139. if (error == XML_STATUS_ERROR)
  140. {
  141. // TODO: set a flag to prevent further parsing until a Reset occurs
  142. XML_Error errorCode = XML_GetErrorCode(parser);
  143. int line = XML_GetCurrentLineNumber(parser);
  144. // TODO: int column = XML_GetCurrentColumnNumber(parser);
  145. wa::strings::wa_string szError(XML_ErrorString(errorCode));
  146. for (CallbackStruct* l_callback : callbacks)
  147. {
  148. if (l_callback != NULL)
  149. l_callback->callback->xmlReaderOnError(line, errorCode, szError.GetW().c_str());
  150. }
  151. return OBJ_XML_FAILURE;
  152. }
  153. return OBJ_XML_SUCCESS;
  154. }
  155. //---------------------
  156. void XMLReader::Close()
  157. {
  158. if (parser)
  159. XML_ParserFree(parser);
  160. parser = 0;
  161. }
  162. //-----------------------------------
  163. const wchar_t* XMLReader::BuildPath()
  164. {
  165. return pathString.c_str();
  166. }
  167. //----------------------------------------------------
  168. const wchar_t* XMLReader::AddPath(const wchar_t* node)
  169. {
  170. currentNode.assign(node);
  171. if (pathString.length())
  172. {
  173. pathString.append(L"\f");
  174. }
  175. pathString.append(node);
  176. if (!case_sensitive)
  177. {
  178. std::transform(
  179. pathString.begin(), pathString.end(),
  180. pathString.begin(),
  181. towupper);
  182. }
  183. return pathString.c_str();
  184. }
  185. //-------------------------------------------------
  186. const wchar_t* XMLReader::AddPath(const char* node)
  187. {
  188. wa::strings::wa_string wszNode(node);
  189. return AddPath(wszNode.GetW().c_str());
  190. }
  191. //-------------------------------------------------------
  192. const wchar_t* XMLReader::RemovePath(const wchar_t* node)
  193. {
  194. size_t pathSize = pathString.length();
  195. size_t removeLength = wcslen(node);
  196. removeLength = pathSize > removeLength ? removeLength + 1 : removeLength;
  197. pathString = pathString.substr(0, pathSize - removeLength);
  198. if (pathString.length())
  199. {
  200. const wchar_t* last_node = wcsrchr(pathString.c_str(), '\f');
  201. if (last_node)
  202. {
  203. currentNode.assign(last_node + 1);
  204. }
  205. else
  206. {
  207. currentNode.assign(pathString);
  208. }
  209. }
  210. else
  211. {
  212. currentNode = L"";
  213. }
  214. return pathString.c_str();
  215. }
  216. //----------------------------------------------------
  217. const wchar_t* XMLReader::RemovePath(const char* node)
  218. {
  219. wa::strings::wa_string wszNode(node);
  220. return RemovePath(wszNode.GetW().c_str());
  221. }
  222. //-------------------------------------------------------------------------
  223. void XMLCALL XMLReader::StartTag(const wchar_t* name, const wchar_t** atts)
  224. {
  225. const wchar_t* xmlpath = AddPath(name);
  226. XMLParameters xmlParameters(atts);
  227. for (size_t i = 0; i != callbacks.size(); i++)
  228. {
  229. if (callbacks[i] && Match(callbacks[i]->match, xmlpath))
  230. callbacks[i]->callback->xmlReaderOnStartElementCallback(xmlpath, name, static_cast<ifc_xmlreaderparams*>(&xmlParameters));
  231. }
  232. }
  233. //-------------------------------------------------------------------
  234. void XMLCALL XMLReader::StartTag(const char* name, const char** atts)
  235. {
  236. wa::strings::wa_string wszName(name);
  237. size_t nAttrCount = 0;
  238. const char** a = atts;
  239. while (*a)
  240. {
  241. nAttrCount++;
  242. a++;
  243. }
  244. wchar_t** wszAtts = new wchar_t* [nAttrCount + 1];
  245. if (nAttrCount)
  246. {
  247. size_t n = 0;
  248. while (*atts)
  249. {
  250. const char* pszAttr = *atts;
  251. size_t nAttrLen = strlen(pszAttr);
  252. wchar_t* wc = new wchar_t[nAttrLen + 1];
  253. mbstowcs_s(NULL, wc, nAttrLen + 1, pszAttr, nAttrLen);
  254. wszAtts[n++] = wc;
  255. atts++;
  256. }
  257. }
  258. wszAtts[nAttrCount] = 0;
  259. StartTag(wszName.GetW().c_str(), const_cast<const wchar_t**>(wszAtts));
  260. }
  261. //-------------------------------------------------
  262. void XMLCALL XMLReader::EndTag(const wchar_t* name)
  263. {
  264. endPathString = BuildPath();
  265. RemovePath(name);
  266. for (size_t i = 0; i != callbacks.size(); i++)
  267. {
  268. if (callbacks[i] && Match(callbacks[i]->match, endPathString.c_str()))
  269. callbacks[i]->callback->xmlReaderOnEndElementCallback(endPathString.c_str(), name);
  270. }
  271. }
  272. //----------------------------------------------
  273. void XMLCALL XMLReader::EndTag(const char* name)
  274. {
  275. wa::strings::wa_string wszName(name);
  276. return EndTag(wszName.GetW().c_str());
  277. }
  278. //------------------------------------------------------------
  279. void XMLCALL XMLReader::TextHandler(const wchar_t* s, int len)
  280. {
  281. if (len)
  282. {
  283. textCache.assign(s, len);
  284. const wchar_t* xmlpath = BuildPath();
  285. for (size_t i = 0; i != callbacks.size(); i++)
  286. {
  287. if (callbacks[i] && Match(callbacks[i]->match, xmlpath))
  288. callbacks[i]->callback->xmlReaderOnCharacterDataCallback(xmlpath, currentNode.c_str(), textCache.c_str());
  289. }
  290. }
  291. }
  292. //---------------------------------------------------------
  293. void XMLCALL XMLReader::TextHandler(const char* s, int len)
  294. {
  295. wa::strings::wa_string wszText(s);
  296. return TextHandler(wszText.GetW().c_str(), len);
  297. }
  298. //---------------------------
  299. void XMLReader::PushContext()
  300. {
  301. context.push_back(parser);
  302. parser = XML_ExternalEntityParserCreate(parser, L"\0", NULL);
  303. }
  304. //--------------------------
  305. void XMLReader::PopContext()
  306. {
  307. if (parser)
  308. XML_ParserFree(parser);
  309. parser = context.back();
  310. context.pop_back();
  311. }
  312. //---------------------
  313. void XMLReader::Reset()
  314. {
  315. if (parser)
  316. {
  317. XML_ParserReset(parser, 0);
  318. XML_SetUserData(parser, this); // give our object pointer as context
  319. XML_SetElementHandler(parser, DStartTag, DEndTag); // set the tag callbacks
  320. XML_SetCharacterDataHandler(parser, DTextHandler); // set the text callbacks
  321. }
  322. }
  323. //--------------------------------------------------
  324. void XMLReader::SetEncoding(const wchar_t* encoding)
  325. {
  326. wa::strings::wa_string szEncoding(encoding);
  327. XML_SetEncoding(parser, szEncoding.GetW().c_str());
  328. }
  329. //-------------------------------
  330. int XMLReader::SetCaseSensitive()
  331. {
  332. case_sensitive = true;
  333. return OBJ_XML_SUCCESS;
  334. }
  // Dispatch table for XMLReader: each entry pairs an OBJ_XML_* identifier with
  // the member function that handles it.
  // NOTE(review): START_DISPATCH / CB / VCB / END_DISPATCH are defined elsewhere.
  // In this table VCB is used for the void-returning methods and CB for the
  // int-returning ones -- confirm against the macro definitions.
  335. #define CBCLASS XMLReader
  336. START_DISPATCH;
  337. VCB(OBJ_XML_REGISTERCALLBACK, RegisterCallback)
  338. VCB(OBJ_XML_UNREGISTERCALLBACK, UnregisterCallback)
  339. CB(OBJ_XML_OPEN, Open)
  340. CB(OBJ_XML_OPEN2, OpenNamespace)
  341. VCB(OBJ_XML_OLDFEED, OldFeed)
  342. CB(OBJ_XML_FEED, Feed)
  343. VCB(OBJ_XML_CLOSE, Close)
  344. VCB(OBJ_XML_INTERRUPT, PushContext)
  345. VCB(OBJ_XML_RESUME, PopContext)
  346. VCB(OBJ_XML_RESET, Reset)
  347. VCB(OBJ_XML_SETENCODING, SetEncoding)
  348. CB(OBJ_XML_SETCASESENSITIVE, SetCaseSensitive)
  349. END_DISPATCH;
  // CBCLASS is only needed by the table above.
  350. #undef CBCLASS