xmlparser.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * *
3  * Copyright (C) 2007-2013 by Johan De Taeye, frePPLe bvba *
4  * *
5  * This library is free software; you can redistribute it and/or modify it *
6  * under the terms of the GNU Affero General Public License as published *
7  * by the Free Software Foundation; either version 3 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This library is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU Affero General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU Affero General Public *
16  * License along with this program. *
17  * If not, see <http://www.gnu.org/licenses/>. *
18  * *
19  ***************************************************************************/
20 
21 #define FREPPLE_CORE
22 #include "frepple/utils.h"
23 #include <sys/stat.h>
24 
25 /* Uncomment the next line to create a lot of debugging messages during
26  * the parsing of XML-data. */
27 //#define PARSE_DEBUG
28 
29 // With VC++ we use the Win32 functions to browse a directory
30 #ifdef _MSC_VER
31 #define WIN32_LEAN_AND_MEAN
32 #include <windows.h>
33 #else
34 // With Unix-like systems we use a check suggested by the autoconf tools
35 #if HAVE_DIRENT_H
36 # include <dirent.h>
37 # define NAMLEN(dirent) strlen((dirent)->d_name)
38 #else
39 # define dirent direct
40 # define NAMLEN(dirent) (dirent)->d_namlen
41 # if HAVE_SYS_NDIR_H
42 # include <sys/ndir.h>
43 # endif
44 # if HAVE_SYS_DIR_H
45 # include <sys/dir.h>
46 # endif
47 # if HAVE_NDIR_H
48 # include <ndir.h>
49 # endif
50 #endif
51 #endif
52 
53 
54 namespace frepple
55 {
56 namespace utils
57 {
58 
62 xercesc::XMLTranscoder* XMLInput::utf8_encoder = NULL;
63 char XMLInput::encodingbuffer[16*1024+4];
64 
65 
66 char* XMLInput::transcodeUTF8(const XMLCh* xercesChars)
67 {
68  XMLSize_t charsEaten;
69  XMLSize_t charsReturned = utf8_encoder->transcodeTo(xercesChars,
70  xercesc::XMLString::stringLen(xercesChars),
71  (XMLByte*) encodingbuffer, 16*1024,
72  charsEaten, xercesc::XMLTranscoder::UnRep_RepChar );
73  encodingbuffer[charsReturned] = 0;
74  return encodingbuffer;
75 }
76 
77 
78 DECLARE_EXPORT XMLInput::XMLInput(unsigned short maxNestedElmnts)
79  : parser(NULL), maxdepth(maxNestedElmnts), m_EStack(maxNestedElmnts+2),
80  numElements(-1), ignore(0), objectEnded(false),
81  abortOnDataException(true), attributes(NULL)
82 {
83  if (!utf8_encoder)
84  {
85  xercesc::XMLTransService::Codes resCode;
86  utf8_encoder = xercesc::XMLPlatformUtils::fgTransService->makeNewTranscoderFor("UTF-8", resCode, 16*1024);
87  if (!XMLInput::utf8_encoder)
88  logger << "Can't initialize UTF-8 transcoder: reason " << resCode << endl;
89  }
90 }
91 
92 
// Handler for processing instructions found in the XML input.
// The target and the data of the instruction are transcoded to char strings.
// Only instructions with the target "python" are acted upon: all others are
// ignored. A DataException is re-thrown when abortOnDataException is set,
// and is only logged otherwise. The transcoded strings are released on
// every path out of the function.
93 DECLARE_EXPORT void XMLInput::processingInstruction
94 (const XMLCh *const target, const XMLCh *const data)
95 {
96  char* type = xercesc::XMLString::transcode(target);
97  char* value = xercesc::XMLString::transcode(data);
98  try
99  {
100  if (!strcmp(type,"python"))
101  {
102  // "python" is the only processing instruction which we process.
103  // Others will be silently ignored
104  try
105  {
106  // Execute the processing instruction
// NOTE(review): the statement executing the instruction (listing line 107)
// is absent from this listing - restore it from the original sources.
108  }
109  catch (const DataException& e)
110  {
111  if (abortOnDataException)
112  {
// Release the strings before re-throwing. The outer catch-all handler
// below releases them a second time.
// NOTE(review): presumably harmless because release() clears the pointer -
// confirm against the Xerces documentation.
113  xercesc::XMLString::release(&type);
114  xercesc::XMLString::release(&value);
115  throw;
116  }
117  else logger << "Continuing after data error: " << e.what() << endl;
118  }
119  }
// Normal exit: free the transcoded strings
120  xercesc::XMLString::release(&type);
121  xercesc::XMLString::release(&value);
122  }
123  catch (...)
124  {
// Any exception: free the transcoded strings and pass the exception on
125  xercesc::XMLString::release(&type);
126  xercesc::XMLString::release(&value);
127  throw;
128  }
129 }
130 
131 
132 DECLARE_EXPORT void XMLInput::startElement(const XMLCh* const uri,
133  const XMLCh* const n, const XMLCh* const qname,
134  const xercesc::Attributes& atts)
135 {
136  // Validate the state
137  assert(!states.empty());
138 
139  // Check for excessive number of open objects
140  if (numElements >= maxdepth)
141  throw DataException("XML-document with elements nested excessively deep");
142 
143  // Push the element on the stack
144  datapair *pElement = &m_EStack[numElements+1];
145  pElement->first.reset(n);
146  pElement->second.reset();
147 
148  // Store a pointer to the attributes
149  attributes = &atts;
150 
151  switch (states.top())
152  {
153  case SHUTDOWN:
154  // STATE: Parser is shutting down, and we can ignore all input that
155  // is still coming
156  return;
157 
158  case IGNOREINPUT:
159  // STATE: Parser is ignoring a part of the input
160  if (pElement->first.getHash() == endingHashes.top())
161  // Increase the count of occurences before the ignore section ends
162  ++ignore;
163  ++numElements;
164  return;
165 
166  case INIT:
167  // STATE: The only time the parser comes in this state is when we read
168  // opening tag of the ROOT tag.
169 #ifdef PARSE_DEBUG
170  if (!m_EHStack.empty())
171  logger << "Initialize root tag for reading object "
172  << getCurrentObject() << " ("
173  << typeid(*getCurrentObject()).name() << ")" << endl;
174  else
175  logger << "Initialize root tag for reading object NULL" << endl;
176 #endif
177  states.top() = READOBJECT;
178  endingHashes.push(pElement->first.getHash());
179  // Note that there is no break or return here. We also execute the
180  // statements of the following switch-case.
181 
182  case READOBJECT:
183  // STATE: Parser is reading data elements of an object
184  // Debug
185 #ifdef PARSE_DEBUG
186  logger << " Start element " << pElement->first.getName()
187  << " - object " << getCurrentObject() << endl;
188 #endif
189 
190  // Call the handler of the object
191  assert(!m_EHStack.empty());
192  try {getCurrentObject()->beginElement(*this, pElement->first);}
193  catch (const DataException& e)
194  {
195  if (abortOnDataException) throw;
196  else logger << "Continuing after data error: " << e.what() << endl;
197  }
198 
199  // Now process all attributes. For attributes we only call the
200  // endElement() member and skip the beginElement() method.
201  numElements += 1;
202  if (states.top() != IGNOREINPUT)
203  for (unsigned int i=0, cnt=atts.getLength(); i<cnt; i++)
204  {
205  char* val = transcodeUTF8(atts.getValue(i));
206  m_EStack[numElements+1].first.reset(atts.getLocalName(i));
207  m_EStack[numElements+1].second.setData(val);
208 #ifdef PARSE_DEBUG
209  char* attname = xercesc::XMLString::transcode(atts.getQName(i));
210  logger << " Processing attribute " << attname
211  << " - object " << getCurrentObject() << endl;
212  xercesc::XMLString::release(&attname);
213 #endif
214  try {getCurrentObject()->endElement(*this, m_EStack[numElements+1].first, m_EStack[numElements+1].second);}
215  catch (const DataException& e)
216  {
217  if (abortOnDataException) throw;
218  else logger << "Continuing after data error: " << e.what() << endl;
219  }
220  // Stop processing attributes if we are now in the ignore mode
221  if (states.top() == IGNOREINPUT) break;
222  }
223  } // End of switch statement
224 
225  // Outside of this handler, no attributes are available
226  attributes = NULL;
227 }
228 
229 
230 DECLARE_EXPORT void XMLInput::endElement(const XMLCh* const uri,
231  const XMLCh* const s,
232  const XMLCh* const qname)
233 {
234  // Validate the state
235  assert(numElements >= 0);
236  assert(!states.empty());
237  assert(numElements < maxdepth);
238 
239  // Remove an element from the stack
240  datapair *pElement = &(m_EStack[numElements--]);
241 
242  switch (states.top())
243  {
244  case INIT:
245  // This should never happen!
246  throw LogicException("Unreachable code reached");
247 
248  case SHUTDOWN:
249  // STATE: Parser is shutting down, and we can ignore all input that is
250  // still coming
251  return;
252 
253  case IGNOREINPUT:
254  // STATE: Parser is ignoring a part of the input
255 #ifdef PARSE_DEBUG
256  logger << " End element " << pElement->first.getName()
257  << " - IGNOREINPUT state" << endl;
258 #endif
259  // Continue if we aren't dealing with the tag being ignored
260  if (pElement->first.getHash() != endingHashes.top()) return;
261  if (ignore == 0)
262  {
263  // Finished ignoring now
264  states.pop();
265  endingHashes.pop();
266 #ifdef PARSE_DEBUG
267  logger << "Finish IGNOREINPUT state" << endl;
268 #endif
269  }
270  else
271  --ignore;
272  break;
273 
274  case READOBJECT:
275  // STATE: Parser is reading data elements of an object
276 #ifdef PARSE_DEBUG
277  logger << " End element " << pElement->first.getName()
278  << " - object " << getCurrentObject() << endl;
279 #endif
280 
281  // Check if we finished with the current handler
282  assert(!m_EHStack.empty());
283  if (pElement->first.getHash() == endingHashes.top())
284  {
285  // Call the ending handler of the Object, with a special
286  // flag to specify that this object is now ended
287  objectEnded = true;
288  try
289  {
290  getCurrentObject()->endElement(*this, pElement->first, pElement->second);
291  if (userexit) userexit.call(getCurrentObject());
292  }
293  catch (const DataException& e)
294  {
295  if (abortOnDataException) throw;
296  else logger << "Continuing after data error: " << e.what() << endl;
297  }
298  objectEnded = false;
299 #ifdef PARSE_DEBUG
300  logger << "Finish reading object " << getCurrentObject() << endl;
301 #endif
302  // Pop from the handler object stack
303  prev = getCurrentObject();
304  m_EHStack.pop_back();
305  endingHashes.pop();
306 
307  // Pop from the state stack
308  states.pop();
309  if (m_EHStack.empty())
310  shutdown();
311  else
312  {
313  // Call also the endElement function on the owning object
314  try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
315  catch (const DataException& e)
316  {
317  if (abortOnDataException) throw;
318  else logger << "Continuing after data error: " << e.what() << endl;
319  }
320 #ifdef PARSE_DEBUG
321  logger << " End element " << pElement->first.getName()
322  << " - object " << getCurrentObject() << endl;
323 #endif
324  }
325  }
326  else
327  // This tag is not the ending tag of an object
328  // Call the function of the Object
329  try {getCurrentObject()->endElement(*this, pElement->first, pElement->second);}
330  catch (const DataException& e)
331  {
332  if (abortOnDataException) throw;
333  else logger << "Continuing after data error: " << e.what() << endl;
334  }
335  }
336 }
337 
338 
339 // Unfortunately the prototype for this handler function differs between
340 // Xerces-c 2.x and 3.x
341 #if XERCES_VERSION_MAJOR==2
342 DECLARE_EXPORT void XMLInput::characters(const XMLCh *const c, const unsigned int n)
343 #else
344 DECLARE_EXPORT void XMLInput::characters(const XMLCh *const c, const XMLSize_t n)
345 #endif
346 {
347  // No data capture during the ignore state
348  if (states.top()==IGNOREINPUT) return;
349 
350  // Process the data
351  char* name = transcodeUTF8(c);
352  m_EStack[numElements].second.addData(name, strlen(name));
353 }
354 
355 
356 DECLARE_EXPORT void XMLInput::warning(const xercesc::SAXParseException& e)
357 {
358  char* message = xercesc::XMLString::transcode(e.getMessage());
359  logger << "Warning: " << message;
360  if (e.getLineNumber() > 0) logger << " at line: " << e.getLineNumber();
361  logger << endl;
362  xercesc::XMLString::release(&message);
363 }
364 
365 
366 DECLARE_EXPORT void XMLInput::fatalError(const xercesc::SAXParseException& e)
367 {
368  char* message = xercesc::XMLString::transcode(e.getMessage());
369  ostringstream ch;
370  ch << message;
371  if (e.getLineNumber() > 0) ch << " at line " << e.getLineNumber();
372  xercesc::XMLString::release(&message);
373  throw DataException(ch.str());
374 }
375 
376 
377 DECLARE_EXPORT void XMLInput::error(const xercesc::SAXParseException& e)
378 {
379  char* message = xercesc::XMLString::transcode(e.getMessage());
380  ostringstream ch;
381  ch << message;
382  if (e.getLineNumber() > 0) ch << " at line " << e.getLineNumber();
383  xercesc::XMLString::release(&message);
384  throw DataException(ch.str());
385 }
386 
387 
389 {
390  // Keep track of the tag where this object will end
391  assert(numElements >= -1);
392  endingHashes.push(m_EStack[numElements+1].first.getHash());
393  if (pPI)
394  {
395  // Push a new object on the handler stack
396 #ifdef PARSE_DEBUG
397  logger << "Start reading object " << pPI
398  << " (" << typeid(*pPI).name() << ")" << endl;
399 #endif
400  prev = getCurrentObject();
401  m_EHStack.push_back(make_pair(pPI,static_cast<void*>(NULL)));
402  states.push(READOBJECT);
403  }
404  else
405  {
406  // Ignore the complete content of this element
407 #ifdef PARSE_DEBUG
408  logger << "Start ignoring input" << endl;
409 #endif
410  states.push(IGNOREINPUT);
411  }
412 }
413 
414 
416 {
417  // Already shutting down...
418  if (states.empty() || states.top() == SHUTDOWN) return;
419 
420  // Message
421 #ifdef PARSE_DEBUG
422  logger << " Forcing a shutdown - SHUTDOWN state" << endl;
423 #endif
424 
425  // Change the state
426  states.push(SHUTDOWN);
427 
428  // Done if we have no elements on the stack, i.e. a normal end.
429  if (numElements<0) return;
430 
431  // Call the ending handling of all objects on the stack
432  // This allows them to finish off in a valid state, and delete any temporary
433  // objects they may have allocated.
434  objectEnded = true;
435  m_EStack[numElements].first.reset("Not a real tag");
436  m_EStack[numElements].second.reset();
437  while (!m_EHStack.empty())
438  {
439  try
440  {
441  getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);
442  if (userexit) userexit.call(getCurrentObject());
443  }
444  catch (const DataException& e)
445  {
446  if (abortOnDataException) throw;
447  else logger << "Continuing after data error: " << e.what() << endl;
448  }
449  m_EHStack.pop_back();
450  }
451 }
452 
453 
454 DECLARE_EXPORT void XMLInput::reset()
455 {
456  // Delete the xerces parser object
457  delete parser;
458  parser = NULL;
459 
460  // Call the ending handling of all objects on the stack
461  // This allows them to finish off in a valid state, and delete any temporary
462  // objects they may have allocated.
463  if (!m_EHStack.empty())
464  {
465  // The next line is to avoid calling the endElement handler twice for the
466  // last object. E.g. endElement handler causes and exception, and as part
467  // of the exception handling we call the reset method.
468  if (objectEnded) m_EHStack.pop_back();
469  objectEnded = true;
470  m_EStack[++numElements].first.reset("Not a real tag");
471  m_EStack[++numElements].second.reset();
472  while (!m_EHStack.empty())
473  {
474  try
475  {
476  getCurrentObject()->endElement(*this, m_EStack[numElements].first, m_EStack[numElements].second);
477  if (userexit) userexit.call(getCurrentObject());
478  }
479  catch (const DataException& e)
480  {
481  if (abortOnDataException) throw;
482  else logger << "Continuing after data error: " << e.what() << endl;
483  }
484  m_EHStack.pop_back();
485  }
486  }
487 
488  // Cleanup of stacks
489  while (!states.empty()) states.pop();
490  while (!endingHashes.empty()) endingHashes.pop();
491 
492  // Set all variables back to their starting values
493  numElements = -1;
494  ignore = 0;
495  objectEnded = false;
496  attributes = NULL;
497 }
498 
499 
500 void XMLInput::parse(xercesc::InputSource &in, Object *pRoot, bool validate)
501 {
502  try
503  {
504  // Create a Xerces parser
505  parser = xercesc::XMLReaderFactory::createXMLReader();
506 
507  // Set the features of the parser. A bunch of the options are dependent
508  // on whether we want to validate the input or not.
509  parser->setProperty(xercesc::XMLUni::fgXercesScannerName, const_cast<XMLCh*>
510  (validate ? xercesc::XMLUni::fgSGXMLScanner : xercesc::XMLUni::fgWFXMLScanner));
511  parser->setFeature(xercesc::XMLUni::fgSAX2CoreValidation, validate);
512  parser->setFeature(xercesc::XMLUni::fgSAX2CoreNameSpacePrefixes, false);
513  parser->setFeature(xercesc::XMLUni::fgXercesIdentityConstraintChecking, false);
514  parser->setFeature(xercesc::XMLUni::fgXercesDynamic, false);
515  parser->setFeature(xercesc::XMLUni::fgXercesSchema, validate);
516  parser->setFeature(xercesc::XMLUni::fgXercesSchemaFullChecking, false);
517  parser->setFeature(xercesc::XMLUni::fgXercesValidationErrorAsFatal,true);
518  parser->setFeature(xercesc::XMLUni::fgXercesIgnoreAnnotations,true);
519 
520  if (validate)
521  {
522  // Specify the no-namespace schema file
523  string schema = Environment::searchFile("frepple.xsd");
524  if (schema.empty())
525  throw RuntimeException("Can't find XML schema file 'frepple.xsd'");
526  XMLCh *c = xercesc::XMLString::transcode(schema.c_str());
527  parser->setProperty(
528  xercesc::XMLUni::fgXercesSchemaExternalNoNameSpaceSchemaLocation, c
529  );
530  xercesc::XMLString::release(&c);
531  }
532 
533  // If we are reading into a NULL object, there is no need to use a
534  // content handler or a handler stack.
535  if (pRoot)
536  {
537  // Set the event handler. If we are reading into a NULL object, there is
538  // no need to use a content handler.
539  parser->setContentHandler(this);
540 
541  // Get the parser to read data into the object pRoot.
542  m_EHStack.push_back(make_pair(pRoot,static_cast<void*>(NULL)));
543  states.push(INIT);
544  }
545 
546  // Set the error handler
547  parser->setErrorHandler(this);
548 
549  // Parse the input
550  parser->parse(in);
551  }
552  // Note: the reset() method needs to be called in all circumstances. The
553  // reset method allows all objects to finish in a valid state and clean up
554  // any memory they may have allocated.
555  catch (const xercesc::XMLException& toCatch)
556  {
557  char* message = xercesc::XMLString::transcode(toCatch.getMessage());
558  string msg(message);
559  xercesc::XMLString::release(&message);
560  reset();
561  throw RuntimeException("Parsing error: " + msg);
562  }
563  catch (const exception& toCatch)
564  {
565  reset();
566  ostringstream msg;
567  msg << "Error during XML parsing: " << toCatch.what();
568  throw RuntimeException(msg.str());
569  }
570  catch (...)
571  {
572  reset();
573  throw RuntimeException(
574  "Parsing error: Unexpected exception during XML parsing");
575  }
576  reset();
577 }
578 
579 
580 DECLARE_EXPORT ostream& operator << (ostream& os, const XMLEscape& x)
581 {
582  for (const char* p = x.data; *p; ++p)
583  {
584  switch (*p)
585  {
586  case '&': os << "&amp;"; break;
587  case '<': os << "&lt;"; break;
588  case '>': os << "&gt;"; break;
589  case '"': os << "&quot;"; break;
590  case '\'': os << "&apos;"; break;
591  default: os << *p;
592  }
593  }
594  return os;
595 }
596 
597 
598 DECLARE_EXPORT void XMLOutput::incIndent()
599 {
600  indentstring[m_nIndent++] = '\t';
601  if (m_nIndent > 40) m_nIndent = 40;
602  indentstring[m_nIndent] = '\0';
603 }
604 
605 
606 DECLARE_EXPORT void XMLOutput::decIndent()
607 {
608  if (--m_nIndent < 0) m_nIndent = 0;
609  indentstring[m_nIndent] = '\0';
610 }
611 
612 
614 (const Keyword& tag, const Object* object, mode m)
615 {
616  // Avoid NULL pointers and skip hidden objects
617  if (!object || object->getHidden()) return;
618 
619  // Adjust current and parent object pointer
620  const Object *previousParent = parentObject;
621  parentObject = currentObject;
622  currentObject = object;
623  ++numObjects;
624  ++numParents;
625 
626  // Call the write method on the object
627  if (m != DEFAULT)
628  // Mode is overwritten
629  object->writeElement(this, tag, m);
630  else
631  // Choose wether to save a reference of the object.
632  // The root object can't be saved as a reference.
633  object->writeElement(this, tag, numParents>2 ? REFERENCE : DEFAULT);
634 
635  // Adjust current and parent object pointer
636  --numParents;
637  currentObject = parentObject;
638  parentObject = previousParent;
639 }
640 
641 
643 {
// Writes an object as the root element of an XML document.
// The object can't be NULL, and no other object may have been written yet.
// The root element is opened with the header attributes, and the object
// then writes itself in the NOHEAD mode.
644  // Root object can't be null...
645  if (!object)
646  throw RuntimeException("Can't accept a NULL object as XML root");
647 
648  // There should not be any saved objects yet
649  if (numObjects > 0)
650  throw LogicException("Can't have multiple headers in a document");
651  assert(!parentObject);
652  assert(!currentObject);
653 
654  // Write the first line for the xml document
// NOTE(review): the statement writing the first line (listing line 655) is
// absent from this listing - restore it from the original sources.
656 
657  // Adjust current object pointer
658  currentObject = object;
659 
660  // Write the object
661  ++numObjects;
662  ++numParents;
663  BeginObject(tag, getHeaderAtts());
664  object->writeElement(this, tag, NOHEAD);
665 
666  // Adjust current and parent object pointer
667  currentObject = NULL;
668  parentObject = NULL;
669 }
670 
671 
673 {
// Writes the start of an XML document: the opening tag with the header
// attributes. Afterwards the parent count is raised by 2.
674  // There should not be any saved objects yet
// NOTE(review): this guard throws when parentObject or currentObject is
// NULL, while the comment above expects that nothing has been written yet
// and the other header-writing routine in this file asserts that both
// pointers are NULL. The condition looks inverted - confirm the intent.
675  if (numObjects > 0 || !parentObject || !currentObject)
676  throw LogicException("Writing invalid header to XML document");
677 
678  // Write the first line and the opening tag
// NOTE(review): the statement writing the first line (listing line 679) is
// absent from this listing - restore it from the original sources.
680  BeginObject(tag, getHeaderAtts());
681 
682  // Fake a dummy parent
683  numParents += 2;
684 }
685 
686 
688 {
689  char* s = XMLInput::transcodeUTF8(atts->getValue(key.getXMLCharacters()));
690  const_cast<XMLAttributeList*>(this)->result.setData(s ? s : "");
691  return &result;
692 }
693 
694 
696 {
697  switch (getData()[0])
698  {
699  case 'T':
700  case 't':
701  case '1':
702  return true;
703  case 'F':
704  case 'f':
705  case '0':
706  return false;
707  }
708  throw DataException("Invalid boolean value: " + string(getData()));
709 }
710 
711 
713 {
714  if (ch) return ch;
715  Keyword::tagtable::const_iterator i = Keyword::getTags().find(hash);
716  if (i == Keyword::getTags().end())
717  throw LogicException("Undefined element keyword");
718  return i->second->getName().c_str();
719 }
720 
721 
722 DECLARE_EXPORT Keyword::Keyword(const string& name) : strName(name)
723 {
724  // Error condition: name is empty
725  if (name.empty()) throw LogicException("Creating keyword without name");
726 
727  // Create a number of variations of the tag name
728  strStartElement = string("<") + name;
729  strEndElement = string("</") + name + ">\n";
730  strElement = string("<") + name + ">";
731  strAttribute = string(" ") + name + "=\"";
732 
733  // Compute the hash value
734  dw = hash(name.c_str());
735 
736  // Create a properly encoded Xerces string
737  xercesc::XMLPlatformUtils::Initialize();
738  xmlname = xercesc::XMLString::transcode(name.c_str());
739 
740  // Verify that the hash is "perfect".
741  check();
742 }
743 
744 
745 DECLARE_EXPORT Keyword::Keyword(const string& name, const string& nspace)
746  : strName(name)
747 {
748  // Error condition: name is empty
749  if (name.empty())
750  throw LogicException("Creating keyword without name");
751  if (nspace.empty())
752  throw LogicException("Creating keyword with empty namespace");
753 
754  // Create a number of variations of the tag name
755  strStartElement = string("<") + nspace + ":" + name;
756  strEndElement = string("</") + nspace + ":" + name + ">\n";
757  strElement = string("<") + nspace + ":" + name + ">";
758  strAttribute = string(" ") + nspace + ":" + name + "=\"";
759 
760  // Compute the hash value
761  dw = hash(name);
762 
763  // Create a properly encoded Xerces string
764  xercesc::XMLPlatformUtils::Initialize();
765  xmlname = xercesc::XMLString::transcode(string(nspace + ":" + name).c_str());
766 
767  // Verify that the hash is "perfect".
768  check();
769 }
770 
771 
772 void Keyword::check()
773 {
774  // To be thread-safe we make sure only a single thread at a time
775  // can execute this check.
776  static Mutex dd;
777  {
778  ScopeMutexLock l(dd);
779  tagtable::const_iterator i = getTags().find(dw);
780  if (i!=getTags().end() && i->second->getName()!=strName)
781  throw LogicException("Tag XML-tag hash function clashes for "
782  + i->second->getName() + " and " + strName);
783  getTags().insert(make_pair(dw,this));
784  }
785 }
786 
787 
789 {
790  // Remove from the tag list
791  tagtable::iterator i = getTags().find(dw);
792  if (i!=getTags().end()) getTags().erase(i);
793 
794  // Destroy the xerces string
795  xercesc::XMLString::release(&xmlname);
796  xercesc::XMLPlatformUtils::Terminate();
797 }
798 
799 
800 DECLARE_EXPORT const Keyword& Keyword::find(const char* name)
801 {
802  tagtable::const_iterator i = getTags().find(hash(name));
803  return *(i!=getTags().end() ? i->second : new Keyword(name));
804 }
805 
806 
808 {
809  static tagtable alltags;
810  return alltags;
811 }
812 
813 
815 {
816  if (c == 0 || *c == 0) return 0;
817 
818  // Compute hash
819  const char* curCh = c;
820  hashtype hashVal = *curCh;
821  while (*curCh)
822  hashVal = (hashVal * 37) + (hashVal >> 24) + *curCh++;
823 
824  // Divide by modulus
825  return hashVal % 954991;
826 }
827 
828 
830 {
831  char* c = xercesc::XMLString::transcode(t);
832  if (c == 0 || *c == 0)
833  {
834  xercesc::XMLString::release(&c);
835  return 0;
836  }
837 
838  // Compute hash
839  const char* curCh = c;
840  hashtype hashVal = *curCh;
841  while (*curCh)
842  hashVal = (hashVal * 37) + (hashVal >> 24) + *curCh++;
843 
844  // Divide by modulus
845  xercesc::XMLString::release(&c);
846  return hashVal % 954991;
847 }
848 
849 
851 {
852  for (tagtable::iterator i = getTags().begin(); i != getTags().end(); ++i)
853  logger << i->second->getName() << " " << i->second->dw << endl;
854 }
855 
856 
857 DECLARE_EXPORT void XMLInputFile::parse(Object *pRoot, bool validate)
858 {
859  // Check if string has been set
860  if (filename.empty())
861  throw DataException("Missing input file or directory");
862 
863  // Check if the parameter is the name of a directory
864  struct stat stat_p;
865  if (stat(filename.c_str(), &stat_p))
866  // Can't verify the status
867  throw RuntimeException("Couldn't open input file '" + filename + "'");
868  else if (stat_p.st_mode & S_IFDIR)
869  {
870  // Data is a directory: loop through all *.xml files now. No recursion in
871  // subdirectories is done.
872  // The code is unfortunately different for Windows & Linux. Sigh...
873 #ifdef _MSC_VER
874  string f = filename + "\\*.xml";
875  WIN32_FIND_DATA dir_entry_p;
876  HANDLE h = FindFirstFile(f.c_str(), &dir_entry_p);
877  if (h == INVALID_HANDLE_VALUE)
878  throw RuntimeException("Couldn't open input file '" + f + "'");
879  do
880  {
881  f = filename + '/' + dir_entry_p.cFileName;
882  XMLInputFile(f.c_str()).parse(pRoot);
883  }
884  while (FindNextFile(h, &dir_entry_p));
885  FindClose(h);
886 #elif HAVE_DIRENT_H
887  struct dirent *dir_entry_p;
888  DIR *dir_p = opendir(filename.c_str());
889  while (NULL != (dir_entry_p = readdir(dir_p)))
890  {
891  int n = NAMLEN(dir_entry_p);
892  if (n > 4 && !strcmp(".xml", dir_entry_p->d_name + n - 4))
893  {
894  string f = filename + '/' + dir_entry_p->d_name;
895  XMLInputFile(f.c_str()).parse(pRoot, validate);
896  }
897  }
898  closedir(dir_p);
899 #else
900  throw RuntimeException("Can't process a directory on your platform");
901 #endif
902  }
903  else
904  {
905  // Normal file
906  // Parse the file
907  XMLCh *f = xercesc::XMLString::transcode(filename.c_str());
908  xercesc::LocalFileInputSource in(f);
909  xercesc::XMLString::release(&f);
910  XMLInput::parse(in, pRoot, validate);
911  }
912 }
913 
914 } // end namespace
915 } // end namespace