C++ - libxml2 SAX parser - extracting text nodes


I want to extract the values of text nodes from XML input. I got the following code from the web, because the official libxml documentation has many broken links, the SAX parser page being one of them. Please help me obtain the value of a text node. When I tried to get the text node in startElementNs, I got NULL. I would appreciate any help here.

My XML looks like this:

<a>    <b>       <c> text values </c>    </b> </a> 

My code looks like this:

#include <stdio.h> #include <assert.h> #include <memory.h> #include <libxml/xmlmemory.h> #include <libxml/parser.h> #include <string>   class parsefsm { public:    /** sax2 callback when element start has been detected parser. provides namespace informations element, new namespace declarations on element.       ctx:  user data (xml parser context)       localname:  local name of element       prefix:  element namespace prefix if available       uri:  element namespace name if available       nb_namespaces: number of namespace definitions on node       namespaces: pointer array of prefix/uri pairs namespace definitions       nb_attributes: number of attributes on node       nb_defaulted:  number of defaulted attributes. defaulted ones @ end of array       attributes: pointer array of (localname/prefix/uri/value/end) attribute values.       **/   static void startelementns (void *ctx,                   const xmlchar * localname,                   const xmlchar * prefix,                   const xmlchar * uri,                   int nb_namespaces,                   const xmlchar ** namespaces,                   int nb_attributes,                   int nb_defaulted, const xmlchar ** attributes)   {     parsefsm & fsm = *(static_cast < parsefsm * >(ctx));     printf ("startelementns: name = '%s' prefix = '%s' uri = (%p)'%s'\n", localname, prefix, uri, uri);     (int indexnamespace = 0; indexnamespace < nb_namespaces; ++indexnamespace)       {         const xmlchar *prefix = namespaces[indexnamespace * 2];         const xmlchar *nsuri = namespaces[indexnamespace * 2 + 1];         printf ("  namespace: name='%s' uri=(%p)'%s'\n", prefix, nsuri, nsuri);       }     unsigned int index = 0;     (int indexattribute = 0; indexattribute < nb_attributes; ++indexattribute, index += 5)       {         const xmlchar *localname = attributes[index];         const xmlchar *prefix = attributes[index + 1];         const xmlchar *nsuri = attributes[index + 2];         const xmlchar 
*valuebegin = attributes[index + 3];         const xmlchar *valueend = attributes[index + 4];         std::string value ((const char *) valuebegin, (const char *) valueend);         printf ("  %sattribute: localname='%s', prefix='%s', uri=(%p)'%s', value='%s'\n", indexattribute >= (nb_attributes - nb_defaulted) ? "defaulted " : "", localname, prefix, nsuri, nsuri, value.c_str ());       }   }    /** sax2 callback when element end has been detected parser. provides namespace informations element.       ctx:  user data (xml parser context)       localname:  local name of element       prefix:  element namespace prefix if available       uri:  element namespace name if available       **/   static void endelementns (void *ctx,                 const xmlchar * localname,                 const xmlchar * prefix, const xmlchar * uri)   {     parsefsm & fsm = *(static_cast < parsefsm * >(ctx));     printf ("endelementns: name = '%s' prefix = '%s' uri = '%s'\n", localname,         prefix, uri);   }    /** display , format error messages, callback.       ctx:  xml parser context       msg:  message display/transmit       ...:  parameters message display       */   static void error (void *ctx, const char *msg, ...)   {     parsefsm & fsm = *(static_cast < parsefsm * >(ctx));     va_list args;     va_start (args, msg);     vprintf (msg, args);     va_end (args);   }     /** display , format warning messages, callback.       ctx:  xml parser context       msg:  message display/transmit       ...:  parameters message display       */   static void warning (void *ctx, const char *msg, ...)   {     parsefsm & fsm = *(static_cast < parsefsm * >(ctx));     va_list args;     va_start (args, msg);     vprintf (msg, args);     va_end (args);   } }; int main (int argc, const char *argv[]) {   std::string xmlin = "<a><b><c> text values </c> </b> </a>"   /*    * initialize library , check potential abi mismatches    * between version compiled , actual shared    * library used.    
*/   libxml_test_version xmlsaxhandler saxhandler; // see http://xmlsoft.org/html/libxml-tree.html#xmlsaxhandler   memset (&saxhandler, 0, sizeof (saxhandler));   // using xmlsaxversion( &saxhandler, 2 ) generate crash sets plenty of other pointers...   saxhandler.initialized = xml_sax2_magic;  // force parsing sax2.   saxhandler.startelementns = &parsefsm::startelementns;   saxhandler.endelementns = &parsefsm::endelementns;   saxhandler.warning = &parsefsm::warning;   saxhandler.error = &parsefsm::error;    parsefsm fsm;   int result =     xmlsaxuserparsememory (&saxhandler, &fsm, xmlin.c_str (),                int (xmlin.length ()));   if (result != 0)     {       printf ("failed parse document.\n");       return 1;     }    /*    * cleanup function xml library.    */   xmlcleanupparser ();   /*    * debug memory regression tests    */   xmlmemorydump ();    return 0; } 

  1. You need to use the `characters` callback:

    void characters( void * user_data, const xmlchar * ch, int len);

  2. The strings are not null-terminated; you need to use `ch` and `len` to determine the string.

  3. Another problem is that this callback can be called multiple times between a start and an end element, so you can't blindly assume that the string passed to a single call is the complete text between the tags. You may need a string builder or something similar to collect the strings.

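In the callback, you probably want to copy the characters to some other buffer so that they can be used from the endElement callback. To optimise the callback a bit, you might adjust it so that it only copies the characters if the parser is in a certain state. Note that the characters callback may be called more than once between the calls to startElement and endElement.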

I hope this answers your question; even if it is too late for you, it might help others.


Comments

Popular posts from this blog

java.util.scanner - How to read and add only numbers to array from a text file -

rewrite - Trouble with Wordpress multiple custom querystrings -