The Reader parser lets you pull-parse an XML document efficiently.
Once instantiated, call Nokogiri::XML::Reader#each to iterate over each node.
Note that you may only iterate over the document once!
Nokogiri::XML::Reader parses an XML document in much the same way a cursor moves through it.
The Reader is given an XML document, and yields nodes to an each block.
Here is an example of usage:
reader = Nokogiri::XML::Reader(<<-eoxml)
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
  </x>
eoxml

reader.each do |node|
  # node is an instance of Nokogiri::XML::Reader
  puts node.name
end
Note that Nokogiri::XML::Reader#each can only be called once! Once the cursor has moved through the entire document, you must parse the document again, so make sure that you capture any information you need during the first iteration.
The Reader
parser is good for when you need the speed of a SAX
parser, but do not want to write a Document
handler.
Attribute node type
CDATA
node type
Comment
node type
Document
node type
Document
Fragment node type
Document
Type node type
Element
node type
Element
end node type
Entity end node type
Entity node type
Entity Reference node type
Notation
node type
PI node type
Significant Whitespace node type
Text
node type
Whitespace node type
XML
Declaration node type
A list of errors encountered while parsing
The XML
source
/*
 * call-seq:
 *   from_io(io, url = nil, encoding = nil, options = 0)
 *
 * Create a new reader that parses +io+.
 *
 * Takes one required argument (+io+) and up to three optional ones.
 * +url+ and +encoding+ are converted to C strings when truthy, +options+
 * to an int when truthy; otherwise NULL / 0 is handed to libxml2.
 *
 * Raises ArgumentError when +io+ is nil or false, and RuntimeError when
 * libxml2 cannot create the reader.
 *
 * The new object is initialized with (io, url, encoding); +options+ is
 * only forwarded to libxml2.
 */
static VALUE
from_io(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_io, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char *c_url = NULL;
  const char *c_encoding = NULL;
  int c_options = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); }
  if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
  if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
  if (RTEST(rb_options)) { c_options = NUM2INT(rb_options); }

  /*
   * NOTE(review): libxml2 receives rb_io as an opaque pointer; presumably
   * the Ruby-side initializer keeps a reference so the GC does not collect
   * it while the reader is alive -- confirm against Reader#initialize.
   */
  reader = xmlReaderForIO(
             (xmlInputReadCallback)noko_io_read,
             (xmlInputCloseCallback)noko_io_close,
             (void *)rb_io,
             c_url,
             c_encoding,
             c_options
           );

  if (reader == NULL) {
    /* Creation failed, so there is no reader to release. */
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  /* From here on the wrapper owns the reader; dealloc releases it. */
  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_io;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
Create a new reader that parses io
/*
 * call-seq:
 *   from_memory(string, url = nil, encoding = nil, options = 0)
 *
 * Create a new reader that parses +string+.
 *
 * Takes one required argument (+string+) and up to three optional ones.
 * +url+ and +encoding+ are converted to C strings when truthy, +options+
 * to an int when truthy; otherwise NULL / 0 is handed to libxml2.
 *
 * Raises ArgumentError when +string+ is nil or false, RangeError when the
 * buffer length does not fit in a C int (libxml2's size parameter), and
 * RuntimeError when libxml2 cannot create the reader.
 *
 * The new object is initialized with (string, url, encoding); +options+
 * is only forwarded to libxml2.
 */
static VALUE
from_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_buffer, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char *c_url = NULL;
  const char *c_encoding = NULL;
  int c_options = 0;
  int c_buffer_len;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); }
  if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); }
  if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); }
  if (RTEST(rb_options)) { c_options = NUM2INT(rb_options); }

  /*
   * Convert to a String *before* reading its length. Doing the conversion
   * and the length read inside one argument list leaves their order
   * unspecified, so the length could be read from a not-yet-converted
   * object.
   */
  StringValue(rb_buffer);

  /* Raises instead of silently truncating a buffer larger than INT_MAX. */
  c_buffer_len = RSTRING_LENINT(rb_buffer);

  reader = xmlReaderForMemory(
             RSTRING_PTR(rb_buffer),
             c_buffer_len,
             c_url,
             c_encoding,
             c_options
           );

  if (reader == NULL) {
    /* Creation failed, so there is no reader to release. */
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  /* From here on the wrapper owns the reader; dealloc releases it. */
  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_buffer;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
Create a new reader that parses string
/*
 * call-seq:
 *   attribute(name)
 *
 * Get the value of attribute named +name+.
 *
 * Returns nil when +name+ is nil or when libxml2 reports no value for it.
 */
static VALUE
reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr c_reader;
  xmlChar *c_attr;
  VALUE rb_attr = Qnil;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  if (!NIL_P(name)) {
    name = StringValue(name);
    c_attr = xmlTextReaderGetAttribute(c_reader, (xmlChar *)StringValueCStr(name));

    if (c_attr != NULL) {
      /* Copy into a Ruby string, then release libxml2's allocation. */
      rb_attr = NOKOGIRI_STR_NEW2(c_attr);
      xmlFree(c_attr);
    }
  }

  return rb_attr;
}
Get the value of attribute named name
/*
 * call-seq:
 *   attribute_at(index)
 *
 * Get the value of attribute at +index+.
 *
 * +index+ is coerced with rb_Integer. Returns nil when +index+ is nil or
 * when libxml2 reports no attribute value for that position.
 */
static VALUE
attribute_at(VALUE self, VALUE index)
{
  xmlTextReaderPtr c_reader;
  xmlChar *c_attr;
  VALUE rb_attr;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  if (NIL_P(index)) {
    return Qnil;
  }

  index = rb_Integer(index);
  c_attr = xmlTextReaderGetAttributeNo(c_reader, (int)NUM2INT(index));

  if (!c_attr) {
    return Qnil;
  }

  /* Copy into a Ruby string, then release libxml2's allocation. */
  rb_attr = NOKOGIRI_STR_NEW2(c_attr);
  xmlFree(c_attr);

  return rb_attr;
}
Get the value of attribute at index
/*
 * call-seq:
 *   attribute_count
 *
 * Get the number of attributes for the current node.
 *
 * Returns nil when libxml2 reports -1.
 */
static VALUE
attribute_count(VALUE self)
{
  xmlTextReaderPtr c_reader;
  int n_attrs;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  n_attrs = xmlTextReaderAttributeCount(c_reader);

  return (n_attrs == -1) ? Qnil : INT2NUM((long)n_attrs);
}
Get the number of attributes for the current node
/*
 * call-seq:
 *   attribute_nodes
 *
 * Get the attributes of the current node as an Array of Attr.
 *
 * Returns an empty Array when the node has no attributes, and nil when
 * the current node cannot be expanded.
 */
static VALUE
rb_xml_reader_attribute_nodes(VALUE rb_reader)
{
  xmlTextReaderPtr c_reader;
  xmlNodePtr c_expanded;
  VALUE rb_attrs;
  int idx;

  Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

  if (!has_attributes(c_reader)) {
    return rb_ary_new();
  }

  c_expanded = xmlTextReaderExpand(c_reader);
  if (!c_expanded) {
    return Qnil;
  }

  rb_attrs = noko_xml_node_attrs(c_expanded);

  /* ensure that the Reader won't be GCed as long as a node is referenced */
  idx = 0;
  while (idx < RARRAY_LEN(rb_attrs)) {
    rb_iv_set(rb_ary_entry(rb_attrs, idx), "@reader", rb_reader);
    idx++;
  }

  return rb_attrs;
}
Get the attributes of the current node as an Array of Attr
# File lib/nokogiri/xml/reader.rb, line 89
#
# Get the attributes of the current node as a Hash.
#
# Each attribute node contributes name => to_s; the namespaces hash, when
# present, is merged on top.
#
# Returns (Hash<String, String>) attribute names and values.
def attributes
  result = {}
  attribute_nodes.each_with_object(result) { |attr, memo| memo[attr.name] = attr.to_s }

  declared = namespaces
  return result unless declared

  result.merge!(declared)
end
Get the attributes of the current node as a Hash
(Hash<String, String>) Attribute names and values
/*
 * call-seq:
 *   attributes?
 *
 * Does this node have attributes?
 *
 * Maps has_attributes() results: 0 -> false, 1 -> true, anything else -> nil.
 */
static VALUE
attributes_eh(VALUE self)
{
  xmlTextReaderPtr c_reader;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  switch (has_attributes(c_reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
Does this node have attributes?
/*
 * call-seq:
 *   base_uri
 *
 * Get the xml:base of the node.
 *
 * Returns nil when libxml2 reports no base URI.
 */
static VALUE
rb_xml_reader_base_uri(VALUE rb_reader)
{
  xmlTextReaderPtr c_reader;
  xmlChar *c_uri;
  VALUE rb_uri = Qnil;

  Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

  c_uri = xmlTextReaderBaseUri(c_reader);
  if (c_uri != NULL) {
    /* Copy into a Ruby string, then release libxml2's allocation. */
    rb_uri = NOKOGIRI_STR_NEW2(c_uri);
    xmlFree(c_uri);
  }

  return rb_uri;
}
Get the xml:base of the node
/*
 * call-seq:
 *   default?
 *
 * Was an attribute generated from the default value in the DTD or schema?
 *
 * Maps xmlTextReaderIsDefault() results: 0 -> false, 1 -> true,
 * anything else -> nil.
 */
static VALUE
default_eh(VALUE self)
{
  xmlTextReaderPtr c_reader;
  int status;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  status = xmlTextReaderIsDefault(c_reader);

  if (status == 1) {
    return Qtrue;
  } else if (status == 0) {
    return Qfalse;
  }

  return Qnil;
}
Was an attribute generated from the default value in the DTD
or schema?
/*
 * call-seq:
 *   depth
 *
 * Get the depth of the node.
 *
 * Returns nil when libxml2 reports -1.
 */
static VALUE
depth(VALUE self)
{
  xmlTextReaderPtr c_reader;
  int level;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  level = xmlTextReaderDepth(c_reader);

  return (level == -1) ? Qnil : INT2NUM((long)level);
}
Get the depth of the node
# File lib/nokogiri/xml/reader.rb, line 100
#
# Move the cursor through the document, yielding the cursor to the block
# after every successful read. Iteration stops as soon as +read+ returns a
# falsy value.
def each
  cursor = read
  while cursor
    yield cursor
    cursor = read
  end
end
Move the cursor through the document yielding the cursor to the block
/*
 * call-seq:
 *   empty_element? # => true or false
 *
 * Returns true if the current node is empty, otherwise false.
 *
 * xmlTextReaderIsEmptyElement() is documented to return 1 for an empty
 * element, 0 otherwise, and -1 on error. Only an explicit 1 is reported
 * as true, so an error is never mistaken for "empty".
 */
static VALUE
empty_element_p(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (xmlTextReaderIsEmptyElement(reader) == 1) {
    return Qtrue;
  }

  return Qfalse;
}
Returns true if the current node is empty, otherwise false.
/*
 * call-seq:
 *   encoding
 *
 * Get the encoding of the document.
 *
 * A truthy @encoding instance variable takes precedence; otherwise the
 * encoding reported by the libxml2 reader is returned, or nil when there
 * is none.
 */
static VALUE
rb_xml_reader_encoding(VALUE rb_reader)
{
  xmlTextReaderPtr c_reader;
  const char *c_detected;
  VALUE rb_given;

  rb_given = rb_iv_get(rb_reader, "@encoding");

  if (!RTEST(rb_given)) {
    Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
    c_detected = (const char *)xmlTextReaderConstEncoding(c_reader);

    return c_detected ? NOKOGIRI_STR_NEW2(c_detected) : Qnil;
  }

  return rb_given;
}
/*
 * call-seq:
 *   inner_xml
 *
 * Read the contents of the current node, including child nodes and markup.
 * Returns a utf-8 encoded string, or nil when libxml2 returns nothing.
 */
static VALUE
inner_xml(VALUE self)
{
  xmlTextReaderPtr c_reader;
  xmlChar *c_markup;
  VALUE rb_markup;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  c_markup = xmlTextReaderReadInnerXml(c_reader);
  if (!c_markup) {
    return Qnil;
  }

  /* Copy into a Ruby string, then release libxml2's allocation. */
  rb_markup = NOKOGIRI_STR_NEW2((char *)c_markup);
  xmlFree(c_markup);

  return rb_markup;
}
Read the contents of the current node, including child nodes and markup. Returns a utf-8 encoded string.
/*
 * call-seq:
 *   lang
 *
 * Get the xml:lang scope within which the node resides.
 *
 * Returns nil when libxml2 reports none.
 */
static VALUE
lang(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_lang;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_lang = (const char *)xmlTextReaderConstXmlLang(c_reader);

  return c_lang ? NOKOGIRI_STR_NEW2(c_lang) : Qnil;
}
Get the xml:lang scope within which the node resides.
/*
 * call-seq:
 *   local_name
 *
 * Get the local name of the node.
 *
 * Returns nil when libxml2 reports none.
 */
static VALUE
local_name(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_local;
  VALUE rb_local = Qnil;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  c_local = (const char *)xmlTextReaderConstLocalName(c_reader);
  if (c_local != NULL) {
    rb_local = NOKOGIRI_STR_NEW2(c_local);
  }

  return rb_local;
}
Get the local name of the node
/*
 * call-seq:
 *   name
 *
 * Get the name of the node. Returns a utf-8 encoded string, or nil when
 * libxml2 reports no name.
 */
static VALUE
name(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_qname;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_qname = (const char *)xmlTextReaderConstName(c_reader);

  if (c_qname) {
    return NOKOGIRI_STR_NEW2(c_qname);
  }

  return Qnil;
}
Get the name of the node. Returns a utf-8 encoded string.
/*
 * call-seq:
 *   namespace_uri
 *
 * Get the URI defining the namespace associated with the node.
 *
 * Returns nil when libxml2 reports none.
 */
static VALUE
namespace_uri(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_ns_uri;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_ns_uri = (const char *)xmlTextReaderConstNamespaceUri(c_reader);

  return (c_ns_uri == NULL) ? Qnil : NOKOGIRI_STR_NEW2(c_ns_uri);
}
Get the URI defining the namespace associated with the node
/*
 * call-seq:
 *   namespaces
 *
 * Get a hash of namespaces for this Node.
 *
 * Returns an empty Hash when the node has no attributes, and nil when the
 * current node cannot be expanded.
 */
static VALUE
namespaces(VALUE self)
{
  xmlTextReaderPtr c_reader;
  xmlNodePtr c_expanded;
  VALUE rb_namespaces;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  rb_namespaces = rb_hash_new();

  if (has_attributes(c_reader)) {
    c_expanded = xmlTextReaderExpand(c_reader);
    if (!c_expanded) {
      return Qnil;
    }

    /* Fills rb_namespaces in place. */
    Nokogiri_xml_node_namespaces(c_expanded, rb_namespaces);
  }

  return rb_namespaces;
}
Get a hash of namespaces for this Node
/*
 * call-seq:
 *   node_type
 *
 * Get the type of the reader's current node, as the integer reported by
 * xmlTextReaderNodeType().
 */
static VALUE
node_type(VALUE self)
{
  xmlTextReaderPtr c_reader;
  int c_type;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_type = xmlTextReaderNodeType(c_reader);

  return INT2NUM((long)c_type);
}
Get the type of the reader's current node
/*
 * call-seq:
 *   outer_xml
 *
 * Read the current node and its contents, including child nodes and markup.
 * Returns a utf-8 encoded string, or nil when libxml2 returns nothing.
 */
static VALUE
outer_xml(VALUE self)
{
  xmlTextReaderPtr c_reader;
  xmlChar *c_markup;
  VALUE rb_markup;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  c_markup = xmlTextReaderReadOuterXml(c_reader);
  if (c_markup == NULL) {
    return Qnil;
  }

  /* Copy into a Ruby string, then release libxml2's allocation. */
  rb_markup = NOKOGIRI_STR_NEW2((char *)c_markup);
  xmlFree(c_markup);

  return rb_markup;
}
Read the current node and its contents, including child nodes and markup. Returns a utf-8 encoded string.
/*
 * call-seq:
 *   prefix
 *
 * Get the shorthand reference to the namespace associated with the node.
 *
 * Returns nil when libxml2 reports none.
 */
static VALUE
prefix(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_prefix;
  VALUE rb_prefix = Qnil;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  c_prefix = (const char *)xmlTextReaderConstPrefix(c_reader);
  if (c_prefix) {
    rb_prefix = NOKOGIRI_STR_NEW2(c_prefix);
  }

  return rb_prefix;
}
Get the shorthand reference to the namespace associated with the node.
/*
 * call-seq:
 *   read
 *
 * Advance the reader by one xmlTextReaderRead() call.
 *
 * Returns self when the read yields 1 and nil when it yields 0. Any other
 * result raises: the wrapped libxml2 last error when one is available,
 * otherwise a RuntimeError carrying the status code.
 *
 * While the read is in progress, structured errors are pushed onto the
 * object returned by calling +errors+ on the receiver.
 */
static VALUE
read_more(VALUE self)
{
  xmlTextReaderPtr c_reader;
  xmlErrorPtr c_last_error;
  VALUE rb_errors;
  int status;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  rb_errors = rb_funcall(self, rb_intern("errors"), 0);

  /* Install the collector only for the duration of this read. */
  xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
  status = xmlTextReaderRead(c_reader);
  xmlSetStructuredErrorFunc(NULL, NULL);

  switch (status) {
    case 1:
      return self;
    case 0:
      return Qnil;
    default:
      break;
  }

  c_last_error = xmlGetLastError();
  if (c_last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
  }
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(c_last_error));

  /* Not reached: both branches above raise. */
  return Qnil;
}
/*
 * call-seq:
 *   state
 *
 * Get the state of the reader, as the integer reported by
 * xmlTextReaderReadState().
 */
static VALUE
state(VALUE self)
{
  xmlTextReaderPtr c_reader;
  int c_state;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_state = xmlTextReaderReadState(c_reader);

  return INT2NUM((long)c_state);
}
Get the state of the reader
/*
 * call-seq:
 *   value
 *
 * Get the text value of the node if present. Returns a utf-8 encoded
 * string, or nil when libxml2 reports no value.
 */
static VALUE
value(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_text;

  Data_Get_Struct(self, xmlTextReader, c_reader);
  c_text = (const char *)xmlTextReaderConstValue(c_reader);

  return c_text ? NOKOGIRI_STR_NEW2(c_text) : Qnil;
}
Get the text value of the node if present. Returns a utf-8 encoded string.
/*
 * call-seq:
 *   value?
 *
 * Does this node have a text value?
 *
 * Maps xmlTextReaderHasValue() results: 0 -> false, 1 -> true,
 * anything else -> nil.
 */
static VALUE
value_eh(VALUE self)
{
  xmlTextReaderPtr c_reader;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  switch (xmlTextReaderHasValue(c_reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
Does this node have a text value?
/*
 * call-seq:
 *   xml_version
 *
 * Get the XML version of the document being read.
 *
 * Returns nil when libxml2 reports none.
 */
static VALUE
xml_version(VALUE self)
{
  xmlTextReaderPtr c_reader;
  const char *c_version;
  VALUE rb_version = Qnil;

  Data_Get_Struct(self, xmlTextReader, c_reader);

  c_version = (const char *)xmlTextReaderConstXmlVersion(c_reader);
  if (c_version != NULL) {
    rb_version = NOKOGIRI_STR_NEW2(c_version);
  }

  return rb_version;
}
Get the XML
version of the document being read
© 2008–2018 Aaron Patterson, Mike Dalessio, Charles Nutter, Sergio Arbeo,
Patrick Mahoney, Yoko Harada, Akinori MUSHA, John Shahid, Lars Kanis
Licensed under the MIT License.
https://nokogiri.org/rdoc/Nokogiri/XML/Reader.html