W3cubDocs

/Nokogiri

class Nokogiri::XML::Reader

Parent:
Object
Included modules:

The Reader parser allows you to effectively pull parse an XML document. Once instantiated, call Nokogiri::XML::Reader#each to iterate over each node. Note that you may only iterate over the document once!

Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The Reader is given an XML document, and yields nodes to an each block.

Here is an example of usage:

reader = Nokogiri::XML::Reader(<<-eoxml)
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
  </x>
eoxml

reader.each do |node|

  # node is an instance of Nokogiri::XML::Reader
  puts node.name

end

Note that Nokogiri::XML::Reader#each can only be called once!! Once the cursor moves through the entire document, you must parse the document again. So make sure that you capture any information you need during the first iteration.

The Reader parser is good for when you need the speed of a SAX parser, but do not want to write a Document handler.

Constants

TYPE_ATTRIBUTE

Attribute node type

TYPE_CDATA

CDATA node type

TYPE_COMMENT

Comment node type

TYPE_DOCUMENT

Document node type

TYPE_DOCUMENT_FRAGMENT

Document Fragment node type

TYPE_DOCUMENT_TYPE

Document Type node type

TYPE_ELEMENT

Element node type

TYPE_END_ELEMENT

Element end node type

TYPE_END_ENTITY

Entity end node type

TYPE_ENTITY

Entity node type

TYPE_ENTITY_REFERENCE

Entity Reference node type

TYPE_NONE
TYPE_NOTATION

Notation node type

TYPE_PROCESSING_INSTRUCTION

PI node type

TYPE_SIGNIFICANT_WHITESPACE

Significant Whitespace node type

TYPE_TEXT

Text node type

TYPE_WHITESPACE

Whitespace node type

TYPE_XML_DECLARATION

XML Declaration node type

Attributes

encoding[R]

The encoding for the document

errors[RW]

A list of errors encountered while parsing

source[R]

The XML source

Public Class Methods

from_io(io, url = nil, encoding = nil, options = 0) Show source
static VALUE from_io(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_io, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
  if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  reader = xmlReaderForIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)rb_io,
      c_url,
      c_encoding,
      c_options
  );

  if(reader == NULL) {
    xmlFreeTextReader(reader);
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_io;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}

Create a new reader that parses io

from_memory(string, url = nil, encoding = nil, options = 0) Show source
static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_buffer, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
  if (RTEST(rb_url)) c_url = StringValueCStr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValueCStr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  reader = xmlReaderForMemory(
      StringValuePtr(rb_buffer),
      (int)RSTRING_LEN(rb_buffer),
      c_url,
      c_encoding,
      c_options
  );

  if(reader == NULL) {
    xmlFreeTextReader(reader);
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  args[0] = rb_buffer;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}

Create a new reader that parses string

Public Instance Methods

attribute(name) Show source
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *value ;
  VALUE rb_value;

  Data_Get_Struct(self, xmlTextReader, reader);

  if(NIL_P(name)) return Qnil;
  name = StringValue(name) ;

  value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValueCStr(name));
  if(value == NULL) return Qnil;

  rb_value = NOKOGIRI_STR_NEW2(value);
  xmlFree(value);
  return rb_value;
}

Get the value of attribute named name

attribute_at(index) Show source
static VALUE attribute_at(VALUE self, VALUE index)
{
  xmlTextReaderPtr reader;
  xmlChar *value;
  VALUE rb_value;

  Data_Get_Struct(self, xmlTextReader, reader);

  if(NIL_P(index)) return Qnil;
  index = rb_Integer(index);

  value = xmlTextReaderGetAttributeNo(
      reader,
      (int)NUM2INT(index)
  );
  if(value == NULL) return Qnil;

  rb_value = NOKOGIRI_STR_NEW2(value);
  xmlFree(value);
  return rb_value;
}

Get the value of attribute at index

attribute_count Show source
static VALUE attribute_count(VALUE self)
{
  xmlTextReaderPtr reader;
  int count;

  Data_Get_Struct(self, xmlTextReader, reader);
  count = xmlTextReaderAttributeCount(reader);
  if(count == -1) return Qnil;

  return INT2NUM((long)count);
}

Get the number of attributes for the current node

attribute_nodes() Show source
# File lib/nokogiri/xml/reader.rb, line 97
def attribute_nodes
  nodes = attr_nodes
  nodes.each { |v| v.instance_variable_set(:@_r, self) }
  nodes
end

Get a list of attributes for the current node

attributes() Show source
# File lib/nokogiri/xml/reader.rb, line 89
def attributes
  Hash[attribute_nodes.map { |node|
    [node.name, node.to_s]
  }].merge(namespaces || {})
end

Get a list of attributes for the current node.

attributes? Show source
static VALUE attributes_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  int eh;

  Data_Get_Struct(self, xmlTextReader, reader);
  eh = has_attributes(reader);
  if(eh == 0) return Qfalse;
  if(eh == 1) return Qtrue;

  return Qnil;
}

Does this node have attributes?

base_uri Show source
static VALUE base_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char * base_uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  base_uri = (const char *)xmlTextReaderBaseUri(reader);
  if (base_uri == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(base_uri);
}

Get the xml:base of the node

default? Show source
static VALUE default_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  int eh;

  Data_Get_Struct(self, xmlTextReader, reader);
  eh = xmlTextReaderIsDefault(reader);
  if(eh == 0) return Qfalse;
  if(eh == 1) return Qtrue;

  return Qnil;
}

Was an attribute generated from the default value in the DTD or schema?

depth Show source
static VALUE depth(VALUE self)
{
  xmlTextReaderPtr reader;
  int depth;

  Data_Get_Struct(self, xmlTextReader, reader);
  depth = xmlTextReaderDepth(reader);
  if(depth == -1) return Qnil;

  return INT2NUM((long)depth);
}

Get the depth of the node

each() { |cursor| ... } Show source
# File lib/nokogiri/xml/reader.rb, line 105
def each
  while cursor = self.read
    yield cursor
  end
end

Move the cursor through the document yielding the cursor to the block

empty_element? # → true or false Show source
static VALUE empty_element_p(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  if(xmlTextReaderIsEmptyElement(reader))
    return Qtrue;

  return Qfalse;
}

Returns true if the current node is empty, otherwise false.

Also aliased as: self_closing?
inner_xml Show source
static VALUE inner_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar* value;
  VALUE str;

  Data_Get_Struct(self, xmlTextReader, reader);

  value = xmlTextReaderReadInnerXml(reader);

  str = Qnil;
  if(value) {
    str = NOKOGIRI_STR_NEW2((char*)value);
    xmlFree(value);
  }

  return str;
}

Read the contents of the current node, including child nodes and markup. Returns a utf-8 encoded string.

static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *lang;

  Data_Get_Struct(self, xmlTextReader, reader);
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
  if(lang == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(lang);
}

Get the xml:lang scope within which the node resides.

local_name Show source
static VALUE local_name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *name;

  Data_Get_Struct(self, xmlTextReader, reader);
  name = (const char *)xmlTextReaderConstLocalName(reader);
  if(name == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(name);
}

Get the local name of the node

static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *name;

  Data_Get_Struct(self, xmlTextReader, reader);
  name = (const char *)xmlTextReaderConstName(reader);
  if(name == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(name);
}

Get the name of the node. Returns a utf-8 encoded string.

namespace_uri Show source
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
  if(uri == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(uri);
}

Get the URI defining the namespace associated with the node

namespaces Show source
static VALUE namespaces(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlNodePtr ptr;
  VALUE attr ;

  Data_Get_Struct(self, xmlTextReader, reader);

  attr = rb_hash_new() ;

  if (! has_attributes(reader))
    return attr ;

  ptr = xmlTextReaderExpand(reader);
  if(ptr == NULL) return Qnil;

  Nokogiri_xml_node_namespaces(ptr, attr);

  return attr ;
}

Get a hash of namespaces for this Node

node_type Show source
static VALUE node_type(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);
  return INT2NUM((long)xmlTextReaderNodeType(reader));
}

Get the type of readers current node

outer_xml Show source
static VALUE outer_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *value;
  VALUE str = Qnil;

  Data_Get_Struct(self, xmlTextReader, reader);

  value = xmlTextReaderReadOuterXml(reader);

  if(value) {
    str = NOKOGIRI_STR_NEW2((char*)value);
    xmlFree(value);
  }
  return str;
}

Read the current node and its contents, including child nodes and markup. Returns a utf-8 encoded string.

prefix Show source
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *prefix;

  Data_Get_Struct(self, xmlTextReader, reader);
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
  if(prefix == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(prefix);
}

Get the shorthand reference to the namespace associated with the node.

static VALUE read_more(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlErrorPtr error;
  VALUE error_list;
  int ret;

  Data_Get_Struct(self, xmlTextReader, reader);

  error_list = rb_funcall(self, rb_intern("errors"), 0);

  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  ret = xmlTextReaderRead(reader);
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(ret == 1) return self;
  if(ret == 0) return Qnil;

  error = xmlGetLastError();
  if(error)
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
  else
    rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);

  return Qnil;
}

Move the Reader forward through the XML document.

self_closing?()
Alias for: empty_element?
state Show source
static VALUE state(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);
  return INT2NUM((long)xmlTextReaderReadState(reader));
}

Get the state of the reader

value Show source
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *value;

  Data_Get_Struct(self, xmlTextReader, reader);
  value = (const char *)xmlTextReaderConstValue(reader);
  if(value == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(value);
}

Get the text value of the node if present. Returns a utf-8 encoded string.

value? Show source
static VALUE value_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  int eh;

  Data_Get_Struct(self, xmlTextReader, reader);
  eh = xmlTextReaderHasValue(reader);
  if(eh == 0) return Qfalse;
  if(eh == 1) return Qtrue;

  return Qnil;
}

Does this node have a text value?

xml_version Show source
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *version;

  Data_Get_Struct(self, xmlTextReader, reader);
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
  if(version == NULL) return Qnil;

  return NOKOGIRI_STR_NEW2(version);
}

Get the XML version of the document being read

© 2008–2018 Aaron Patterson, Mike Dalessio, Charles Nutter, Sergio Arbeo,
Patrick Mahoney, Yoko Harada, Akinori MUSHA, John Shahid, Lars Kanis
Licensed under the MIT License.