module Ox

Description:

Ox handles XML documents in two ways. It is a generic XML parser and writer as well as a fast Object / XML marshaller. Ox was written for speed as a replacement for Nokogiri and for Marshal.

As an XML parser it is 2 or more times faster than Nokogiri and as a generic XML writer it is 14 times faster than Nokogiri. Of course different files may result in slightly different times.

As an Object serializer Ox is 4 times faster than the standard Ruby Marshal.dump(). Ox is 3 times faster than Marshal.load().

Object Dump Sample:

require 'ox'

# Simple three-attribute value holder used to demonstrate dumping an
# arbitrary Ruby object with Ox.
class Sample
  attr_accessor :a, :b, :c

  # Stores the three values as-is in the matching instance variables.
  def initialize(a, b, c)
    @a, @b, @c = a, b, c
  end
end

# Build the object that is going to be serialized.
original = Sample.new(1, "bee", ['x', :y, 7.0])
# Serialize the object into an XML String.
serialized = Ox.dump(original)
# Rebuild a Sample instance from that XML String.
restored = Ox.parse_obj(serialized)

Generic XML Writing and Parsing:

require 'ox'

document = Ox::Document.new(:version => '1.0')

# Create the three elements first; each one carries a :name attribute.
root = Ox::Element.new('top')
root[:name] = 'sample'

middle = Ox::Element.new('middle')
middle[:name] = 'second'

bottom = Ox::Element.new('bottom')
bottom[:name] = 'third'

# Nest them: document > top > middle > bottom.
document << root
root << middle
middle << bottom

# Write the document out as XML, then parse it back and compare.
serialized = Ox.dump(document)
puts serialized
reparsed = Ox.parse(serialized)
puts "Same? #{document == reparsed}"

Constants

VERSION

Current version of the module.

Public Class Methods

cache8_test() click to toggle source
/* Ruby entry point that runs ox_cache8_test() and always returns nil. */
static VALUE
cache8_test(VALUE self)
{
    (void)self; /* receiver is not used */

    ox_cache8_test();

    return Qnil;
}
cache_test() click to toggle source
/* Ruby entry point that runs ox_cache_test() and always returns nil. */
static VALUE
cache_test(VALUE self)
{
    (void)self; /* receiver is not used */

    ox_cache_test();

    return Qnil;
}
ox_default_options() → Hash click to toggle source

Returns the default load and dump options as a Hash. The options are

  • indent: [Fixnum] number of spaces to indent each element in an XML document

  • trace: [Fixnum] trace level where 0 is silent

  • encoding: [String] character encoding for the XML file

  • with_dtd: [true|false|nil] include DTD in the dump

  • with_instruct: [true|false|nil] include instructions in the dump

  • with_xml: [true|false|nil] include XML prolog in the dump

  • circular: [true|false|nil] support circular references while dumping

  • xsd_date: [true|false|nil] use XSD date format instead of decimal format

  • mode: [:object|:generic|:limited|nil] load method to use for XML

  • effort: [:strict|:tolerant|:auto_define] set the tolerance level for loading

  • symbolize_keys: [true|false|nil] symbolize element attribute keys or leave as Strings

  • skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text

@return [Hash] all current option settings.

Note that an indent of less than zero will result in a tight one line output unless the text in the XML fields contain new line characters.

static VALUE
get_def_opts(VALUE self) {
    VALUE       opts = rb_hash_new();
    int         elen = (int)strlen(ox_default_options.encoding);

    rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
    rb_hash_aset(opts, indent_sym, INT2FIX(ox_default_options.indent));
    rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
    rb_hash_aset(opts, with_dtd_sym, (Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
    rb_hash_aset(opts, with_xml_sym, (Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
    rb_hash_aset(opts, with_instruct_sym, (Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
    rb_hash_aset(opts, circular_sym, (Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
    rb_hash_aset(opts, xsd_date_sym, (Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
    rb_hash_aset(opts, symbolize_keys_sym, (Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
    switch (ox_default_options.mode) {
    case ObjMode:       rb_hash_aset(opts, mode_sym, object_sym);     break;
    case GenMode:       rb_hash_aset(opts, mode_sym, generic_sym);    break;
    case LimMode:       rb_hash_aset(opts, mode_sym, limited_sym);    break;
    case NoMode:
    default:            rb_hash_aset(opts, mode_sym, Qnil);               break;
    }
    switch (ox_default_options.effort) {
    case StrictEffort:          rb_hash_aset(opts, effort_sym, strict_sym);             break;
    case TolerantEffort:        rb_hash_aset(opts, effort_sym, tolerant_sym);          break;
    case AutoEffort:            rb_hash_aset(opts, effort_sym, auto_define_sym);  break;
    case NoEffort:
    default:                    rb_hash_aset(opts, effort_sym, Qnil);                    break;
    }
    switch (ox_default_options.skip) {
    case NoSkip:                rb_hash_aset(opts, skip_sym, skip_none_sym);          break;
    case CrSkip:                rb_hash_aset(opts, skip_sym, skip_return_sym);                break;
    case SpcSkip:               rb_hash_aset(opts, skip_sym, skip_white_sym);                break;
    default:                    rb_hash_aset(opts, skip_sym, Qnil);                      break;
    }
    return opts;
}
ox_default_options=(opts) click to toggle source

Sets the default options for load and dump. @param [Hash] opts options to change @param [Fixnum] :indent number of spaces to indent each element in an XML document @param [Fixnum] :trace trace level where 0 is silent @param [String] :encoding character encoding for the XML file @param [true|false|nil] :with_dtd include DTD in the dump @param [true|false|nil] :with_instruct include instructions in the dump @param [true|false|nil] :with_xml include XML prolog in the dump @param [true|false|nil] :circular support circular references while dumping @param [true|false|nil] :xsd_date use XSD date format instead of decimal format @param [:object|:generic|:limited|nil] :mode load method to use for XML @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings @param [:skip_none|:skip_return|:skip_white] :skip determines how to handle white space in text @return [nil]

/*
 * Updates ox_default_options from the Ruby Hash +opts+ and returns nil.
 *
 * Behaviour per key, as implemented below:
 *  - encoding: nil or absent clears the stored encoding; otherwise the value
 *    must be a String and is copied (truncated to fit) into the buffer.
 *  - indent, trace: only changed when a non-nil Fixnum is supplied.
 *  - mode, effort, skip: nil or absent resets to NoMode / NoEffort / NoSkip;
 *    any value other than the recognised symbols raises ox_parse_error_class.
 *  - with_xml, with_dtd, with_instruct, xsd_date, circular, symbolize_keys:
 *    nil or absent stores NotSet, true stores Yes, false stores No; anything
 *    else raises ox_parse_error_class.
 *
 * Options are applied in order, so values processed before a raise stay
 * changed.
 */
static VALUE
set_def_opts(VALUE self, VALUE opts) {
    struct _YesNoOpt    ynos[] = {
        { with_xml_sym, &ox_default_options.with_xml },
        { with_dtd_sym, &ox_default_options.with_dtd },
        { with_instruct_sym, &ox_default_options.with_instruct },
        { xsd_date_sym, &ox_default_options.xsd_date },
        { circular_sym, &ox_default_options.circular },
        { symbolize_keys_sym, &ox_default_options.sym_keys },
        { Qnil, 0 }     /* terminator: the loop below stops on a null attr */
    };
    YesNoOpt    o;
    VALUE       v;
    
    Check_Type(opts, T_HASH);

    v = rb_hash_aref(opts, ox_encoding_sym);
    if (Qnil == v) {
        *ox_default_options.encoding = '\0';
    } else {
        Check_Type(v, T_STRING);
        strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
        /* strncpy() leaves the destination unterminated when the source fills
         * it, so terminate explicitly instead of relying on the final byte
         * never having been written. */
        ox_default_options.encoding[sizeof(ox_default_options.encoding) - 1] = '\0';
#if HAS_ENCODING_SUPPORT
        ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
#elif HAS_PRIVATE_ENCODING
        ox_default_options.rb_enc = rb_str_new2(ox_default_options.encoding);
        /* NOTE(review): the same address is registered again on every call
         * that sets an encoding - confirm whether that is intended. */
        rb_gc_register_address(&ox_default_options.rb_enc);
#endif
    }

    v = rb_hash_aref(opts, indent_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.indent = FIX2INT(v);
    }

    v = rb_hash_aref(opts, trace_sym);
    if (Qnil != v) {
        Check_Type(v, T_FIXNUM);
        ox_default_options.trace = FIX2INT(v);
    }

    v = rb_hash_aref(opts, mode_sym);
    if (Qnil == v) {
        ox_default_options.mode = NoMode;
    } else if (object_sym == v) {
        ox_default_options.mode = ObjMode;
    } else if (generic_sym == v) {
        ox_default_options.mode = GenMode;
    } else if (limited_sym == v) {
        ox_default_options.mode = LimMode;
    } else {
        rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, or nil.\n");
    }

    v = rb_hash_aref(opts, effort_sym);
    if (Qnil == v) {
        ox_default_options.effort = NoEffort;
    } else if (strict_sym == v) {
        ox_default_options.effort = StrictEffort;
    } else if (tolerant_sym == v) {
        ox_default_options.effort = TolerantEffort;
    } else if (auto_define_sym == v) {
        ox_default_options.effort = AutoEffort;
    } else {
        rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
    }

    v = rb_hash_aref(opts, skip_sym);
    if (Qnil == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_none_sym == v) {
        ox_default_options.skip = NoSkip;
    } else if (skip_return_sym == v) {
        ox_default_options.skip = CrSkip;
    } else if (skip_white_sym == v) {
        ox_default_options.skip = SpcSkip;
    } else {
        rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or nil.\n");
    }

    /* tri-state flags: walk the table until the terminator entry */
    for (o = ynos; 0 != o->attr; o++) {
        v = rb_hash_lookup(opts, o->sym);
        if (Qnil == v) {
            *o->attr = NotSet;
        } else if (Qtrue == v) {
            *o->attr = Yes;
        } else if (Qfalse == v) {
            *o->attr = No;
        } else {
            rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
        }
    }
    return Qnil;
}
dump(obj, options) → xml-string click to toggle source

Dumps an Object (obj) to a string. @param [Object] obj Object to serialize as an XML document String @param [Hash] options formatting options @param [Fixnum] :indent format expected @param [true|false] :xsd_date use XSD date format if true, default: false @param [true|false] :circular allow circular references, default: false @param [:strict|:tolerant] :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict

- *:strict* - raise a NotImplementedError if an undumpable object is encountered
- *:tolerant* - replaces undumpable objects with nil

Note that an indent of less than zero will result in a tight one line output unless the text in the XML fields contain new line characters.

/*
 * Serializes argv[0] to XML and returns it as a Ruby String.
 *
 * Starts from a copy of ox_default_options; when exactly two arguments are
 * given, argv[1] is passed to parse_dump_options() to adjust that copy.
 * Raises NoMemError when ox_write_obj_to_str() returns a null pointer. If an
 * encoding is configured it is applied to the resulting String.
 *
 * NOTE(review): argc is not validated here before argv[0] is read - confirm
 * the method registration guarantees at least one argument.
 */
static VALUE
dump(int argc, VALUE *argv, VALUE self) {
    struct _Options     opts = ox_default_options;
    char                *buf;
    VALUE               result;

    if (2 == argc) {
        parse_dump_options(argv[1], &opts);
    }
    buf = ox_write_obj_to_str(argv[0], &opts);
    if (0 == buf) {
        rb_raise(rb_eNoMemError, "Not enough memory.\n");
    }
    result = rb_str_new2(buf);
#if HAS_ENCODING_SUPPORT
    if ('\0' != opts.encoding[0]) {
        rb_enc_associate(result, rb_enc_find(opts.encoding));
    }
#elif HAS_PRIVATE_ENCODING
    if ('\0' != opts.encoding[0]) {
        rb_funcall(result, ox_force_encoding_id, 1, rb_str_new2(opts.encoding));
    }
#endif
    /* the Ruby String holds its own copy, so the C buffer can be released */
    xfree(buf);

    return result;
}
load(xml, options) → Ox::Document or Ox::Element or Object click to toggle source

Parses an XML document String into an Ox::Document, or Ox::Element, or Object depending on the options. Raises an exception if the XML is malformed or the classes specified are not valid. @param [String] xml XML String @param [Hash] options load options @param [:object|:generic|:limited] :mode format expected

- *:object* - object format
- *:generic* - read as a generic XML file
- *:limited* - read as a generic XML file but with callbacks on text and elements events only

@param [:strict|:tolerant|:auto_define] :effort effort to use when an undefined class is encountered, default: :strict

- *:strict* - raise a NameError for missing classes and modules
- *:tolerant* - return nil for missing classes and modules
- *:auto_define* - auto define missing classes and modules

@param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent) @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings

/*
 * Loads XML from the Ruby String in argv[0]; any remaining arguments are
 * forwarded to load() together with the String's encoding.
 *
 * The XML is copied into a scratch buffer first because parsing modifies it.
 * Buffers larger than SMALL_XML come from the heap and are freed here;
 * smaller ones are stack allocated. An error recorded in +err+ is raised
 * after the buffer has been released.
 */
static VALUE
load_str(int argc, VALUE *argv, VALUE self) {
    struct _Err err;
    VALUE       encoding;
    VALUE       result;
    size_t      size;
    char        *buf;
    int         on_heap;

    err_init(&err);
    Check_Type(*argv, T_STRING);

    /* one extra byte so the terminating NUL is copied too */
    size = RSTRING_LEN(*argv) + 1;
    on_heap = (SMALL_XML < size);
    if (on_heap) {
        buf = ALLOC_N(char, size);
    } else {
        buf = ALLOCA_N(char, size);
    }

#if HAS_ENCODING_SUPPORT && !defined(MACRUBY_RUBY)
    encoding = rb_obj_encoding(*argv);
#elif HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING
    encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
#else
    encoding = Qnil;
#endif

    memcpy(buf, StringValuePtr(*argv), size);
    result = load(buf, argc - 1, argv + 1, self, encoding, &err);
    if (on_heap) {
        xfree(buf);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return result;
}
load_file(file_path, options) → Ox::Document or Ox::Element or Object click to toggle source

Parses an XML document from a file into an Ox::Document, or Ox::Element, or Object depending on the options. Raises an exception if the XML is malformed or the classes specified are not valid. @param [String] file_path file path to read the XML document from @param [Hash] options load options @param [:object|:generic|:limited] :mode format expected

- *:object* - object format
- *:generic* - read as a generic XML file
- *:limited* - read as a generic XML file but with callbacks on text and elements events only

@param [:strict|:tolerant|:auto_define] :effort effort to use when an undefined class is encountered, default: :strict

- *:strict* - raise a NameError for missing classes and modules
- *:tolerant* - return nil for missing classes and modules
- *:auto_define* - auto define missing classes and modules

@param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent) @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings

/*
 * Reads the file named by argv[0] into memory and hands the contents to
 * load() along with any remaining arguments (no encoding is passed).
 *
 * Raises IOError when the file cannot be opened or its size cannot be
 * determined. A short read is recorded as a LoadError in +err+ and raised
 * after the file has been closed and the buffer released. Buffers larger
 * than SMALL_XML come from the heap; smaller ones are stack allocated.
 */
static VALUE
load_file(int argc, VALUE *argv, VALUE self) {
    char        *path;
    char        *xml;
    FILE        *f;
    long        size;
    size_t      len;
    VALUE       obj;
    struct _Err err;

    err_init(&err);
    Check_Type(*argv, T_STRING);
    path = StringValuePtr(*argv);
    if (0 == (f = fopen(path, "r"))) {
        rb_raise(rb_eIOError, "%s\n", strerror(errno));
    }
    /* Find the file size and rewind. ftell() reports failure as -1, which
     * must not be converted to size_t and used as an allocation size. */
    if (0 != fseek(f, 0, SEEK_END) || (size = ftell(f)) < 0 || 0 != fseek(f, 0, SEEK_SET)) {
        int     saved_errno = errno;    /* fclose() may overwrite errno */

        fclose(f);
        rb_raise(rb_eIOError, "%s\n", strerror(saved_errno));
    }
    len = (size_t)size;
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len + 1);
    } else {
        xml = ALLOCA_N(char, len + 1);
    }
    if (len != fread(xml, 1, len, f)) {
        ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
        obj = Qnil;
    } else {
        xml[len] = '\0';
        obj = load(xml, argc - 1, argv + 1, self, Qnil, &err);
    }
    fclose(f);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}
parse(xml) → Ox::Document or Ox::Element click to toggle source

Parses an XML document String into an Ox::Document or Ox::Element. @param [String] xml XML String @return [Ox::Document or Ox::Element] parsed XML document. @raise [Exception] if the XML is malformed.

/*
 * Parses the Ruby String +ruby_xml+ with ox_gen_callbacks, using a copy of
 * ox_default_options, and returns whatever ox_parse() produces.
 *
 * The XML is copied into a scratch buffer first because parsing modifies it.
 * Buffers larger than SMALL_XML come from the heap and are freed here;
 * smaller ones are stack allocated. An error recorded in +err+ is raised
 * after the buffer has been released.
 */
static VALUE
to_gen(VALUE self, VALUE ruby_xml) {
    char                *xml, *x, *start;
    size_t              len;
    VALUE               obj;
    struct _Options     options = ox_default_options;
    struct _Err         err;

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    start = StringValuePtr(ruby_xml);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    /* defuse_bom() is defined elsewhere; presumably it returns a pointer into
     * the same string, advanced past a leading byte-order mark - confirm. */
    x = defuse_bom(start, &options);
    if (start < x && (size_t)(x - start) < len) {
        /* Copy only the bytes that remain after x; using the full length
         * would make memcpy() read past the end of the Ruby string. */
        len -= (size_t)(x - start);
    }
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
    obj = ox_parse(xml, ox_gen_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}
parse_obj(xml) → Object click to toggle source

Parses an XML document String that is in the object format and returns an Object of the type represented by the XML. This function expects an optimized XML formatted String. For other formats use the more generic ::load method. Raises an exception if the XML is malformed or the classes specified in the file are not valid. @param [String] xml XML String in optimized Object format. @return [Object] deserialized Object.

/*
 * Parses the Ruby String +ruby_xml+ with ox_obj_callbacks, using a copy of
 * ox_default_options, and returns whatever ox_parse() produces.
 *
 * The XML is copied into a scratch buffer first because parsing modifies it.
 * Buffers larger than SMALL_XML come from the heap and are freed here;
 * smaller ones are stack allocated. When HAS_GC_GUARD is set, garbage
 * collection is disabled for the duration of the parse and afterwards put
 * back the way it was found. An error recorded in +err+ is raised last.
 */
static VALUE
to_obj(VALUE self, VALUE ruby_xml) {
    char                *xml, *x, *start;
    size_t              len;
    VALUE               obj;
    struct _Options     options = ox_default_options;
    struct _Err         err;
#if HAS_GC_GUARD
    VALUE               gc_was_disabled;
#endif

    err_init(&err);
    Check_Type(ruby_xml, T_STRING);
    start = StringValuePtr(ruby_xml);
    /* the xml string gets modified so make a copy of it */
    len = RSTRING_LEN(ruby_xml) + 1;
    /* defuse_bom() is defined elsewhere; presumably it returns a pointer into
     * the same string, advanced past a leading byte-order mark - confirm. */
    x = defuse_bom(start, &options);
    if (start < x && (size_t)(x - start) < len) {
        /* Copy only the bytes that remain after x; using the full length
         * would make memcpy() read past the end of the Ruby string. */
        len -= (size_t)(x - start);
    }
    if (SMALL_XML < len) {
        xml = ALLOC_N(char, len);
    } else {
        xml = ALLOCA_N(char, len);
    }
    memcpy(xml, x, len);
#if HAS_GC_GUARD
    /* rb_gc_disable() reports whether GC was already off before this call */
    gc_was_disabled = rb_gc_disable();
#endif
    obj = ox_parse(xml, ox_obj_callbacks, 0, &options, &err);
    if (SMALL_XML < len) {
        xfree(xml);
    }
#if HAS_GC_GUARD
    RB_GC_GUARD(obj);
    /* only re-enable GC if this function was the one that turned it off */
    if (Qfalse == gc_was_disabled) {
        rb_gc_enable();
    }
#endif
    if (err_has(&err)) {
        ox_err_raise(&err);
    }
    return obj;
}
sax_parse(handler, io, options) click to toggle source

Parses an IO stream or file containing an XML document. Raises an exception if the XML is malformed or the classes specified are not valid. @param [Ox::Sax] handler SAX (responds to Ox::Sax methods) like handler @param [IO|String] io IO Object to read from @param [Hash] options parse options @param [true|false] :convert_special flag indicating special characters like &lt; are converted @param [true|false] :symbolize flag indicating the parser symbolizes element and attribute names @param [true|false] :smart flag indicating the parser uses hints if available (use with html) @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapses white space into a single space, default: skip nothing

/*
 * SAX-parses the source in argv[1] using the handler in argv[0] and returns
 * nil.
 *
 * Defaults: symbolize on, convert_special off, smart off, skip NoSkip. When
 * a third argument is given and its class is exactly Hash, the keys
 * :convert_special, :smart, :symbolize and :skip override those defaults.
 * The boolean flags are on only for a literal true; an unrecognised :skip
 * value leaves the default in place. Fewer than two arguments raises
 * ox_parse_error_class.
 */
static VALUE
sax_parse(int argc, VALUE *argv, VALUE self) {
    struct _SaxOptions  sax_opts;

    sax_opts.symbolize = 1;
    sax_opts.convert_special = 0;
    sax_opts.smart = 0;
    sax_opts.skip = NoSkip;

    if (argc < 2) {
        rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
    }
    if (argc >= 3 && rb_cHash == rb_obj_class(argv[2])) {
        VALUE   hash = argv[2];
        VALUE   val;

        val = rb_hash_lookup(hash, convert_special_sym);
        if (Qnil != val) {
            sax_opts.convert_special = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, smart_sym);
        if (Qnil != val) {
            sax_opts.smart = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, symbolize_sym);
        if (Qnil != val) {
            sax_opts.symbolize = (Qtrue == val);
        }
        val = rb_hash_lookup(hash, skip_sym);
        if (skip_return_sym == val) {
            sax_opts.skip = CrSkip;
        } else if (skip_white_sym == val) {
            sax_opts.skip = SpcSkip;
        }
    }
    ox_sax_parse(argv[0], argv[1], &sax_opts);

    return Qnil;
}
to_file(file_path, obj, options) click to toggle source

Dumps an Object to the specified file. @param [String] file_path file path to write the XML document to @param [Object] obj Object to serialize as an XML document String @param [Hash] options formatting options @param [Fixnum] :indent format expected @param [true|false] :xsd_date use XSD date format if true, default: false @param [true|false] :circular allow circular references, default: false @param [:strict|:tolerant] :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict

- *:strict* - raise a NotImplementedError if an undumpable object is encountered
- *:tolerant* - replaces undumpable objects with nil

Note that an indent of less than zero will result in a tight one line output unless the text in the XML fields contain new line characters.

/*
 * Writes argv[1] as XML to the file whose path is the String in argv[0] and
 * returns nil.
 *
 * Starts from a copy of ox_default_options; when exactly three arguments
 * are given, argv[2] is passed to parse_dump_options() to adjust that copy.
 * The path is type-checked only after the options have been parsed.
 */
static VALUE
to_file(int argc, VALUE *argv, VALUE self) {
    struct _Options     opts = ox_default_options;

    if (3 == argc) {
        parse_dump_options(argv[2], &opts);
    }
    Check_Type(argv[0], T_STRING);
    ox_write_obj_to_file(argv[1], StringValuePtr(argv[0]), &opts);

    return Qnil;
}

Dumps an Object (obj) to a string. @param [Object] obj Object to serialize as an XML document String @param [Hash] options formatting options @param [Fixnum] :indent format expected @param [true|false] :xsd_date use XSD date format if true, default: false @param [true|false] :circular allow circular references, default: false @param [:strict|:tolerant] :effort effort to use when an undumpable object (e.g., IO) is encountered, default: :strict

- *:strict* - raise a NotImplementedError if an undumpable object is encountered
- *:tolerant* - replaces undumpable objects with nil

Note that an indent of less than zero will result in a tight one line output unless the text in the XML fields contain new line characters.

/*
 * Serializes argv[0] to XML and returns it as a Ruby String.
 *
 * Starts from a copy of ox_default_options; when exactly two arguments are
 * given, argv[1] is passed to parse_dump_options() to adjust that copy.
 * Raises NoMemError when ox_write_obj_to_str() returns a null pointer. If an
 * encoding is configured it is applied to the resulting String.
 *
 * NOTE(review): argc is not validated here before argv[0] is read - confirm
 * the method registration guarantees at least one argument.
 */
static VALUE
dump(int argc, VALUE *argv, VALUE self) {
    struct _Options     opts = ox_default_options;
    char                *buf;
    VALUE               result;

    if (2 == argc) {
        parse_dump_options(argv[1], &opts);
    }
    buf = ox_write_obj_to_str(argv[0], &opts);
    if (0 == buf) {
        rb_raise(rb_eNoMemError, "Not enough memory.\n");
    }
    result = rb_str_new2(buf);
#if HAS_ENCODING_SUPPORT
    if ('\0' != opts.encoding[0]) {
        rb_enc_associate(result, rb_enc_find(opts.encoding));
    }
#elif HAS_PRIVATE_ENCODING
    if ('\0' != opts.encoding[0]) {
        rb_funcall(result, ox_force_encoding_id, 1, rb_str_new2(opts.encoding));
    }
#endif
    /* the Ruby String holds its own copy, so the C buffer can be released */
    xfree(buf);

    return result;
}