libdap++  Updated for version 3.14.0
D4ParserSax2.cc
Go to the documentation of this file.
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG 1
28 
29 #include <iostream>
30 #include <sstream>
31 
32 #include <cstring>
33 #include <cstdarg>
34 #include <cassert>
35 
36 #include <libxml/parserInternals.h>
37 
38 #include "DMR.h"
39 
40 #include "BaseType.h"
41 #include "Array.h"
42 #include "D4Group.h"
43 #include "D4Attributes.h"
44 #include "D4Maps.h"
45 #include "D4Enum.h"
46 
47 #include "D4BaseTypeFactory.h"
48 
49 #include "D4ParserSax2.h"
50 
51 #include "util.h"
52 #include "debug.h"
53 
54 namespace libdap {
55 
// Printable names of the parser states, used for the debugging output written
// by the SAX callbacks. The table is indexed with the value returned by
// get_state(), so the order of the entries matters.
// NOTE(review): the order presumably mirrors the ParseState enumeration in
// D4ParserSax2.h - confirm whenever a state is added or removed.
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    "inside_group",                 // just after the start of a Group element

    // Attribute handling
    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    // Enumeration definitions
    "inside_enum_def",
    "inside_enum_const",

    // Dimension definitions
    "inside_dim_def",

    "inside_simple_type",           // covers Byte, ..., Url, Opaque

    // (there is no separate "inside_array" state)
    "inside_dim",
    "inside_map",

    "inside_constructor",

    // Error and terminal states
    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
90 
/**
 * Test whether an element name differs from an expected tag name.
 *
 * @param name The name to test (a null-terminated C string).
 * @param tag The tag name to compare against (a null-terminated C string).
 * @return True when the two strings are not equal, false when they match.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
95 
104 D4EnumDef *
105 D4ParserSax2::enum_def()
106 {
107  if (!d_enum_def) d_enum_def = new D4EnumDef;
108 
109  return d_enum_def;
110 }
111 
118 D4Dimension *
119 D4ParserSax2::dim_def() {
120  if (!d_dim_def) d_dim_def = new D4Dimension;
121 
122  return d_dim_def;
123 }
124 
130 void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
131 {
132  if (!xml_attrs.empty())
133  xml_attrs.clear(); // erase old attributes
134 
135  // Make a value using the attribute name and the prefix, namespace URI
136  // and the value. The prefix might be null.
137  unsigned int index = 0;
138  for (int i = 0; i < nb_attributes; ++i, index += 5) {
139  xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
140  XMLAttribute(attributes + index + 1)));
141 
142  DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
143  << xml_attrs[(const char *)attributes[index]].value << endl);
144  }
145 }
146 
153 void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
154 {
155  // make a value with the prefix and namespace URI. The prefix might be null.
156  for (int i = 0; i < nb_namespaces; ++i) {
157  namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
158  (const char *)namespaces[i * 2 + 1]));
159  }
160 }
161 
168 bool D4ParserSax2::check_required_attribute(const string & attr)
169 {
170  if (xml_attrs.find(attr) == xml_attrs.end()) {
171  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
172  return false;
173  }
174  else
175  return true;
176 }
177 
184 bool D4ParserSax2::check_attribute(const string & attr)
185 {
186  return (xml_attrs.find(attr) != xml_attrs.end());
187 }
188 
189 bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
190 {
191  if (is_not(name, "Dimension"))
192  return false;
193 
194  transfer_xml_attrs(attrs, nb_attributes);
195 
196  if (!(check_required_attribute("name") && check_required_attribute("size"))) {
197  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
198  return false;
199  }
200 
201  // This getter (dim_def) allocates a new object if needed.
202  dim_def()->set_name(xml_attrs["name"].value);
203  try {
204  dim_def()->set_size(xml_attrs["size"].value);
205  }
206  catch (Error &e) {
207  dmr_error(this, e.get_error_message().c_str());
208  return false;
209  }
210 
211  return true;
212 }
213 
231 bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
232 {
233  if (is_not(name, "Dim"))
234  return false;
235 
236  transfer_xml_attrs(attrs, nb_attributes);
237 
238  if (check_attribute("size") && check_attribute("name")) {
239  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
240  return false;
241  }
242  if (!(check_attribute("size") || check_attribute("name"))) {
243  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
244  return false;
245  }
246 
247  if (!top_basetype()->is_vector_type()) {
248  // Make the top BaseType* an array
249  BaseType *b = top_basetype();
250  pop_basetype();
251 
252  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
253  a->set_is_dap4(true);
254  a->add_var_nocopy(b);
255  a->set_attributes_nocopy(b->attributes());
256  // trick: instead of popping b's attributes, copying them and then pushing
257  // a's copy, just move the pointer (but make sure there's only one object that
258  // references that pointer).
259  b->set_attributes_nocopy(0);
260 
261  push_basetype(a);
262  }
263 
264  assert(top_basetype()->is_vector_type());
265 
266  Array *a = static_cast<Array*>(top_basetype());
267  if (check_attribute("size")) {
268  a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
269  return true;
270  }
271  else if (check_attribute("name")) {
272  string name = xml_attrs["name"].value;
273 
274  D4Dimension *dim = 0;
275  if (name[0] == '/') // lookup the Dimension in the root group
276  dim = dmr()->root()->find_dim(name);
277  else // get enclosing Group and lookup Dimension there
278  dim = top_group()->find_dim(name);
279 
280  if (!dim)
281  throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
282  a->append_dim(dim);
283  return true;
284  }
285 
286  return false;
287 }
288 
289 bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
290 {
291  if (is_not(name, "Map"))
292  return false;
293 
294  transfer_xml_attrs(attrs, nb_attributes);
295 
296  if (!check_attribute("name")) {
297  dmr_error(this, "The 'name' attribute must be used in a Map element.");
298  return false;
299  }
300 
301  if (!top_basetype()->is_vector_type()) {
302  // Make the top BaseType* an array
303  BaseType *b = top_basetype();
304  pop_basetype();
305 
306  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
307  a->set_is_dap4(true);
308  a->add_var_nocopy(b);
309  a->set_attributes_nocopy(b->attributes());
310  // trick: instead of popping b's attributes, copying them and then pushing
311  // a's copy, just move the pointer (but make sure there's only one object that
312  // references that pointer).
313  b->set_attributes_nocopy(0);
314 
315  push_basetype(a);
316  }
317 
318  assert(top_basetype()->is_vector_type());
319 
320  Array *a = static_cast<Array*>(top_basetype());
321 
322  string map_name = xml_attrs["name"].value;
323  if (xml_attrs["name"].value[0] != '/')
324  map_name = top_group()->FQN() + map_name;
325 
326  Array *map_source = 0; // The array variable that holds the data for the Map
327 
328  if (map_name[0] == '/') // lookup the Map in the root group
329  map_source = dmr()->root()->find_map_source(map_name);
330  else // get enclosing Group and lookup Map there
331  map_source = top_group()->find_map_source(map_name);
332 
333  if (!map_source)
334  throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
335 
336  a->maps()->add_map(new D4Map(map_name, map_source));
337 
338  return true;
339 }
340 
341 bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
342 {
343  if (is_not(name, "Group"))
344  return false;
345 
346  transfer_xml_attrs(attrs, nb_attributes);
347 
348  if (!check_required_attribute("name")) {
349  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
350  return false;
351  }
352 
353  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
354  if (!btp) {
355  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
356  return false;
357  }
358 
359  D4Group *grp = static_cast<D4Group*>(btp);
360 
361  // Need to set this to get the D4Attribute behavior in the type classes
362  // shared between DAP2 and DAP4. jhrg 4/18/13
363  grp->set_is_dap4(true);
364 
365  // link it up and change the current group
366  D4Group *parent = top_group();
367  if (!parent) {
368  dmr_fatal_error(this, "No Group on the Group stack.");
369  return false;
370  }
371 
372  grp->set_parent(parent);
373  parent->add_group_nocopy(grp);
374 
375  push_group(grp);
376  push_attributes(grp->attributes());
377  return true;
378 }
379 
386 inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
387 {
388  if (is_not(name, "Attribute"))
389  return false;
390 
391  // These methods set the state to parser_error if a problem is found.
392  transfer_xml_attrs(attrs, nb_attributes);
393 
394  // add error
395  if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
396  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
397  return false;
398  }
399 
400  if (xml_attrs["type"].value == "Container") {
401  push_state(inside_attribute_container);
402 
403  DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
404  D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
405 
406  D4Attributes *tos = top_attributes();
407  // add return
408  if (!tos) {
409  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
410  return false;
411  }
412 
413  tos->add_attribute_nocopy(child);
414  push_attributes(child->attributes());
415  }
416  else if (xml_attrs["type"].value == "OtherXML") {
417  push_state(inside_other_xml_attribute);
418 
419  dods_attr_name = xml_attrs["name"].value;
420  dods_attr_type = xml_attrs["type"].value;
421  }
422  else {
423  push_state(inside_attribute);
424 
425  dods_attr_name = xml_attrs["name"].value;
426  dods_attr_type = xml_attrs["type"].value;
427  }
428 
429  return true;
430 }
431 
437 inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
438 {
439  if (is_not(name, "Enumeration"))
440  return false;
441 
442  transfer_xml_attrs(attrs, nb_attributes);
443 
444  if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
445  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
446  return false;
447  }
448 
449  Type t = get_type(xml_attrs["basetype"].value.c_str());
450  if (!is_integer_type(t)) {
451  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
452  xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
453  return false;
454  }
455 
456  // This getter allocates a new object if needed.
457  string enum_def_path = xml_attrs["name"].value;
458 #if 0
459  // Use FQNs when things are referenced, not when they are defined
460  if (xml_attrs["name"].value[0] != '/')
461  enum_def_path = top_group()->FQN() + enum_def_path;
462 #endif
463  enum_def()->set_name(enum_def_path);
464  enum_def()->set_type(t);
465 
466  return true;
467 }
468 
469 inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
470 {
471  if (is_not(name, "EnumConst"))
472  return false;
473 
474  // These methods set the state to parser_error if a problem is found.
475  transfer_xml_attrs(attrs, nb_attributes);
476 
477  if (!(check_required_attribute("name") && check_required_attribute("value"))) {
478  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
479  return false;
480  }
481 
482  istringstream iss(xml_attrs["value"].value);
483  long long value = 0;
484  iss >> skipws >> value;
485  if (iss.fail() || iss.bad()) {
486  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
487  xml_attrs["value"].value.c_str());
488  }
489  else if (!enum_def()->is_valid_enum_value(value)) {
490  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
491  xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
492  }
493  else {
494  // unfortunate choice of names... args are 'label' and 'value'
495  enum_def()->add_value(xml_attrs["name"].value, value);
496  }
497 
498  return true;
499 }
500 
506 inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
507 {
508  Type t = get_type(name);
509  if (is_simple_type(t)) {
510  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
511  return true;
512  }
513  else {
514  switch(t) {
515  case dods_structure_c:
516  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
517  return true;
518 
519  case dods_sequence_c:
520  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
521  return true;
522 
523  default:
524  return false;
525  }
526  }
527 }
528 
536 void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
537 {
538  transfer_xml_attrs(attrs, nb_attributes);
539 
540  if (check_required_attribute("name")) {
541  BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
542  if (!btp) {
543  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
544  return;
545  }
546 
547  if ((t == dods_enum_c) && check_required_attribute("enum")) {
548  D4EnumDef *enum_def = 0;
549  string enum_path = xml_attrs["enum"].value;
550  if (enum_path[0] == '/')
551  enum_def = dmr()->root()->find_enum_def(enum_path);
552  else
553  enum_def = top_group()->find_enum_def(enum_path);
554 
555  if (!enum_def)
556  dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
557 
558  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
559  }
560 
561  btp->set_is_dap4(true); // see comment above
562  push_basetype(btp);
563 
564  push_attributes(btp->attributes());
565 
566  push_state(s);
567  }
568 }
569 
576 
582 {
583  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
584  parser->error_msg = "";
585  parser->char_data = "";
586 
587  // Set this in intern_helper so that the loop test for the parser_end
588  // state works for the first iteration. It seems like XMLParseChunk calls this
589  // function on it's first run. jhrg 9/16/13
590  // parser->push_state(parser_start);
591 
592  parser->push_attributes(parser->dmr()->root()->attributes());
593 
594  if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
595 }
596 
600 {
601  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
602 
603  if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
604 
605  if (parser->get_state() != parser_end)
606  D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
607 
608  // If we've found any sort of error, don't make the DMR; intern() will
609  // take care of the error.
610  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
611  return;
612 
613  if (!parser->empty_basetype() || parser->empty_group())
614  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
615 
616  parser->pop_group(); // leave the stack 'clean'
617  parser->pop_attributes();
618 }
619 
620 void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
621  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
622  const xmlChar **attributes)
623 {
624  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
625  const char *localname = (const char *) l;
626 
627  if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
628 
629  switch (parser->get_state()) {
630  case parser_start:
631  if (is_not(localname, "Dataset"))
632  D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
633 
634  parser->root_ns = URI ? (const char *) URI : "";
635  parser->transfer_xml_attrs(attributes, nb_attributes);
636 
637  if (parser->check_required_attribute(string("name")))
638  parser->dmr()->set_name(parser->xml_attrs["name"].value);
639 
640  if (parser->check_attribute("dapVersion"))
641  parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
642 
643  if (parser->check_attribute("dmrVersion"))
644  parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
645 
646  if (parser->check_attribute("base"))
647  parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
648 
649  if (!parser->root_ns.empty())
650  parser->dmr()->set_namespace(parser->root_ns);
651 
652  // Push the root Group on the stack
653  parser->push_group(parser->dmr()->root());
654 
655  parser->push_state(inside_dataset);
656 
657  break;
658 
659  // Both inside dataset and inside group can have the same stuff.
660  // The difference is that the Dataset holds the root group, which
661  // must be present; other groups are optional
662  case inside_dataset:
663  case inside_group:
664  if (parser->process_enum_def(localname, attributes, nb_attributes))
665  parser->push_state(inside_enum_def);
666  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
667  parser->push_state(inside_dim_def);
668  else if (parser->process_group(localname, attributes, nb_attributes))
669  parser->push_state(inside_group);
670  else if (parser->process_variable(localname, attributes, nb_attributes))
671  // This will push either inside_simple_type or inside_structure
672  // onto the parser state stack.
673  break;
674  else if (parser->process_attribute(localname, attributes, nb_attributes))
675  // This will push either inside_attribute, inside_attribute_container
676  // or inside_otherxml_attribute onto the parser state stack
677  break;
678  else
679  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
680  break;
681 
682  case inside_attribute_container:
683  if (parser->process_attribute(localname, attributes, nb_attributes))
684  break;
685  else
686  D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
687  break;
688 
689  case inside_attribute:
690  if (parser->process_attribute(localname, attributes, nb_attributes))
691  break;
692  else if (strcmp(localname, "Value") == 0)
693  parser->push_state(inside_attribute_value);
694  else
695  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
696  break;
697 
698  case inside_attribute_value:
699  // Attribute values are processed by the end element code.
700  break;
701 
702  case inside_other_xml_attribute:
703  parser->other_xml_depth++;
704 
705  // Accumulate the elements here
706  parser->other_xml.append("<");
707  if (prefix) {
708  parser->other_xml.append((const char *) prefix);
709  parser->other_xml.append(":");
710  }
711  parser->other_xml.append(localname);
712 
713  if (nb_namespaces != 0) {
714  parser->transfer_xml_ns(namespaces, nb_namespaces);
715 
716  for (map<string, string>::iterator i = parser->namespace_table.begin();
717  i != parser->namespace_table.end(); ++i) {
718  parser->other_xml.append(" xmlns");
719  if (!i->first.empty()) {
720  parser->other_xml.append(":");
721  parser->other_xml.append(i->first);
722  }
723  parser->other_xml.append("=\"");
724  parser->other_xml.append(i->second);
725  parser->other_xml.append("\"");
726  }
727  }
728 
729  if (nb_attributes != 0) {
730  parser->transfer_xml_attrs(attributes, nb_attributes);
731  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
732  parser->other_xml.append(" ");
733  if (!i->second.prefix.empty()) {
734  parser->other_xml.append(i->second.prefix);
735  parser->other_xml.append(":");
736  }
737  parser->other_xml.append(i->first);
738  parser->other_xml.append("=\"");
739  parser->other_xml.append(i->second.value);
740  parser->other_xml.append("\"");
741  }
742  }
743 
744  parser->other_xml.append(">");
745  break;
746 
747  case inside_enum_def:
748  // process an EnumConst element
749  if (parser->process_enum_const(localname, attributes, nb_attributes))
750  parser->push_state(inside_enum_const);
751  else
752  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
753  break;
754 
755  case inside_enum_const:
756  // No content; nothing to do
757  break;
758 
759  case inside_dim_def:
760  // No content; nothing to do
761  break;
762 #if 0
763  case inside_dimension:
764  // No content.
765  break;
766 #endif
767  case inside_dim:
768  // No content.
769  break;
770 
771  case inside_map:
772  // No content.
773  break;
774 
775  case inside_simple_type:
776  if (parser->process_attribute(localname, attributes, nb_attributes))
777  break;
778  else if (parser->process_dimension(localname, attributes, nb_attributes))
779  parser->push_state(inside_dim);
780  else if (parser->process_map(localname, attributes, nb_attributes))
781  parser->push_state(inside_map);
782  else
783  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
784  break;
785 
786  case inside_constructor:
787  if (parser->process_variable(localname, attributes, nb_attributes))
788  // This will push either inside_simple_type or inside_structure
789  // onto the parser state stack.
790  break;
791  else if (parser->process_attribute(localname, attributes, nb_attributes))
792  break;
793  else if (parser->process_dimension(localname, attributes, nb_attributes))
794  parser->push_state(inside_dim);
795  else if (parser->process_map(localname, attributes, nb_attributes))
796  parser->push_state(inside_map);
797  else
798  D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
799  break;
800 
801  case parser_unknown:
802  // FIXME?
803  // *** Never used? If so remove/error
804  parser->push_state(parser_unknown);
805  break;
806 
807  case parser_error:
808  case parser_fatal_error:
809  break;
810 
811  case parser_end:
812  // FIXME Error?
813  break;
814  }
815 
816  if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
817 }
818 
819 void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
820 {
821  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
822  const char *localname = (const char *) l;
823 
824  if (parser->debug()) cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
825 
826  switch (parser->get_state()) {
827  case parser_start:
828  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
829  break;
830 
831  case inside_dataset:
832  if (is_not(localname, "Dataset"))
833  D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
834 
835  parser->pop_state();
836  if (parser->get_state() != parser_start)
837  dmr_fatal_error(parser, "Unexpected state, expected start state.");
838  else {
839  parser->pop_state();
840  parser->push_state(parser_end);
841  }
842  break;
843 
844  case inside_group: {
845  if (is_not(localname, "Group"))
846  D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
847 
848  if (!parser->empty_basetype() || parser->empty_group())
849  D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
850 
851  parser->pop_group();
852  parser->pop_state();
853  break;
854  }
855 
856  case inside_attribute_container:
857  if (is_not(localname, "Attribute"))
858  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
859 
860  parser->pop_state();
861  parser->pop_attributes();
862  break;
863 
864  case inside_attribute:
865  if (is_not(localname, "Attribute"))
866  D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
867 
868  parser->pop_state();
869  break;
870 
871  case inside_attribute_value: {
872  if (is_not(localname, "Value"))
873  D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
874 
875  parser->pop_state();
876 
877  // The old code added more values using the name and type as
878  // indexes to find the correct attribute. Use get() for that
879  // now. Or fix this code to keep a pointer to the to attribute...
880  D4Attributes *attrs = parser->top_attributes();
881  D4Attribute *attr = attrs->get(parser->dods_attr_name);
882  if (!attr) {
883  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
884  attrs->add_attribute_nocopy(attr);
885  }
886  attr->add_value(parser->char_data);
887 
888  parser->char_data = ""; // Null this after use.
889  break;
890  }
891 
892  case inside_other_xml_attribute: {
893  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
894  parser->pop_state();
895 
896  // The old code added more values using the name and type as
897  // indexes to find the correct attribute. Use get() for that
898  // now. Or fix this code to keep a pointer to the to attribute...
899  D4Attributes *attrs = parser->top_attributes();
900  D4Attribute *attr = attrs->get(parser->dods_attr_name);
901  if (!attr) {
902  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
903  attrs->add_attribute_nocopy(attr);
904  }
905  attr->add_value(parser->other_xml);
906 
907  parser->other_xml = ""; // Null this after use.
908  }
909  else {
910  if (parser->other_xml_depth == 0) {
911  D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'", localname);
912  break;
913  }
914  parser->other_xml_depth--;
915 
916  parser->other_xml.append("</");
917  if (prefix) {
918  parser->other_xml.append((const char *) prefix);
919  parser->other_xml.append(":");
920  }
921  parser->other_xml.append(localname);
922  parser->other_xml.append(">");
923  }
924  break;
925  }
926 
927  case inside_enum_def:
928  if (is_not(localname, "Enumeration"))
929  D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
930  if (!parser->top_group())
931  D4ParserSax2::dmr_fatal_error(parser, "Expected a Group to be the current item, while finishing up an Enumeration.");
932  else {
933  // copy the pointer; not a deep copy
934  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
935  // Set the enum_def to null; next call to enum_def() will
936  // allocate a new object
937  parser->clear_enum_def();
938  parser->pop_state();
939  }
940  break;
941 
942  case inside_enum_const:
943  if (is_not(localname, "EnumConst"))
944  D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
945 
946  parser->pop_state();
947  break;
948 
949  case inside_dim_def: {
950  if (is_not(localname, "Dimension"))
951  D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
952 
953  if (!parser->top_group())
954  D4ParserSax2::dmr_error(parser, "Expected a Group to be the current item, while finishing up an Dimension.");
955 
956  // FIXME Use the Group on the top of the group stack
957  // copy the pointer; not a deep copy
958  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
959  //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
960  // Set the dim_def to null; next call to dim_def() will
961  // allocate a new object. Calling 'clear' is important because
962  // the cleanup method will free dim_def if it's not null and
963  // we just copied the pointer in the add_dim_nocopy() call
964  // above.
965  parser->clear_dim_def();
966  parser->pop_state();
967  break;
968  }
969 
970  case inside_simple_type:
971  if (is_simple_type(get_type(localname))) {
972  BaseType *btp = parser->top_basetype();
973  parser->pop_basetype();
974  parser->pop_attributes();
975 
976  BaseType *parent = 0;
977  if (!parser->empty_basetype())
978  parent = parser->top_basetype();
979  else if (!parser->empty_group())
980  parent = parser->top_group();
981  else {
982  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.", localname);
983  delete btp;
984  }
985 
986  if (parent->type() == dods_array_c)
987  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
988  else
989  parent->add_var_nocopy(btp);
990  }
991  else
992  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
993 
994  parser->pop_state();
995 #if 0
996  // Check that we have a constructor BaseType (Structure, Sequence, or Group)
997  if (parent && parent->is_constructor_type())
998 
999  else {
1000  D4ParserSax2::dmr_error(parser,
1001  "Tried to add the simple-type variable '%s' to a non-constructor type (%s %s).", localname,
1002  parser->top_basetype()->type_name().c_str(), parser->top_basetype()->name().c_str());
1003  // since the BaseType* was popped and not copied anywhere,
1004  // it must be deleted.
1005  delete btp;
1006  }
1007  }
1008  else
1009  D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1010 #endif
1011  break;
1012 
1013  case inside_dim:
1014  if (is_not(localname, "Dim"))
1015  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1016 
1017  parser->pop_state();
1018  break;
1019 
1020  case inside_map:
1021  if (is_not(localname, "Map"))
1022  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1023 
1024  parser->pop_state();
1025  break;
1026 #if 0
1027  // a nicer name, but not what we chose
1028  case inside_dimension:
1029  if (is_not(localname, "dimension"))
1030  D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1031 
1032  parser->pop_state();
1033  break;
1034 #endif
1035  case inside_constructor: {
1036  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1037  D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1038  return;
1039  }
1040 
1041  BaseType *btp = parser->top_basetype();
1042  parser->pop_basetype();
1043  parser->pop_attributes();
1044 
1045  BaseType *parent = 0;
1046  if (!parser->empty_basetype())
1047  parent = parser->top_basetype();
1048  else if (!parser->empty_group())
1049  parent = parser->top_group();
1050  else {
1051  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.", localname);
1052  delete btp;
1053  }
1054 
1055  // TODO Why doesn't this code mirror the simple_var case and test
1056  // for the parent being an array? jhrg 10/13/13
1057  parent->add_var_nocopy(btp);
1058  parser->pop_state();
1059  break;
1060  }
1061 
1062  case parser_unknown:
1063  parser->pop_state();
1064  break;
1065 
1066  case parser_error:
1067  case parser_fatal_error:
1068  break;
1069 
1070  case parser_end:
1071  // FIXME Error?
1072  break;
1073  }
1074 
1075  if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1076 }
1077 
1081 void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1082 {
1083  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1084 
1085  switch (parser->get_state()) {
1086  case inside_attribute_value:
1087  parser->char_data.append((const char *) (ch), len);
1088  DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1089  break;
1090 
1091  case inside_other_xml_attribute:
1092  parser->other_xml.append((const char *) (ch), len);
1093  DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1094  break;
1095 
1096  default:
1097  break;
1098  }
1099 }
1100 
1105 void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1106 {
1107  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1108 
1109  switch (parser->get_state()) {
1110  case inside_other_xml_attribute:
1111  parser->other_xml.append((const char *) (ch), len);
1112  break;
1113 
1114  default:
1115  break;
1116  }
1117 }
1118 
1124 void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1125 {
1126  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1127 
1128  switch (parser->get_state()) {
1129  case inside_other_xml_attribute:
1130  parser->other_xml.append((const char *) (value), len);
1131  break;
1132 
1133  case parser_unknown:
1134  break;
1135 
1136  default:
1137  D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1138 
1139  break;
1140  }
1141 }
1142 
1147 xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1148 {
1149  return xmlGetPredefinedEntity(name);
1150 }
1151 
1162 void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1163 {
1164  va_list args;
1165  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1166 
1167  parser->push_state(parser_fatal_error);
1168 
1169  va_start(args, msg);
1170  char str[1024];
1171  vsnprintf(str, 1024, msg, args);
1172  va_end(args);
1173 
1174  int line = xmlSAX2GetLineNumber(parser->context);
1175 
1176  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1177  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1178 }
1179 
1180 void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1181 {
1182  va_list args;
1183  D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1184 
1185  parser->push_state(parser_error);
1186 
1187  va_start(args, msg);
1188  char str[1024];
1189  vsnprintf(str, 1024, msg, args);
1190  va_end(args);
1191 
1192  int line = xmlSAX2GetLineNumber(parser->context);
1193 
1194  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1195  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1196 }
1198 
1202 void D4ParserSax2::cleanup_parse()
1203 {
1204  bool wellFormed = context->wellFormed;
1205  bool valid = context->valid;
1206 
1207  context->sax = NULL;
1208  xmlFreeParserCtxt(context);
1209 
1210  delete d_enum_def;
1211  d_enum_def = 0;
1212 
1213  delete d_dim_def;
1214  d_dim_def = 0;
1215 
1216  // If there's an error, there may still be items on the stack at the
1217  // end of the parse.
1218  while (!btp_stack.empty()) {
1219  delete top_basetype();
1220  pop_basetype();
1221  }
1222 
1223  if (!wellFormed)
1224  throw Error("The DMR was not well formed. " + error_msg);
1225  else if (!valid)
1226  throw Error("The DMR was not valid." + error_msg);
1227  else if (get_state() == parser_error)
1228  throw Error(error_msg);
1229  else if (get_state() == parser_fatal_error)
1230  throw InternalErr(error_msg);
1231 }
1232 
1247 void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1248 {
1249  d_debug = debug;
1250 
1251  // Code example from libxml2 docs re: read from a stream.
1252 
1253  if (!f.good())
1254  throw Error("Input stream not open or read error");
1255  if (!dest_dmr)
1256  throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1257 
1258  d_dmr = dest_dmr; // dump values here
1259 
1260  const int size = 1024;
1261  char chars[size];
1262  int line = 1;
1263 
1264  f.getline(chars, size);
1265  int res = f.gcount();
1266  if (res == 0) throw Error("No input found while parsing the DMR.");
1267 
1268  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1269 
1270  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
1271  context->validate = true;
1272  push_state(parser_start);
1273 
1274  f.getline(chars, size);
1275  while ((f.gcount() > 0) && (get_state() != parser_end)) {
1276  if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1277  xmlParseChunk(context, chars, f.gcount() - 1, 0);
1278  f.getline(chars, size);
1279  }
1280 
1281  // This call ends the parse.
1282  xmlParseChunk(context, chars, 0, 1/*terminate*/);
1283 
1284  // This checks that the state on the parser stack is parser_end and throws
1285  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1286  cleanup_parse();
1287 }
1288 
1299 void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1300 {
1301  intern(document.c_str(), document.length(), dest_dmr, debug);
1302 }
1303 
1314 void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1315 {
1316  if (!(size > 0)) return;
1317 
1318  d_debug = debug;
1319 
1320  // Code example from libxml2 docs re: read from a stream.
1321 
1322  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1323  d_dmr = dest_dmr; // dump values in dest_dmr
1324 
1325  push_state(parser_start);
1326  context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
1327  context->validate = true;
1328  //push_state(parser_start);
1329  //xmlParseChunk(context, buffer, size, 0);
1330 
1331  // This call ends the parse.
1332  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1333 
1334  // This checks that the state on the parser stack is parser_end and throws
1335  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1336  cleanup_parse();
1337 }
1338 
1339 } // namespace libdap
D4AttributeType StringToD4AttributeType(string s)
void add_value(const string &label, long long value)
Definition: D4EnumDefs.h:75
void set_dmr_version(const string &v)
Definition: DMR.h:146
static void dmr_end_document(void *parser)
void set_namespace(const string &ns)
Set the namespace for this DDS/DDX object/response.
Definition: DMR.h:158
bool is_valid_enum_value(long long value)
Definition: D4EnumDefs.cc:43
Type type() const
Definition: D4EnumDefs.h:67
void set_name(const string &n)
Definition: D4EnumDefs.h:65
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:245
D4Group * root()
Definition: DMR.cc:243
static void dmr_start_document(void *parser)
virtual D4Maps * maps()
Definition: Array.cc:668
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:858
Array * find_map_source(const string &path)
Definition: D4Group.cc:272
void set_size(unsigned long size)
Definition: D4Dimensions.h:63
void set_type(Type t)
Definition: D4EnumDefs.h:68
void set_request_xml_base(const string &xb)
Definition: DMR.h:152
Type
Identifies the data type.
Definition: Type.h:94
Type type() const
Returns the type of the class instance.
Definition: BaseType.cc:306
A class for software fault reporting.
Definition: InternalErr.h:64
static void dmr_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
#define DBG(x)
Definition: debug.h:58
string type_name() const
Returns the type of the class instance as a string.
Definition: BaseType.cc:320
void add_dim_nocopy(D4Dimension *dim)
Definition: D4Dimensions.h:160
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:125
D4EnumDef * find_enum_def(const string &path)
Definition: D4Group.cc:310
ObjectType get_type(const string &value)
Definition: mime_util.cc:326
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:732
static void dmr_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:815
virtual D4Attributes * attributes()
Definition: BaseType.cc:529
virtual void add_var_nocopy(BaseType *bt, Part part=nil)
Definition: BaseType.cc:727
string name() const
Returns the name of the class instance.
Definition: BaseType.cc:261
string long_to_string(long val, int base)
Definition: util.cc:1012
void add_attribute_nocopy(D4Attribute *attr)
Definition: D4Attributes.h:135
void add_enum_nocopy(D4EnumDef *enum_def)
Definition: D4EnumDefs.h:139
The basic data type for the DODS DAP types.
Definition: BaseType.h:117
void set_dap_version(const string &version_string)
Definition: DMR.cc:255
D4Attribute * get(const string &fqn)
void add_map(D4Map *map)
Definition: D4Maps.h:97
A class for error processing.
Definition: Error.h:90
void add_value(const string &value)
Definition: D4Attributes.h:77
void set_name(const string &n)
Definition: DMR.h:118
virtual std::string FQN() const
Definition: D4Group.cc:153
D4EnumDefs * enum_defs()
Get the enumerations defined for this Group.
Definition: D4Group.h:95
static void dmr_fatal_error(void *parser, const char *msg,...)
bool is_integer_type(Type t)
Definition: util.cc:948
static void dmr_error(void *parser, const char *msg,...)
D4Dimensions * dims()
Get the dimensions defined for this Group.
Definition: D4Group.h:80
virtual BaseType * NewVariable(Type t, const string &name) const
void set_name(const string &name)
Definition: D4Dimensions.h:59