libdap++  Updated for version 3.12.0
D4ParserSax2.h
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2012 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #ifndef d4_parser_sax2_h
27 #define d4_parser_sax2_h
28 
29 #include <string>
30 #include <iostream>
31 #include <map>
32 #include <stack>
33 
34 #include <libxml/parserInternals.h>
35 
36 #include "DDS.h"
37 #include "BaseType.h"
38 //#include "D4EnumDef.h"
39 #include "D4BaseTypeFactory.h"
40 
41 #include "D4ParseError.h"
42 
43 namespace libdap
44 {
45 
70 {
71 private:
// ParseState enumerates the states the SAX parser can be in while it walks
// the document. Values of this type are what the `s` stack in this class
// holds ("Current parse state").
//
// The enumerators have no explicit values, so they are numbered
// consecutively from 0 (parser_start) in declaration order; do not reorder
// them if anything depends on the numeric value.
74  enum ParseState {
// Initial state. NOTE(review): presumably the state before the root
// element has been seen -- confirm against the implementation file.
75  parser_start,
76 
77  // inside_group is the state just after parsing the start of a Group
78  // element.
79  inside_group,
80 
81  // @TODO Parse attributes once the variables are working.
// Attribute-related states. Judging by the names these track a DAP
// attribute container, a single attribute, its value element and an
// 'OtherXML' attribute respectively -- confirm in the implementation.
82  inside_attribute_container,
83  inside_attribute,
84  inside_attribute_value,
85  inside_other_xml_attribute,
86 
// Enumeration definition and its constants (names suggest this; the
// matching D4EnumDef member is currently compiled out with #if 0).
87  inside_enum_def,
88  inside_enum_const,
89 
90  // This covers Byte, ..., Url, Opaque
91  inside_simple_type,
92 
// Array variable and its dimension elements (inferred from names).
93  inside_array,
94  inside_dimension,
95 
// Grid variable and its map elements (inferred from names).
96  inside_grid,
97  inside_map,
98 
// Constructor types (inferred from names).
99  inside_structure,
100  inside_sequence,
101 
// parser_unknown: presumably used while skipping unrecognized XML (see the
// unknown_depth member); parser_error: presumably entered after a parse
// error has been recorded (see error_msg) -- both TODO confirm.
102  parser_unknown,
103  parser_error
104  };
105 
106  D4BaseTypeFactory *d_factory;
107 
108  // These stacks hold the state of the parse as it progresses.
109  stack<ParseState> s; // Current parse state
110  stack<BaseType*> bt_stack; // current variable(s)/groups(s)
111  stack<AttrTable*> at_stack; // current attribute table
112 
113 #if 0
114  // If an enumeration is being defined, hold it here until it's complete
115  D4EnumDef *d_enum_def;
116 #endif
117 
118  // Accumulate stuff inside an 'OtherXML' DAP attribute here
119  string other_xml;
120 
121  // When we're parsing unknown XML, how deeply is it nested? This is used
122  // for the OtherXML DAP attributes.
123  unsigned int other_xml_depth;
124  unsigned int unknown_depth;
125 
126  // These are used for processing errors.
127  string error_msg; // Error message(s), if any.
128  xmlParserCtxtPtr ctxt; // used for error message line numbers
129 
130  // The results of the parse operation are stored in these fields.
131  DDS *dds; // dump DMR here
132 
133  // These hold temporary values read during the parse.
134  string dods_attr_name; // DAP4 attributes, not XML attributes
135  string dods_attr_type; // ... not XML ...
136  string char_data; // char data in value elements; null after use
137  string root_ns; // What is the namespace of the root node (Group)
138 
139  class XMLAttribute {
140  public:
141  string prefix;
142  string nsURI;
143  string value;
144 
145  void clone(const XMLAttribute &src) {
146  prefix = src.prefix;
147  nsURI = src.nsURI;
148  value = src.value;
149  }
150 
151  XMLAttribute() : prefix(""), nsURI(""), value("") {}
152  XMLAttribute(const string &p, const string &ns, const string &v)
153  : prefix(p), nsURI(ns), value(v) {}
154  // 'attributes' as passed from libxml2 is a five element array but this
155  // ctor gets the back four elements.
156  XMLAttribute(const xmlChar **attributes/*[4]*/) {
157  prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
158  nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
159  value = string((const char *)attributes[2], (const char *)attributes[3]);
160  }
161  XMLAttribute(const XMLAttribute &rhs) {
162  clone(rhs);
163  }
164  XMLAttribute &operator=(const XMLAttribute &rhs) {
165  if (this == &rhs)
166  return *this;
167  clone(rhs);
168  return *this;
169  }
170  };
171 
172  typedef map<string, XMLAttribute> XMLAttrMap;
173  XMLAttrMap xml_attrs; // dump XML attributes here
174 
175  XMLAttrMap::iterator xml_attr_begin() {
176  return xml_attrs.begin();
177  }
178 
179  XMLAttrMap::iterator xml_attr_end() {
180  return xml_attrs.end();
181  }
182 
183  map<string, string> namespace_table;
184 
185  // These are kind of silly...
186  void set_state(D4ParserSax2::ParseState state);
187  D4ParserSax2::ParseState get_state() const;
188  void pop_state();
189 
190  // Glue for the BaseTypeFactory class.
191  BaseType *factory(Type t, const string &name);
192 
193  // Common cleanup code for intern() and intern_stream()
194  void cleanup_parse(xmlParserCtxtPtr &context) const;
195 
202  void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
203  void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
204  bool check_required_attribute(const string &attr);
205  bool check_attribute(const string & attr);
206 
207  void process_attribute_helper(const xmlChar **attrs, int nb_attrs);
208 
209  void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
210 
211  void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
212  void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
213 
214  void process_dimension(const xmlChar **attrs, int nb_attrs);
215 
216  bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
217  bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
218 
219  bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
220  bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
221 
222  void finish_variable(const char *tag, Type t, const char *expected);
224 
225  friend class D4ParserSax2Test;
226 
227 public:
228  // Read the factory class used to build BaseTypes from the DDS passed
229  // into the intern() method.
230  D4ParserSax2() : d_factory(0),
231  other_xml(""), other_xml_depth(0), unknown_depth(0),
232  error_msg(""), ctxt(0), dds(0),
233  dods_attr_name(""), dods_attr_type(""),
234  char_data(""), root_ns("")
235  {}
236 
237  void intern(const string &document, DDS *dest_dds);
238  void intern(istream &in, DDS *dest_dds);
239 
240  static void ddx_start_document(void *parser);
241  static void ddx_end_document(void *parser);
242 
243  static void ddx_start_element(void *parser,
244  const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
245  int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
246  int nb_defaulted, const xmlChar **attributes);
247  static void ddx_end_element(void *parser, const xmlChar *localname,
248  const xmlChar *prefix, const xmlChar *URI);
249 
250  static void ddx_get_characters(void *parser, const xmlChar *ch, int len);
251  static void ddx_ignoreable_whitespace(void *parser,
252  const xmlChar * ch, int len);
253  static void ddx_get_cdata(void *parser, const xmlChar *value, int len);
254 
255  static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name);
256  static void ddx_fatal_error(void *parser, const char *msg, ...);
257 };
258 
259 } // namespace libdap
260 
261 #endif // d4_parser_sax2_h
friend class D4ParserSax2Test
Definition: D4ParserSax2.h:225
static void ddx_get_cdata(void *parser, const xmlChar *value, int len)
static void ddx_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
Type
Identifies the data type.
Definition: BaseType.h:137
static void ddx_start_document(void *parser)
static void ddx_start_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI, int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int nb_defaulted, const xmlChar **attributes)
static void ddx_get_characters(void *parser, const xmlChar *ch, int len)
static xmlEntityPtr ddx_get_entity(void *parser, const xmlChar *name)
The basic data type for the DODS DAP types.
Definition: BaseType.h:199
static void ddx_fatal_error(void *parser, const char *msg,...)
void intern(const string &document, DDS *dest_dds)
static void ddx_end_document(void *parser)
static void ddx_end_element(void *parser, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)