data/xml.rs
1//! A simple XML parser based on quick-xml.
2//!
3//! This is basically an interface on top of quick-xml. It provides an
4//! API to let the user write callback functions for specific parts of
5//! the document, and hides the reader from the user.
6//!
7//! Because of its simple nature, it has a set of limitations:
8//!
9//! * It only parses from a byte buffer. If the XML is from a file,
10//! the user needs to read the file into a buffer before using the
11//! parser.
12//!
13//! * The XML has to be encoded in UTF-8.
14//!
15//! ## Example
16//!
17//! ```
18//! # use std::str;
19//! # use data::xml::Parser;
20//! struct A { a: String }
21//!
22//! let mut aaa: A = A { a: String::new() };
23//! let mut iin = 0;
24//! let mut out = 0;
25//! let mut d = String::new();
26//!
27//! let mut p = Parser::new();
28//! p.addBeginHandler("a", |_, _| {
29//! iin += 1;
30//! Ok(())
31//! });
32//! p.addEndHandler("a", |_, _| {
33//! out += 1;
34//! Ok(())
35//! });
36//! p.addTextHandler("a", |_, t| {
37//! aaa.a = t.to_owned();
38//! Ok(())
39//! });
40//! p.addTagHandler("d", |_, t| {
41//! d = str::from_utf8(t).unwrap().to_owned();
42//! Ok(())
43//! });
44//! p.parse(r#"
45//! <c>
46//! <a>aaa</a>
47//! <b/>
48//! <d>
49//! <e/>
50//! </d>
51//! </c>
52//! "#.as_bytes()).unwrap();
53//! drop(p);
54//! assert_eq!(iin, 1);
55//! assert_eq!(out, 1);
56//! assert_eq!(aaa.a, "aaa".to_owned());
57//! assert_eq!(d, r#"<d>
58//! <e/>
59//! </d>"#);
60//! ```
61
62use std::str;
63use std::collections::HashMap;
64use std::str::FromStr;
65
66use quick_xml::events::{Event, BytesEnd, BytesStart};
67
68use error::Error;
69
70type BeginHandler<'a> = Box<dyn FnMut(&[String], &BytesStart) ->
71 Result<(), Error> + 'a>;
72type EndHandler<'a> = Box<dyn FnMut(&[String], &BytesEnd) ->
73 Result<(), Error> + 'a>;
74type TextHandler<'a> = Box<dyn FnMut(&[String], &str) ->
75 Result<(), Error> + 'a>;
76type TagHandler<'a> = Box<dyn FnMut(&[String], &[u8]) ->
77 Result<(), Error> + 'a>;
78type BeginHandlerMap<'a> = HashMap<&'static str, BeginHandler<'a>>;
79type EndHandlerMap<'a> = HashMap<&'static str, EndHandler<'a>>;
80type TextHandlerMap<'a> = HashMap<&'static str, TextHandler<'a>>;
81type TagHandlerMap<'a> = HashMap<&'static str, TagHandler<'a>>;
82
83/// A simple event callback-based XML parser.
84pub struct Parser<'a>
85{
86 begin_handlers: BeginHandlerMap<'a>,
87 end_handlers: EndHandlerMap<'a>,
88 text_handlers: TextHandlerMap<'a>,
89 tag_handlers: TagHandlerMap<'a>,
90}
91
92impl<'a> Parser<'a>
93{
94 /// Create a parser with no callbacks.
95 pub fn new() -> Self
96 {
97 Self
98 {
99 begin_handlers: HashMap::new(),
100 end_handlers: HashMap::new(),
101 text_handlers: HashMap::new(),
102 tag_handlers: HashMap::new(),
103 }
104 }
105
106 /// Add a callback for an opening tag. If the parser encounters an
107 /// opening tag whose name coincides with the value of `tag`, it
108 /// calls `handler` with the opening tag event. Self-closing tags
109 /// also trigger begin handlers.
110 pub fn addBeginHandler<F>(&mut self, tag: &'static str, handler: F)
111 where F: FnMut(&[String], &BytesStart) -> Result<(), Error> + 'a
112 {
113 self.begin_handlers.insert(tag, Box::new(handler));
114 }
115
116 /// Add a callback for an end tag. If the parser encounters an end
117 /// tag whose name coincides with the value of `tag` (not
118 /// including the starting `/`), it calls `handler` with the end
119 /// tag event. Self-closing tags also trigger end handlers.
120 pub fn addEndHandler<F>(&mut self, tag: &'static str, handler: F)
121 where F: FnMut(&[String], &BytesEnd) -> Result<(), Error> + 'a
122 {
123 self.end_handlers.insert(tag, Box::new(handler));
124 }
125
126 /// Add a callback for text element directly inside some tag. If
127 /// the parser encounters a text element where it’s enclosing tag
128 /// coincides with the value of `tag`, it calls `handler` with the
129 /// decoded text string.
130 ///
131 /// Note that this does not mean the text needs to be the only or
132 /// the last element in the enclosing tag.
133 pub fn addTextHandler<F>(&mut self, tag: &'static str, handler: F)
134 where F: FnMut(&[String], &str) -> Result<(), Error> + 'a
135 {
136 self.text_handlers.insert(tag, Box::new(handler));
137 }
138
139 /// Add a callback for a whole tag. If the parser encounters an
140 /// opening element (including self-closing tags) whose name
141 /// coincides with the value of `tag`, it calls `handler` with the
142 /// content of the whole tag, including the opening and the
143 /// closing tag. The parser then skips the whole tag.
144 ///
145 /// This is useful if the user wants to delegate the parsing of a
146 /// tag to another parser.
147 pub fn addTagHandler<F>(&mut self, tag: &'static str, handler: F)
148 where F: FnMut(&[String], &[u8]) -> Result<(), Error> + 'a
149 {
150 self.tag_handlers.insert(tag, Box::new(handler));
151 }
152
153 /// Parse the XML in the byte buffer `x`, triggering the callbacks
154 /// in the process. It is important to note that *this buffer
155 /// should only contains one root tag*.
156 pub fn parse(&mut self, x: &[u8]) -> Result<(), Error>
157 {
158 let mut reader = quick_xml::Reader::from_str(unsafe {
159 str::from_utf8_unchecked(x)
160 });
161 let mut path: Vec<String> = Vec::new();
162 let mut stop: bool = false;
163
164 while !stop
165 {
166 let pos_before = reader.buffer_position();
167 match reader.read_event() {
168 Ok(Event::Start(e)) =>
169 {
170 let tag: &str = str::from_utf8(e.name().into_inner())
171 .map_err(
172 |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
173 path.push(tag.to_owned());
174 if let Some(f) = self.begin_handlers.get_mut(tag)
175 {
176 f(&path, &e)?;
177 }
178
179 if let Some(f) = self.tag_handlers.get_mut(tag)
180 {
181 reader.read_to_end(e.to_end().name()).map_err(
182 |_| xmlerr!("Failed to find end tag of {}.", tag))?;
183 f(&path, &x[pos_before as usize .. reader.buffer_position() as usize])?;
184 path.pop();
185 }
186 },
187 Ok(Event::Empty(e)) =>
188 {
189 let tag: &str = str::from_utf8(e.name().into_inner())
190 .map_err(
191 |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
192 path.push(tag.to_owned());
193 if let Some(f) = self.begin_handlers.get_mut(tag)
194 {
195 f(&path, &e)?;
196 }
197 if let Some(f) = self.end_handlers.get_mut(tag)
198 {
199 f(&path, &e.to_end())?;
200 }
201 path.pop();
202 if path.is_empty()
203 {
204 stop = true;
205 }
206 },
207 Ok(Event::End(e)) =>
208 {
209 let tag: &str = str::from_utf8(e.name().into_inner())
210 .map_err(
211 |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
212 if let Some(name) = path.last()
213 {
214 if *name != tag
215 {
216 return Err(
217 xmlerr!("Invalid XML. Expecting {}, got {}.",
218 name, tag));
219 }
220 }
221 else
222 {
223 return Err(
224 xmlerr!("Invalid XML. XML should end, got {}.",
225 tag));
226 }
227
228 if let Some(f) = self.end_handlers.get_mut(tag)
229 {
230 f(&path, &e)?;
231 }
232 path.pop();
233 if path.is_empty()
234 {
235 stop = true;
236 }
237 },
238 Ok(Event::Text(inner)) =>
239 {
240 if let Some(tag) = path.last()
241 {
242 let tag: &str = tag;
243 if let Some(f) = self.text_handlers.get_mut(tag)
244 {
245 let t = str::from_utf8(inner.as_ref()).map_err(
246 |_| xmlerr!("Failed to decode text element in \
247 {}", tag))?;
248 f(&path, t)?;
249 }
250 }
251 else
252 {
253 // If the XML ends in whitespace, this branch
254 // will trigger, which is fine.
255 }
256 },
257 Ok(_) => {},
258 Err(e) =>
259 {
260 return Err(xmlerr!("Failed to parse XML: {}", e));
261 },
262 }
263 }
264 Ok(())
265 }
266}
267
268/// Return the value of the attribute `attr` from the opening tag
269/// `tag`.
270pub fn getTagAttr<T: FromStr>(tag: &BytesStart, attr: &str) ->
271 Result<Option<T>, Error>
272{
273 if let Some(at) = tag.try_get_attribute(attr)
274 .map_err(|_| xmlerr!("Failed to get attribute '{}'.", attr))?
275 {
276 let value: T = str::from_utf8(at.value.as_ref()).map_err(
277 |_| xmlerr!("Failed to decode value of attribute '{}'.", attr))?
278 .parse().map_err(
279 |_| xmlerr!("Invalid value of attirbute '{}'.", attr))?;
280 Ok(Some(value))
281 }
282 else
283 {
284 Ok(None)
285 }
286}
287
288// ========== Tests =================================================>
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Receives the text found inside `<a>`.
    struct A { a: String }

    /// Registers one handler of every kind and checks what each of
    /// them observed after parsing a small document.
    #[test]
    fn parsing() -> Result<(), Error>
    {
        let xml = r#"
<c>
 <a>aaa</a>
 <b/>
 <d>
 <e/>
 </d>
</c>
"#;

        let mut record = A { a: String::new() };
        let mut begins = 0;
        let mut ends = 0;
        let mut raw_d = String::new();

        let mut parser = Parser::new();
        parser.addBeginHandler("a", |_, _| {
            begins += 1;
            Ok(())
        });
        parser.addEndHandler("a", |_, _| {
            ends += 1;
            Ok(())
        });
        parser.addTextHandler("a", |_, text| {
            record.a = text.to_owned();
            Ok(())
        });
        parser.addTagHandler("d", |_, bytes| {
            raw_d = str::from_utf8(bytes).unwrap().to_owned();
            Ok(())
        });
        parser.parse(xml.as_bytes())?;
        // The handlers borrow the locals mutably; release them before
        // inspecting the results.
        drop(parser);

        assert_eq!(begins, 1);
        assert_eq!(ends, 1);
        assert_eq!(record.a, "aaa".to_owned());
        assert_eq!(raw_d, r#"<d>
 <e/>
 </d>"#);
        Ok(())
    }
}
339}