data/
xml.rs

1//! A simple XML parser based on quick-xml.
2//!
3//! This is basically an interface on top of quick-xml. It provides an
4//! API to let the user write callback functions for specific parts of
5//! the document, and hides the reader from the user.
6//!
7//! Because of its simple nature, it has a set of limitations:
8//!
9//! * It only parses from a byte buffer. If the XML is from a file,
10//! the user needs to read the file into a buffer before using the
11//! parser.
12//!
13//! * The XML has to be encoded in UTF-8.
14//!
15//! ## Example
16//!
17//! ```
18//! # use std::str;
19//! # use data::xml::Parser;
20//! struct A { a: String }
21//!
22//! let mut aaa: A = A { a: String::new() };
23//! let mut iin = 0;
24//! let mut out = 0;
25//! let mut d = String::new();
26//!
27//! let mut p = Parser::new();
28//! p.addBeginHandler("a", |_, _| {
29//!     iin += 1;
30//!     Ok(())
31//! });
32//! p.addEndHandler("a", |_, _| {
33//!     out += 1;
34//!     Ok(())
35//! });
36//! p.addTextHandler("a", |_, t| {
37//!     aaa.a = t.to_owned();
38//!     Ok(())
39//! });
40//! p.addTagHandler("d", |_, t| {
41//!     d = str::from_utf8(t).unwrap().to_owned();
42//!     Ok(())
43//! });
44//! p.parse(r#"
45//! <c>
46//!     <a>aaa</a>
47//!     <b/>
48//!     <d>
49//!         <e/>
50//!     </d>
51//! </c>
52//! "#.as_bytes()).unwrap();
53//! drop(p);
54//! assert_eq!(iin, 1);
55//! assert_eq!(out, 1);
56//! assert_eq!(aaa.a, "aaa".to_owned());
57//! assert_eq!(d, r#"<d>
58//!         <e/>
59//!     </d>"#);
60//! ```
61
62use std::str;
63use std::collections::HashMap;
64use std::str::FromStr;
65
66use quick_xml::events::{Event, BytesEnd, BytesStart};
67
68use error::Error;
69
70type BeginHandler<'a> = Box<dyn FnMut(&[String], &BytesStart) ->
71                            Result<(), Error> + 'a>;
72type EndHandler<'a> = Box<dyn FnMut(&[String], &BytesEnd) ->
73                          Result<(), Error> + 'a>;
74type TextHandler<'a> = Box<dyn FnMut(&[String], &str) ->
75                           Result<(), Error> + 'a>;
76type TagHandler<'a> = Box<dyn FnMut(&[String], &[u8]) ->
77                           Result<(), Error> + 'a>;
78type BeginHandlerMap<'a> = HashMap<&'static str, BeginHandler<'a>>;
79type EndHandlerMap<'a> = HashMap<&'static str, EndHandler<'a>>;
80type TextHandlerMap<'a> = HashMap<&'static str, TextHandler<'a>>;
81type TagHandlerMap<'a> = HashMap<&'static str, TagHandler<'a>>;
82
83/// A simple event callback-based XML parser.
84pub struct Parser<'a>
85{
86    begin_handlers: BeginHandlerMap<'a>,
87    end_handlers: EndHandlerMap<'a>,
88    text_handlers: TextHandlerMap<'a>,
89    tag_handlers: TagHandlerMap<'a>,
90}
91
92impl<'a> Parser<'a>
93{
94    /// Create a parser with no callbacks.
95    pub fn new() -> Self
96    {
97        Self
98        {
99            begin_handlers: HashMap::new(),
100            end_handlers: HashMap::new(),
101            text_handlers: HashMap::new(),
102            tag_handlers: HashMap::new(),
103        }
104    }
105
106    /// Add a callback for an opening tag. If the parser encounters an
107    /// opening tag whose name coincides with the value of `tag`, it
108    /// calls `handler` with the opening tag event. Self-closing tags
109    /// also trigger begin handlers.
110    pub fn addBeginHandler<F>(&mut self, tag: &'static str, handler: F)
111        where F: FnMut(&[String], &BytesStart) -> Result<(), Error> + 'a
112    {
113        self.begin_handlers.insert(tag, Box::new(handler));
114    }
115
116    /// Add a callback for an end tag. If the parser encounters an end
117    /// tag whose name coincides with the value of `tag` (not
118    /// including the starting `/`), it calls `handler` with the end
119    /// tag event. Self-closing tags also trigger end handlers.
120    pub fn addEndHandler<F>(&mut self, tag: &'static str, handler: F)
121        where F: FnMut(&[String], &BytesEnd) -> Result<(), Error> + 'a
122    {
123        self.end_handlers.insert(tag, Box::new(handler));
124    }
125
126    /// Add a callback for text element directly inside some tag. If
127    /// the parser encounters a text element where it’s enclosing tag
128    /// coincides with the value of `tag`, it calls `handler` with the
129    /// decoded text string.
130    ///
131    /// Note that this does not mean the text needs to be the only or
132    /// the last element in the enclosing tag.
133    pub fn addTextHandler<F>(&mut self, tag: &'static str, handler: F)
134        where F: FnMut(&[String], &str) -> Result<(), Error> + 'a
135    {
136        self.text_handlers.insert(tag, Box::new(handler));
137    }
138
139    /// Add a callback for a whole tag. If the parser encounters an
140    /// opening element (including self-closing tags) whose name
141    /// coincides with the value of `tag`, it calls `handler` with the
142    /// content of the whole tag, including the opening and the
143    /// closing tag. The parser then skips the whole tag.
144    ///
145    /// This is useful if the user wants to delegate the parsing of a
146    /// tag to another parser.
147    pub fn addTagHandler<F>(&mut self, tag: &'static str, handler: F)
148        where F: FnMut(&[String], &[u8]) -> Result<(), Error> + 'a
149    {
150        self.tag_handlers.insert(tag, Box::new(handler));
151    }
152
153    /// Parse the XML in the byte buffer `x`, triggering the callbacks
154    /// in the process. It is important to note that *this buffer
155    /// should only contains one root tag*.
156    pub fn parse(&mut self, x: &[u8]) -> Result<(), Error>
157    {
158        let mut reader = quick_xml::Reader::from_str(unsafe {
159            str::from_utf8_unchecked(x)
160        });
161        let mut path: Vec<String> = Vec::new();
162        let mut stop: bool = false;
163
164        while !stop
165        {
166            let pos_before = reader.buffer_position();
167            match reader.read_event() {
168                Ok(Event::Start(e)) =>
169                {
170                    let tag: &str = str::from_utf8(e.name().into_inner())
171                        .map_err(
172                            |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
173                    path.push(tag.to_owned());
174                    if let Some(f) = self.begin_handlers.get_mut(tag)
175                    {
176                        f(&path, &e)?;
177                    }
178
179                    if let Some(f) = self.tag_handlers.get_mut(tag)
180                    {
181                        reader.read_to_end(e.to_end().name()).map_err(
182                            |_| xmlerr!("Failed to find end tag of {}.", tag))?;
183                        f(&path, &x[pos_before as usize .. reader.buffer_position() as usize])?;
184                        path.pop();
185                    }
186                },
187                Ok(Event::Empty(e)) =>
188                {
189                    let tag: &str = str::from_utf8(e.name().into_inner())
190                        .map_err(
191                            |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
192                    path.push(tag.to_owned());
193                    if let Some(f) = self.begin_handlers.get_mut(tag)
194                    {
195                        f(&path, &e)?;
196                    }
197                    if let Some(f) = self.end_handlers.get_mut(tag)
198                    {
199                        f(&path, &e.to_end())?;
200                    }
201                    path.pop();
202                    if path.is_empty()
203                    {
204                        stop = true;
205                    }
206                },
207                Ok(Event::End(e)) =>
208                {
209                    let tag: &str = str::from_utf8(e.name().into_inner())
210                        .map_err(
211                            |_| xmlerr!("Failed to decode UTF-8 from XML"))?;
212                    if let Some(name) = path.last()
213                    {
214                        if *name != tag
215                        {
216                            return Err(
217                                xmlerr!("Invalid XML. Expecting {}, got {}.",
218                                       name, tag));
219                        }
220                    }
221                    else
222                    {
223                        return Err(
224                            xmlerr!("Invalid XML. XML should end, got {}.",
225                                   tag));
226                    }
227
228                    if let Some(f) = self.end_handlers.get_mut(tag)
229                    {
230                        f(&path, &e)?;
231                    }
232                    path.pop();
233                    if path.is_empty()
234                    {
235                        stop = true;
236                    }
237                },
238                Ok(Event::Text(inner)) =>
239                {
240                    if let Some(tag) = path.last()
241                    {
242                        let tag: &str = tag;
243                        if let Some(f) = self.text_handlers.get_mut(tag)
244                        {
245                            let t = str::from_utf8(inner.as_ref()).map_err(
246                                |_| xmlerr!("Failed to decode text element in \
247                                            {}", tag))?;
248                            f(&path, t)?;
249                        }
250                    }
251                    else
252                    {
253                        // If the XML ends in whitespace, this branch
254                        // will trigger, which is fine.
255                    }
256                },
257                Ok(_) => {},
258                Err(e) =>
259                {
260                    return Err(xmlerr!("Failed to parse XML: {}", e));
261                },
262            }
263        }
264        Ok(())
265    }
266}
267
268/// Return the value of the attribute `attr` from the opening tag
269/// `tag`.
270pub fn getTagAttr<T: FromStr>(tag: &BytesStart, attr: &str) ->
271    Result<Option<T>, Error>
272{
273    if let Some(at) = tag.try_get_attribute(attr)
274        .map_err(|_| xmlerr!("Failed to get attribute '{}'.", attr))?
275    {
276        let value: T = str::from_utf8(at.value.as_ref()).map_err(
277            |_| xmlerr!("Failed to decode value of attribute '{}'.", attr))?
278            .parse().map_err(
279                |_| xmlerr!("Invalid value of attirbute '{}'.", attr))?;
280        Ok(Some(value))
281    }
282    else
283    {
284        Ok(None)
285    }
286}
287
288// ========== Tests =================================================>
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Holder for the text captured from the `<a>` element.
    struct A { a: String }

    /// Register one handler of each kind, parse a small document and
    /// check what each handler observed.
    #[test]
    fn parsing() -> Result<(), Error>
    {
        let xml = r#"
<c>
    <a>aaa</a>
    <b/>
    <d>
        <e/>
    </d>
</c>
"#;

        let mut captured = A { a: String::new() };
        let mut begin_count = 0;
        let mut end_count = 0;
        let mut raw_d = String::new();

        // The parser borrows the locals above through its closures;
        // confining it to this scope releases those borrows before
        // the assertions run.
        {
            let mut parser = Parser::new();
            parser.addBeginHandler("a", |_path, _start| {
                begin_count += 1;
                Ok(())
            });
            parser.addEndHandler("a", |_path, _end| {
                end_count += 1;
                Ok(())
            });
            parser.addTextHandler("a", |_path, text| {
                captured.a = text.to_owned();
                Ok(())
            });
            parser.addTagHandler("d", |_path, bytes| {
                raw_d = str::from_utf8(bytes).unwrap().to_owned();
                Ok(())
            });
            parser.parse(xml.as_bytes())?;
        }

        assert_eq!(begin_count, 1);
        assert_eq!(end_count, 1);
        assert_eq!(captured.a, "aaa");
        assert_eq!(raw_d, r#"<d>
        <e/>
    </d>"#);
        Ok(())
    }
}