aboutsummaryrefslogtreecommitdiff
path: root/src/reader/parser/inside_reference.rs
blob: eced6068e3e09b393531e7f387919525c4199c51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
use crate::reader::error::SyntaxError;
use std::char;
use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
use crate::reader::lexer::Token;
use super::{PullParser, Result, State};

impl PullParser {
    /// Handles one lexer token while a reference name is being collected.
    ///
    /// Characters are accumulated in `self.data.ref_data`; the first one must be a
    /// name-start character or `#`, every following one must be a name character.
    /// On `Token::ReferenceEnd` the collected name is resolved, in this order:
    ///
    /// 1. a numeric character reference (name starts with `#`),
    /// 2. one of the five predefined entities (`lt`, `gt`, `amp`, `apos`, `quot`),
    /// 3. an entry of `config.c.extra_entities`,
    /// 4. an entry of `self.entities`.
    ///
    /// The replacement is appended to `self.buf`, except for an entry of
    /// `self.entities` resolved while `state_after_reference` is `State::OutsideTag`,
    /// which is handed to the lexer for reparsing instead. Afterwards the parser
    /// continues in `state_after_reference`.
    ///
    /// Returns `None` while the name is still being collected, an error result for
    /// an empty name, an invalid numeric reference, an unknown entity or any other
    /// token, and otherwise whatever `into_state_continue` returns.
    pub fn inside_reference(&mut self, t: Token) -> Option<Result> {
        match t {
            Token::Character(c)
                if (!self.data.ref_data.is_empty() && is_name_char(c))
                    || (self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#')) =>
            {
                self.data.ref_data.push(c);
                None
            }

            Token::ReferenceEnd => {
                let name = self.data.take_ref_data();
                if name.is_empty() {
                    return Some(self.error(SyntaxError::EmptyEntity));
                }

                // None of the predefined names start with `#`, so the numeric form
                // can be peeled off first without changing which arm would match.
                let c = if let Some(num_str) = name.strip_prefix('#') {
                    match self.numeric_reference_from_str(num_str) {
                        Ok(c) => Some(c),
                        Err(e) => return Some(self.error(e)),
                    }
                } else {
                    match &*name {
                        "lt"   => Some('<'),
                        "gt"   => Some('>'),
                        "amp"  => Some('&'),
                        "apos" => Some('\''),
                        "quot" => Some('"'),
                        _ => None,
                    }
                };
                if let Some(c) = c {
                    self.buf.push(c);
                } else if let Some(v) = self.config.c.extra_entities.get(&name) {
                    self.buf.push_str(v);
                } else if let Some(v) = self.entities.get(&name) {
                    if self.state_after_reference == State::OutsideTag {
                        // an entity can expand to *elements*, so outside of a tag it needs a full reparse
                        if let Err(e) = self.lexer.reparse(v) {
                            return Some(Err(e));
                        }
                    } else {
                        // however, inside attributes it's not allowed to affect attribute quoting,
                        // so it can't be fed to the lexer
                        self.buf.push_str(v);
                    }
                } else {
                    return Some(self.error(SyntaxError::UnexpectedEntity(name.into())));
                }
                let prev_st = self.state_after_reference;
                // Clear the whitespace flag when, outside of a tag, the buffer does not
                // end in whitespace (an empty buffer counts as non-whitespace via '\0').
                if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().next_back().unwrap_or('\0')) {
                    self.inside_whitespace = false;
                }
                self.into_state_continue(prev_st)
            }

            _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
        }
    }

    /// Converts the body of a numeric character reference into a `char`.
    ///
    /// `num_str` is the text after the leading `#`: either decimal digits, or `x`
    /// followed by hexadecimal digits.
    ///
    /// # Errors
    ///
    /// * `SyntaxError::InvalidNumericEntity` if the text is not a plain run of
    ///   digits in the selected radix that fits in a `u32` (this includes an empty
    ///   string and a leading sign).
    /// * `SyntaxError::InvalidCharacterEntity` if the number is not a Unicode scalar
    ///   value or is rejected by `is_valid_xml_char`, unless
    ///   `config.c.replace_unknown_entity_references` is set, in which case U+FFFD
    ///   is returned instead.
    pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> {
        let (digits, radix) = match num_str.strip_prefix('x') {
            Some(hex) => (hex, 16),
            None => (num_str, 10),
        };
        // `from_str_radix` accepts an optional leading `+`, but a character
        // reference consists of digits only, so reject the sign explicitly.
        if digits.starts_with('+') {
            return Err(SyntaxError::InvalidNumericEntity(num_str.into()));
        }
        let val = u32::from_str_radix(digits, radix)
            .map_err(|_| SyntaxError::InvalidNumericEntity(num_str.into()))?;
        match char::from_u32(val) {
            Some(c) if self.is_valid_xml_char(c) => Ok(c),
            // Either not a scalar value at all, or a char XML does not permit.
            _ if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
            _ => Err(SyntaxError::InvalidCharacterEntity(val)),
        }
    }
}