aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brett Vickers <beevik@users.noreply.github.com> 2019-01-31 16:17:18 -0800
committer Brett Vickers <brett@beevik.com> 2019-01-31 16:17:50 -0800
commit 30832cc4f3129bc40598373c3da571d9b256c1e7 (patch)
tree d5f206060358cbe4b9caaf87de6930a58365f3c5
parent 346d4ae1e96f007e0dbef76fa9f013060762484f (diff)
download go-etree-30832cc4f3129bc40598373c3da571d9b256c1e7.tar.gz
Add namespace uri support
* Attributes can be queried for their associated Element.
* Add NamespaceURI method for Element and Attr. This method allows you to discover the namespace URI associated with any element or attribute in the document.
* Add namespace-uri() function queries to path.
-rw-r--r-- etree.go 117
-rw-r--r-- etree_test.go 171
-rw-r--r-- path.go 110
3 files changed, 339 insertions, 59 deletions
diff --git a/etree.go b/etree.go
index 9f7fdd8..2968da1 100644
--- a/etree.go
+++ b/etree.go
@@ -105,7 +105,7 @@ type Document struct {
// An Element represents an XML element, its attributes, and its child tokens.
type Element struct {
- Space, Tag string // namespace and tag
+ Space, Tag string // namespace prefix and tag
Attr []Attr // key-value attribute pairs
Child []Token // child tokens (elements, comments, etc.)
parent *Element // parent element
@@ -114,8 +114,9 @@ type Element struct {
// An Attr represents a key-value attribute of an XML element.
type Attr struct {
- Space, Key string // The attribute's namespace and key
- Value string // The attribute value string
+ Space, Key string // The attribute's namespace prefix and key
+ Value string // The attribute value string
+ element *Element // element containing the attribute
}
// charDataFlags are used with CharData tokens to store additional settings.
@@ -319,7 +320,7 @@ func (d *Document) IndentTabs() {
}
// NewElement creates an unparented element with the specified tag. The tag
-// may be prefixed by a namespace and a colon.
+// may be prefixed by a namespace prefix and a colon.
func NewElement(tag string) *Element {
space, stag := spaceDecompose(tag)
return newElement(space, stag, nil)
@@ -349,6 +350,57 @@ func (e *Element) Copy() *Element {
return e.dup(nil).(*Element)
}
+// NamespaceURI returns the XML namespace URI associated with the element. If
+// no in-scope xmlns declaration matches the element's prefix (or, when it has
+// no prefix, no default namespace is declared), it returns the empty string.
+func (e *Element) NamespaceURI() string {
+ if e.Space == "" {
+ return e.findDefaultNamespaceURI()
+ }
+ return e.findLocalNamespaceURI(e.Space)
+}
+
+// findLocalNamespaceURI finds the namespace URI corresponding to the
+// requested prefix.
+func (e *Element) findLocalNamespaceURI(prefix string) string {
+ for _, a := range e.Attr {
+ if a.Space == "xmlns" && a.Key == prefix {
+ return a.Value
+ }
+ }
+
+ if e.parent == nil {
+ return ""
+ }
+
+ return e.parent.findLocalNamespaceURI(prefix)
+}
+
+// findDefaultNamespaceURI finds the default namespace URI of the element.
+func (e *Element) findDefaultNamespaceURI() string {
+ for _, a := range e.Attr {
+ if a.Space == "" && a.Key == "xmlns" {
+ return a.Value
+ }
+ }
+
+ if e.parent == nil {
+ return ""
+ }
+
+ return e.parent.findDefaultNamespaceURI()
+}
+
+// hasText returns true if the element has character data immediately
+// following the element's opening tag.
+func (e *Element) hasText() bool {
+ if len(e.Child) == 0 {
+ return false
+ }
+ _, ok := e.Child[0].(*CharData)
+ return ok
+}
+
// Text returns all character data immediately following the element's opening
// tag.
func (e *Element) Text() string {
@@ -479,7 +531,7 @@ func (e *Element) findTermCharDataIndex(start int) int {
// CreateElement creates an element with the specified tag and adds it as the
// last child element of the element e. The tag may be prefixed by a namespace
-// and a colon.
+// prefix and a colon.
func (e *Element) CreateElement(tag string) *Element {
space, stag := spaceDecompose(tag)
return newElement(space, stag, e)
@@ -608,7 +660,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
case xml.StartElement:
e := newElement(t.Name.Space, t.Name.Local, top)
for _, a := range t.Attr {
- e.createAttr(a.Name.Space, a.Name.Local, a.Value)
+ e.createAttr(a.Name.Space, a.Name.Local, a.Value, e)
}
stack.push(e)
case xml.EndElement:
@@ -632,7 +684,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
// SelectAttr finds an element attribute matching the requested key and
// returns it if found. Returns nil if no matching attribute is found. The key
-// may be prefixed by a namespace and a colon.
+// may be prefixed by a namespace prefix and a colon.
func (e *Element) SelectAttr(key string) *Attr {
space, skey := spaceDecompose(key)
for i, a := range e.Attr {
@@ -644,8 +696,8 @@ func (e *Element) SelectAttr(key string) *Attr {
}
// SelectAttrValue finds an element attribute matching the requested key and
-// returns its value if found. The key may be prefixed by a namespace and a
-// colon. If the key is not found, the dflt value is returned instead.
+// returns its value if found. The key may be prefixed by a namespace prefix
+// and a colon. If the key is not found, the dflt value is returned instead.
func (e *Element) SelectAttrValue(key, dflt string) string {
space, skey := spaceDecompose(key)
for _, a := range e.Attr {
@@ -668,8 +720,8 @@ func (e *Element) ChildElements() []*Element {
}
// SelectElement returns the first child element with the given tag. The tag
-// may be prefixed by a namespace and a colon. Returns nil if no element with
-// a matching tag was found.
+// may be prefixed by a namespace prefix and a colon. Returns nil if no
+// element with a matching tag was found.
func (e *Element) SelectElement(tag string) *Element {
space, stag := spaceDecompose(tag)
for _, t := range e.Child {
@@ -681,7 +733,7 @@ func (e *Element) SelectElement(tag string) *Element {
}
// SelectElements returns a slice of all child elements with the given tag.
-// The tag may be prefixed by a namespace and a colon.
+// The tag may be prefixed by a namespace prefix and a colon.
func (e *Element) SelectElements(tag string) []*Element {
space, stag := spaceDecompose(tag)
var elements []*Element
@@ -974,35 +1026,46 @@ func (e *Element) addChild(t Token) {
}
// CreateAttr creates an attribute and adds it to element e. The key may be
-// prefixed by a namespace and a colon. If an attribute with the key already
-// exists, its value is replaced.
+// prefixed by a namespace prefix and a colon. If an attribute with the key
+// already exists, its value is replaced.
func (e *Element) CreateAttr(key, value string) *Attr {
space, skey := spaceDecompose(key)
- return e.createAttr(space, skey, value)
+ return e.createAttr(space, skey, value, e)
}
// createAttr is a helper function that creates attributes.
-func (e *Element) createAttr(space, key, value string) *Attr {
+func (e *Element) createAttr(space, key, value string, parent *Element) *Attr {
for i, a := range e.Attr {
if space == a.Space && key == a.Key {
e.Attr[i].Value = value
return &e.Attr[i]
}
}
- a := Attr{space, key, value}
+ a := Attr{
+ Space: space,
+ Key: key,
+ Value: value,
+ element: parent,
+ }
e.Attr = append(e.Attr, a)
return &e.Attr[len(e.Attr)-1]
}
-// RemoveAttr removes and returns the first attribute of the element whose key
-// matches the given key. The key may be prefixed by a namespace and a colon.
-// If an equal attribute does not exist, nil is returned.
+// RemoveAttr removes and returns a copy of the first attribute of the element
+// whose key matches the given key. The key may be prefixed by a namespace
+// prefix and a colon. If a matching attribute does not exist, nil is
+// returned.
func (e *Element) RemoveAttr(key string) *Attr {
space, skey := spaceDecompose(key)
for i, a := range e.Attr {
if space == a.Space && skey == a.Key {
e.Attr = append(e.Attr[0:i], e.Attr[i+1:]...)
- return &a
+ return &Attr{
+ Space: a.Space,
+ Key: a.Key,
+ Value: a.Value,
+ element: nil,
+ }
}
}
return nil
@@ -1031,6 +1094,18 @@ func (a byAttr) Less(i, j int) bool {
return sp < 0
}
+// Element returns the element containing the attribute.
+func (a *Attr) Element() *Element {
+ return a.element
+}
+
+// NamespaceURI returns the namespace URI of the element containing the
+// attribute, as reported by that element's NamespaceURI method. It must not
+// be called on a detached attribute, such as the copy RemoveAttr returns.
+func (a *Attr) NamespaceURI() string {
+ return a.element.NamespaceURI()
+}
+
// writeTo serializes the attribute to the writer.
func (a *Attr) writeTo(w *bufio.Writer, s *WriteSettings) {
if a.Space != "" {
diff --git a/etree_test.go b/etree_test.go
index bf3c5cd..92927d6 100644
--- a/etree_test.go
+++ b/etree_test.go
@@ -32,6 +32,13 @@ func checkIntEq(t *testing.T, got, want int) {
}
}
+func checkElementEq(t *testing.T, got, want *Element) {
+ t.Helper()
+ if got != want {
+ t.Errorf("etree: unexpected element. Got: %v. Wanted: %v.\n", got, want)
+ }
+}
+
func checkDocEq(t *testing.T, doc *Document, expected string) {
t.Helper()
doc.Indent(NoIndent)
@@ -912,3 +919,167 @@ func TestSetTail(t *testing.T) {
checkIntEq(t, len(root.Child), 1)
checkIntEq(t, len(child.Child), 1)
}
+
+func TestAttrParent(t *testing.T) {
+ doc := NewDocument()
+ root := doc.CreateElement("root")
+ attr1 := root.CreateAttr("bar", "1")
+ attr2 := root.CreateAttr("qux", "2")
+
+ checkIntEq(t, len(root.Attr), 2)
+ checkElementEq(t, attr1.Element(), root)
+ checkElementEq(t, attr2.Element(), root)
+
+ attr1 = root.RemoveAttr("bar")
+ attr2 = root.RemoveAttr("qux")
+ checkElementEq(t, attr1.Element(), nil)
+ checkElementEq(t, attr2.Element(), nil)
+
+ s := `<root a="1" b="2" c="3" d="4"/>`
+ err := doc.ReadFromString(s)
+ if err != nil {
+ t.Error("etree: failed to parse document")
+ }
+
+ root = doc.SelectElement("root")
+ for i := range root.Attr {
+ checkElementEq(t, root.Attr[i].Element(), root)
+ }
+}
+
+func TestDefaultNamespaceURI(t *testing.T) {
+ s := `
+<root xmlns="http://root.example.com" a="foo">
+ <child1 xmlns="http://child.example.com" a="foo">
+ <grandchild1 xmlns="http://grandchild.example.com" a="foo">
+ </grandchild1>
+ <grandchild2 a="foo">
+ <greatgrandchild1 a="foo"/>
+ </grandchild2>
+ </child1>
+ <child2 a="foo"/>
+</root>`
+
+ doc := NewDocument()
+ err := doc.ReadFromString(s)
+ if err != nil {
+ t.Error("etree: failed to parse document")
+ }
+
+ root := doc.SelectElement("root")
+ child1 := root.SelectElement("child1")
+ child2 := root.SelectElement("child2")
+ grandchild1 := child1.SelectElement("grandchild1")
+ grandchild2 := child1.SelectElement("grandchild2")
+ greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
+
+ checkStrEq(t, doc.NamespaceURI(), "")
+ checkStrEq(t, root.NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, child1.NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, child2.NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, grandchild1.NamespaceURI(), "http://grandchild.example.com")
+ checkStrEq(t, grandchild2.NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, greatgrandchild1.NamespaceURI(), "http://child.example.com")
+
+ checkStrEq(t, root.Attr[0].NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, child1.Attr[0].NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, child2.Attr[0].NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, grandchild1.Attr[0].NamespaceURI(), "http://grandchild.example.com")
+ checkStrEq(t, grandchild2.Attr[0].NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, greatgrandchild1.Attr[0].NamespaceURI(), "http://child.example.com")
+
+ f := doc.FindElements("//*[namespace-uri()='http://root.example.com']")
+ if len(f) != 2 || f[0] != root || f[1] != child2 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='http://child.example.com']")
+ if len(f) != 3 || f[0] != child1 || f[1] != grandchild2 || f[2] != greatgrandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='http://grandchild.example.com']")
+ if len(f) != 1 || f[0] != grandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='foo']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+}
+
+func TestLocalNamespaceURI(t *testing.T) {
+ s := `
+<a:root xmlns:a="http://root.example.com">
+ <b:child1 xmlns:b="http://child.example.com">
+ <c:grandchild1 xmlns:c="http://grandchild.example.com"/>
+ <b:grandchild2>
+ <a:greatgrandchild1/>
+ </b:grandchild2>
+ <a:grandchild3/>
+ <grandchild4/>
+ </b:child1>
+ <a:child2>
+ </a:child2>
+ <child3>
+ </child3>
+</a:root>`
+
+ doc := NewDocument()
+ err := doc.ReadFromString(s)
+ if err != nil {
+ t.Error("etree: failed to parse document")
+ }
+
+ root := doc.SelectElement("root")
+ child1 := root.SelectElement("child1")
+ child2 := root.SelectElement("child2")
+ child3 := root.SelectElement("child3")
+ grandchild1 := child1.SelectElement("grandchild1")
+ grandchild2 := child1.SelectElement("grandchild2")
+ grandchild3 := child1.SelectElement("grandchild3")
+ grandchild4 := child1.SelectElement("grandchild4")
+ greatgrandchild1 := grandchild2.SelectElement("greatgrandchild1")
+
+ checkStrEq(t, doc.NamespaceURI(), "")
+ checkStrEq(t, root.NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, child1.NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, child2.NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, child3.NamespaceURI(), "")
+ checkStrEq(t, grandchild1.NamespaceURI(), "http://grandchild.example.com")
+ checkStrEq(t, grandchild2.NamespaceURI(), "http://child.example.com")
+ checkStrEq(t, grandchild3.NamespaceURI(), "http://root.example.com")
+ checkStrEq(t, grandchild4.NamespaceURI(), "")
+ checkStrEq(t, greatgrandchild1.NamespaceURI(), "http://root.example.com")
+
+ f := doc.FindElements("//*[namespace-uri()='http://root.example.com']")
+ if len(f) != 4 || f[0] != root || f[1] != child2 || f[2] != grandchild3 || f[3] != greatgrandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='http://child.example.com']")
+ if len(f) != 2 || f[0] != child1 || f[1] != grandchild2 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='http://grandchild.example.com']")
+ if len(f) != 1 || f[0] != grandchild1 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='']")
+ if len(f) != 2 || f[0] != child3 || f[1] != grandchild4 {
+ t.Error("etree: failed namespace-uri test")
+ }
+
+ f = doc.FindElements("//*[namespace-uri()='foo']")
+ if len(f) != 0 {
+ t.Error("etree: failed namespace-uri test")
+ }
+}
diff --git a/path.go b/path.go
index a1a59bd..be3823f 100644
--- a/path.go
+++ b/path.go
@@ -17,22 +17,28 @@ similar to XPath strings, they have a more limited set of selectors and
filtering options. The following selectors and filters are supported by etree
paths:
- . Select the current element.
- .. Select the parent of the current element.
- * Select all child elements of the current element.
- / Select the root element when used at the start of a path.
- // Select all descendants of the current element. If used at
- the start of a path, select all descendants of the root.
- tag Select all child elements with the given tag.
- [#] Select the element of the given index (1-based,
- negative starts from the end).
- [@attrib] Select all elements with the given attribute.
- [@attrib='val'] Select all elements with the given attribute set to val.
- [tag] Select all elements with a child element named tag.
- [tag='val'] Select all elements with a child element named tag
- and text matching val.
- [text()] Select all elements with non-empty text.
- [text()='val'] Select all elements whose text matches val.
+ . Select the current element.
+ .. Select the parent of the current element.
+ * Select all child elements of the current element.
+ / Select the root element when used at the start of
+ a path.
+ // Select all descendants of the current element. If
+ used at the start of a path, select all
+ descendants of the root.
+ tag Select all child elements with the given tag.
+ [#] Select the element of the given index (1-based,
+ negative starts from the end).
+ [@attrib] Select all elements with the given attribute.
+ [@attrib='val'] Select all elements with the given attribute set
+ to val.
+ [tag] Select all elements with a child element named
+ tag.
+ [tag='val'] Select all elements with a child element named
+ tag and text matching val.
+ [text()] Select all elements with non-empty text.
+ [text()='val'] Select all elements whose text matches val.
+ [namespace-uri()='val'] Select all elements whose namespace URI matches
+ val.
Examples:
@@ -260,6 +266,14 @@ func (c *compiler) parseSelector(path string) selector {
}
}
+var fnTable = map[string]struct {
+ hasFn func(e *Element) bool
+ getValFn func(e *Element) string
+}{
+ "text": {(*Element).hasText, (*Element).Text},
+ "namespace-uri": {nil, (*Element).NamespaceURI},
+}
+
// parseFilter parses a path filter contained within [brackets].
func (c *compiler) parseFilter(path string) filter {
if len(path) == 0 {
@@ -267,7 +281,7 @@ func (c *compiler) parseFilter(path string) filter {
return nil
}
- // Filter contains [@attr='val'], [text()='val'], or [tag='val']?
+ // Filter contains [@attr='val'], [fn()='val'], or [tag='val']?
eqindex := strings.Index(path, "='")
if eqindex >= 0 {
rindex := nextIndex(path, "'", eqindex+2)
@@ -275,22 +289,38 @@ func (c *compiler) parseFilter(path string) filter {
c.err = ErrPath("path has mismatched filter quotes.")
return nil
}
+
+ key := path[:eqindex]
+ value := path[eqindex+2 : rindex]
+
switch {
- case path[0] == '@':
- return newFilterAttrVal(path[1:eqindex], path[eqindex+2:rindex])
- case strings.HasPrefix(path, "text()"):
- return newFilterTextVal(path[eqindex+2 : rindex])
+ case key[0] == '@':
+ return newFilterAttrVal(key[1:], value)
+ case strings.HasSuffix(key, "()"):
+ fn := key[:len(key)-2]
+ if t, ok := fnTable[fn]; ok && t.getValFn != nil {
+ return newFilterFuncVal(t.getValFn, value)
+ } else {
+ c.err = ErrPath("path has unknown function " + fn)
+ return nil
+ }
default:
- return newFilterChildText(path[:eqindex], path[eqindex+2:rindex])
+ return newFilterChildText(key, value)
}
}
- // Filter contains [@attr], [N], [tag] or [text()]
+ // Filter contains [@attr], [N], [tag] or [fn()]
switch {
case path[0] == '@':
return newFilterAttr(path[1:])
- case path == "text()":
- return newFilterText()
+ case strings.HasSuffix(path, "()"):
+ fn := path[:len(path)-2]
+ if t, ok := fnTable[fn]; ok && t.hasFn != nil {
+ return newFilterFunc(t.hasFn)
+ } else {
+ c.err = ErrPath("path has unknown function " + fn)
+ return nil
+ }
case isInteger(path):
pos, _ := strconv.Atoi(path)
switch {
@@ -448,35 +478,39 @@ func (f *filterAttrVal) apply(p *pather) {
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
-// filterText filters the candidate list for elements having text.
-type filterText struct{}
+// filterFunc filters the candidate list for elements satisfying a custom
+// boolean function.
+type filterFunc struct {
+ fn func(e *Element) bool
+}
-func newFilterText() *filterText {
- return &filterText{}
+func newFilterFunc(fn func(e *Element) bool) *filterFunc {
+ return &filterFunc{fn}
}
-func (f *filterText) apply(p *pather) {
+func (f *filterFunc) apply(p *pather) {
for _, c := range p.candidates {
- if c.Text() != "" {
+ if f.fn(c) {
p.scratch = append(p.scratch, c)
}
}
p.candidates, p.scratch = p.scratch, p.candidates[0:0]
}
-// filterTextVal filters the candidate list for elements having
-// text equal to the specified value.
-type filterTextVal struct {
+// filterFuncVal filters the candidate list for elements containing a value
+// matching the result of a custom function.
+type filterFuncVal struct {
+ fn func(e *Element) string
val string
}
-func newFilterTextVal(value string) *filterTextVal {
- return &filterTextVal{value}
+func newFilterFuncVal(fn func(e *Element) string, value string) *filterFuncVal {
+ return &filterFuncVal{fn, value}
}
-func (f *filterTextVal) apply(p *pather) {
+func (f *filterFuncVal) apply(p *pather) {
for _, c := range p.candidates {
- if c.Text() == f.val {
+ if f.fn(c) == f.val {
p.scratch = append(p.scratch, c)
}
}