diff options
author | Brett Vickers <brett@beevik.com> | 2019-02-02 11:03:15 -0800 |
---|---|---|
committer | Brett Vickers <brett@beevik.com> | 2019-02-02 11:03:15 -0800 |
commit | 4fa5c484a3efe0a2cc3f4d724446d49ae27d5131 (patch) | |
tree | 6d15457ee2698181277e71c95c2424c3226b2652 | |
parent | b008cbda3fd60f494a3733a34c2b8cea5baebaf9 (diff) | |
download | go-etree-4fa5c484a3efe0a2cc3f4d724446d49ae27d5131.tar.gz |
Add new path filter functions.
Added local-name(), name(), and namespace-prefix().
Improved path documentation.
Updated copyright date on license file.
-rw-r--r-- | LICENSE | 2 |
-rw-r--r-- | etree.go | 12 |
-rw-r--r-- | path.go | 117 |
3 files changed, 78 insertions, 53 deletions
@@ -1,4 +1,4 @@ -Copyright 2015 Brett Vickers. All rights reserved. +Copyright 2015-2019 Brett Vickers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -1,4 +1,4 @@ -// Copyright 2015 Brett Vickers. +// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -410,6 +410,16 @@ func (e *Element) hasText() bool { return ok } +// namespacePrefix returns the namespace prefix associated with the element. +func (e *Element) namespacePrefix() string { + return e.Space +} + +// name returns the tag associated with the element. +func (e *Element) name() string { + return e.Tag +} + // Text returns all character data immediately following the element's opening // tag. func (e *Element) Text() string { @@ -1,4 +1,4 @@ -// Copyright 2015 Brett Vickers. +// Copyright 2015-2019 Brett Vickers. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -10,61 +10,75 @@ import ( ) /* -A Path is an object that represents an optimized version of an XPath-like -search string. A path search string is a slash-separated series of "selectors" -allowing traversal through an XML hierarchy. Although etree path strings are -similar to XPath strings, they have a more limited set of selectors and -filtering options. The following selectors and filters are supported by etree -paths: - - . Select the current element. - .. Select the parent of the current element. - * Select all child elements of the current element. - / Select the root element when used at the start of - a path. - // Select all descendants of the current element. If - used at the start of a path, select all - descendants of the root. - tag Select all child elements with the given tag. - [#] Select the element of the given index (1-based, - negative starts from the end). 
- [@attrib] Select all elements with the given attribute. - [@attrib='val'] Select all elements with the given attribute set - to val. - [tag] Select all elements with a child element named - tag. - [tag='val'] Select all elements with a child element named - tag and text matching val. - [text()] Select all elements with non-empty text. - [text()='val'] Select all elements whose text matches val. - [namespace-uri()='val'] Select all elements whose namespace URI matches - val. - -Examples: - -Select the bookstore child element of the root element: +A Path is a string that represents a search path through an etree starting +from the document root or an arbitrary element. Paths are used with the +Element object's Find* methods to locate and return desired elements. + +A Path consists of a series of slash-separated "selectors", each of which may +be modified by one or more bracket-enclosed "filters". Selectors are used to +traverse the etree from element to element, while filters are used to narrow +the list of candidate elements at each node. + +Although etree Path strings are similar to XPath strings +(https://www.w3.org/TR/1999/REC-xpath-19991116/), they have a more limited set +of selectors and filtering options. + +The following selectors are supported by etree Path strings: + + . Select the current element. + .. Select the parent of the current element. + * Select all child elements of the current element. + / Select the root element when used at the start of a path. + // Select all descendants of the current element. + tag Select all child elements with a name matching the tag. + +The following basic filters are supported by etree Path strings: + + [@attrib] Keep elements with an attribute named attrib. + [@attrib='val'] Keep elements with an attribute named attrib and value matching val. + [tag] Keep elements with a child element named tag. + [tag='val'] Keep elements with a child element named tag and text matching val. 
+ [n] Keep the n-th element, where n is a numeric index starting from 1. + +The following function filters are also supported: + + [text()] Keep elements with non-empty text. + [text()='val'] Keep elements whose text matches val. + [local-name()='val'] Keep elements whose un-prefixed tag matches val. + [name()='val'] Keep elements whose full tag exactly matches val. + [namespace-prefix()='val'] Keep elements whose namespace prefix matches val. + [namespace-uri()='val'] Keep elements whose namespace URI matches val. + +Here are some examples of Path strings: + +- Select the bookstore child element of the root element: /bookstore -Beginning a search from the root element, select the title elements of all +- Beginning from the root element, select the title elements of all descendant book elements having a 'category' attribute of 'WEB': //book[@category='WEB']/title -Beginning a search from the current element, select the first descendant book -element with a title child containing the text 'Great Expectations': +- Beginning from the current element, select the first descendant +book element with a title child element containing the text 'Great +Expectations': .//book[title='Great Expectations'][1] -Beginning a search from the current element, select all children of book -elements with an attribute 'language' set to 'english': +- Beginning from the current element, select all child elements of +book elements with an attribute 'language' set to 'english': ./book/*[@language='english'] -Beginning a search from the current element, select all children of book -elements containing the text 'special': +- Beginning from the current element, select all child elements of +book elements containing the text 'special': ./book/*[text()='special'] -Beginning a search from the current element, select all descendant book -elements whose title element has an attribute 'language' equal to 'french': +- Beginning from the current element, select all descendant book +elements whose title 
child element has a 'language' attribute of 'french': .//book/title[@language='french']/.. +- Beginning from the current element, select all book elements +belonging to the http://www.w3.org/TR/html4/ namespace: + .//book[namespace-uri()='http://www.w3.org/TR/html4/'] + */ type Path struct { segments []segment @@ -270,8 +284,11 @@ var fnTable = map[string]struct { hasFn func(e *Element) bool getValFn func(e *Element) string }{ - "text": {(*Element).hasText, (*Element).Text}, - "namespace-uri": {nil, (*Element).NamespaceURI}, + "local-name": {nil, (*Element).name}, + "name": {nil, (*Element).FullTag}, + "namespace-prefix": {nil, (*Element).namespacePrefix}, + "namespace-uri": {nil, (*Element).NamespaceURI}, + "text": {(*Element).hasText, (*Element).Text}, } // parseFilter parses a path filter contained within [brackets]. @@ -300,10 +317,9 @@ func (c *compiler) parseFilter(path string) filter { fn := key[:len(key)-2] if t, ok := fnTable[fn]; ok && t.getValFn != nil { return newFilterFuncVal(t.getValFn, value) - } else { - c.err = ErrPath("path has unknown function " + fn) - return nil } + c.err = ErrPath("path has unknown function " + fn) + return nil default: return newFilterChildText(key, value) } @@ -317,10 +333,9 @@ func (c *compiler) parseFilter(path string) filter { fn := path[:len(path)-2] if t, ok := fnTable[fn]; ok && t.hasFn != nil { return newFilterFunc(t.hasFn) - } else { - c.err = ErrPath("path has unknown function " + fn) - return nil } + c.err = ErrPath("path has unknown function " + fn) + return nil case isInteger(path): pos, _ := strconv.Atoi(path) switch { |