aboutsummaryrefslogtreecommitdiff
path: root/src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java')
-rw-r--r--src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java623
1 files changed, 0 insertions, 623 deletions
diff --git a/src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java b/src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java
deleted file mode 100644
index 3a7ac70..0000000
--- a/src/com/sun/org/apache/xerces/internal/impl/xpath/regex/RangeToken.java
+++ /dev/null
@@ -1,623 +0,0 @@
-/*
- * reserved comment block
- * DO NOT REMOVE OR ALTER!
- */
-/*
- * Copyright 1999-2002,2004,2005 The Apache Software Foundation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.sun.org.apache.xerces.internal.impl.xpath.regex;
-
-/**
- * This class represents a character class such as [a-z] or a period.
- *
- * @xerces.internal
- *
- */
-final class RangeToken extends Token implements java.io.Serializable {
-
- private static final long serialVersionUID = 3257568399592010545L;
-
- int[] ranges;
- boolean sorted;
- boolean compacted;
- RangeToken icaseCache = null;
- int[] map = null;
- int nonMapIndex;
-
- RangeToken(int type) {
- super(type);
- this.setSorted(false);
- }
-
- // for RANGE or NRANGE
- protected void addRange(int start, int end) {
- this.icaseCache = null;
- //System.err.println("Token#addRange(): "+start+" "+end);
- int r1, r2;
- if (start <= end) {
- r1 = start;
- r2 = end;
- } else {
- r1 = end;
- r2 = start;
- }
-
- int pos = 0;
- if (this.ranges == null) {
- this.ranges = new int[2];
- this.ranges[0] = r1;
- this.ranges[1] = r2;
- this.setSorted(true);
- } else {
- pos = this.ranges.length;
- if (this.ranges[pos-1]+1 == r1) {
- this.ranges[pos-1] = r2;
- return;
- }
- int[] temp = new int[pos+2];
- System.arraycopy(this.ranges, 0, temp, 0, pos);
- this.ranges = temp;
- if (this.ranges[pos-1] >= r1)
- this.setSorted(false);
- this.ranges[pos++] = r1;
- this.ranges[pos] = r2;
- if (!this.sorted)
- this.sortRanges();
- }
- }
-
- private final boolean isSorted() {
- return this.sorted;
- }
- private final void setSorted(boolean sort) {
- this.sorted = sort;
- if (!sort) this.compacted = false;
- }
- private final boolean isCompacted() {
- return this.compacted;
- }
- private final void setCompacted() {
- this.compacted = true;
- }
-
- protected void sortRanges() {
- if (this.isSorted())
- return;
- if (this.ranges == null)
- return;
- //System.err.println("Do sorting: "+this.ranges.length);
-
- // Bubble sort
- // Why? -- In many cases,
- // this.ranges has few elements.
- for (int i = this.ranges.length-4; i >= 0; i -= 2) {
- for (int j = 0; j <= i; j += 2) {
- if (this.ranges[j] > this.ranges[j+2]
- || this.ranges[j] == this.ranges[j+2] && this.ranges[j+1] > this.ranges[j+3]) {
- int tmp;
- tmp = this.ranges[j+2];
- this.ranges[j+2] = this.ranges[j];
- this.ranges[j] = tmp;
- tmp = this.ranges[j+3];
- this.ranges[j+3] = this.ranges[j+1];
- this.ranges[j+1] = tmp;
- }
- }
- }
- this.setSorted(true);
- }
-
- /**
- * this.ranges is sorted.
- */
- protected void compactRanges() {
- boolean DEBUG = false;
- if (this.ranges == null || this.ranges.length <= 2)
- return;
- if (this.isCompacted())
- return;
- int base = 0; // Index of writing point
- int target = 0; // Index of processing point
-
- while (target < this.ranges.length) {
- if (base != target) {
- this.ranges[base] = this.ranges[target++];
- this.ranges[base+1] = this.ranges[target++];
- } else
- target += 2;
- int baseend = this.ranges[base+1];
- while (target < this.ranges.length) {
- if (baseend+1 < this.ranges[target])
- break;
- if (baseend+1 == this.ranges[target]) {
- if (DEBUG)
- System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
- +", "+this.ranges[base+1]
- +"], ["+this.ranges[target]
- +", "+this.ranges[target+1]
- +"] -> ["+this.ranges[base]
- +", "+this.ranges[target+1]
- +"]");
- this.ranges[base+1] = this.ranges[target+1];
- baseend = this.ranges[base+1];
- target += 2;
- } else if (baseend >= this.ranges[target+1]) {
- if (DEBUG)
- System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
- +", "+this.ranges[base+1]
- +"], ["+this.ranges[target]
- +", "+this.ranges[target+1]
- +"] -> ["+this.ranges[base]
- +", "+this.ranges[base+1]
- +"]");
- target += 2;
- } else if (baseend < this.ranges[target+1]) {
- if (DEBUG)
- System.err.println("Token#compactRanges(): Compaction: ["+this.ranges[base]
- +", "+this.ranges[base+1]
- +"], ["+this.ranges[target]
- +", "+this.ranges[target+1]
- +"] -> ["+this.ranges[base]
- +", "+this.ranges[target+1]
- +"]");
- this.ranges[base+1] = this.ranges[target+1];
- baseend = this.ranges[base+1];
- target += 2;
- } else {
- throw new RuntimeException("Token#compactRanges(): Internel Error: ["
- +this.ranges[base]
- +","+this.ranges[base+1]
- +"] ["+this.ranges[target]
- +","+this.ranges[target+1]+"]");
- }
- } // while
- base += 2;
- }
-
- if (base != this.ranges.length) {
- int[] result = new int[base];
- System.arraycopy(this.ranges, 0, result, 0, base);
- this.ranges = result;
- }
- this.setCompacted();
- }
-
- protected void mergeRanges(Token token) {
- RangeToken tok = (RangeToken)token;
- this.sortRanges();
- tok.sortRanges();
- if (tok.ranges == null)
- return;
- this.icaseCache = null;
- this.setSorted(true);
- if (this.ranges == null) {
- this.ranges = new int[tok.ranges.length];
- System.arraycopy(tok.ranges, 0, this.ranges, 0, tok.ranges.length);
- return;
- }
- int[] result = new int[this.ranges.length+tok.ranges.length];
- for (int i = 0, j = 0, k = 0; i < this.ranges.length || j < tok.ranges.length;) {
- if (i >= this.ranges.length) {
- result[k++] = tok.ranges[j++];
- result[k++] = tok.ranges[j++];
- } else if (j >= tok.ranges.length) {
- result[k++] = this.ranges[i++];
- result[k++] = this.ranges[i++];
- } else if (tok.ranges[j] < this.ranges[i]
- || tok.ranges[j] == this.ranges[i] && tok.ranges[j+1] < this.ranges[i+1]) {
- result[k++] = tok.ranges[j++];
- result[k++] = tok.ranges[j++];
- } else {
- result[k++] = this.ranges[i++];
- result[k++] = this.ranges[i++];
- }
- }
- this.ranges = result;
- }
-
- protected void subtractRanges(Token token) {
- if (token.type == NRANGE) {
- this.intersectRanges(token);
- return;
- }
- RangeToken tok = (RangeToken)token;
- if (tok.ranges == null || this.ranges == null)
- return;
- this.icaseCache = null;
- this.sortRanges();
- this.compactRanges();
- tok.sortRanges();
- tok.compactRanges();
-
- //System.err.println("Token#substractRanges(): Entry: "+this.ranges.length+", "+tok.ranges.length);
-
- int[] result = new int[this.ranges.length+tok.ranges.length];
- int wp = 0, src = 0, sub = 0;
- while (src < this.ranges.length && sub < tok.ranges.length) {
- int srcbegin = this.ranges[src];
- int srcend = this.ranges[src+1];
- int subbegin = tok.ranges[sub];
- int subend = tok.ranges[sub+1];
- if (srcend < subbegin) { // Not overlapped
- // src: o-----o
- // sub: o-----o
- // res: o-----o
- // Reuse sub
- result[wp++] = this.ranges[src++];
- result[wp++] = this.ranges[src++];
- } else if (srcend >= subbegin
- && srcbegin <= subend) { // Overlapped
- // src: o--------o
- // sub: o----o
- // sub: o----o
- // sub: o----o
- // sub: o------------o
- if (subbegin <= srcbegin && srcend <= subend) {
- // src: o--------o
- // sub: o------------o
- // res: empty
- // Reuse sub
- src += 2;
- } else if (subbegin <= srcbegin) {
- // src: o--------o
- // sub: o----o
- // res: o-----o
- // Reuse src(=res)
- this.ranges[src] = subend+1;
- sub += 2;
- } else if (srcend <= subend) {
- // src: o--------o
- // sub: o----o
- // res: o-----o
- // Reuse sub
- result[wp++] = srcbegin;
- result[wp++] = subbegin-1;
- src += 2;
- } else {
- // src: o--------o
- // sub: o----o
- // res: o-o o-o
- // Reuse src(=right res)
- result[wp++] = srcbegin;
- result[wp++] = subbegin-1;
- this.ranges[src] = subend+1;
- sub += 2;
- }
- } else if (subend < srcbegin) {
- // Not overlapped
- // src: o-----o
- // sub: o----o
- sub += 2;
- } else {
- throw new RuntimeException("Token#subtractRanges(): Internal Error: ["+this.ranges[src]
- +","+this.ranges[src+1]
- +"] - ["+tok.ranges[sub]
- +","+tok.ranges[sub+1]
- +"]");
- }
- }
- while (src < this.ranges.length) {
- result[wp++] = this.ranges[src++];
- result[wp++] = this.ranges[src++];
- }
- this.ranges = new int[wp];
- System.arraycopy(result, 0, this.ranges, 0, wp);
- // this.ranges is sorted and compacted.
- }
-
- /**
- * @param tok Ignore whether it is NRANGE or not.
- */
- protected void intersectRanges(Token token) {
- RangeToken tok = (RangeToken)token;
- if (tok.ranges == null || this.ranges == null)
- return;
- this.icaseCache = null;
- this.sortRanges();
- this.compactRanges();
- tok.sortRanges();
- tok.compactRanges();
-
- int[] result = new int[this.ranges.length+tok.ranges.length];
- int wp = 0, src1 = 0, src2 = 0;
- while (src1 < this.ranges.length && src2 < tok.ranges.length) {
- int src1begin = this.ranges[src1];
- int src1end = this.ranges[src1+1];
- int src2begin = tok.ranges[src2];
- int src2end = tok.ranges[src2+1];
- if (src1end < src2begin) { // Not overlapped
- // src1: o-----o
- // src2: o-----o
- // res: empty
- // Reuse src2
- src1 += 2;
- } else if (src1end >= src2begin
- && src1begin <= src2end) { // Overlapped
- // src1: o--------o
- // src2: o----o
- // src2: o----o
- // src2: o----o
- // src2: o------------o
- if (src2begin <= src2begin && src1end <= src2end) {
- // src1: o--------o
- // src2: o------------o
- // res: o--------o
- // Reuse src2
- result[wp++] = src1begin;
- result[wp++] = src1end;
- src1 += 2;
- } else if (src2begin <= src1begin) {
- // src1: o--------o
- // src2: o----o
- // res: o--o
- // Reuse the rest of src1
- result[wp++] = src1begin;
- result[wp++] = src2end;
- this.ranges[src1] = src2end+1;
- src2 += 2;
- } else if (src1end <= src2end) {
- // src1: o--------o
- // src2: o----o
- // res: o--o
- // Reuse src2
- result[wp++] = src2begin;
- result[wp++] = src1end;
- src1 += 2;
- } else {
- // src1: o--------o
- // src2: o----o
- // res: o----o
- // Reuse the rest of src1
- result[wp++] = src2begin;
- result[wp++] = src2end;
- this.ranges[src1] = src2end+1;
- }
- } else if (src2end < src1begin) {
- // Not overlapped
- // src1: o-----o
- // src2: o----o
- src2 += 2;
- } else {
- throw new RuntimeException("Token#intersectRanges(): Internal Error: ["
- +this.ranges[src1]
- +","+this.ranges[src1+1]
- +"] & ["+tok.ranges[src2]
- +","+tok.ranges[src2+1]
- +"]");
- }
- }
- while (src1 < this.ranges.length) {
- result[wp++] = this.ranges[src1++];
- result[wp++] = this.ranges[src1++];
- }
- this.ranges = new int[wp];
- System.arraycopy(result, 0, this.ranges, 0, wp);
- // this.ranges is sorted and compacted.
- }
-
- /**
- * for RANGE: Creates complement.
- * for NRANGE: Creates the same meaning RANGE.
- */
- static Token complementRanges(Token token) {
- if (token.type != RANGE && token.type != NRANGE)
- throw new IllegalArgumentException("Token#complementRanges(): must be RANGE: "+token.type);
- RangeToken tok = (RangeToken)token;
- tok.sortRanges();
- tok.compactRanges();
- int len = tok.ranges.length+2;
- if (tok.ranges[0] == 0)
- len -= 2;
- int last = tok.ranges[tok.ranges.length-1];
- if (last == UTF16_MAX)
- len -= 2;
- RangeToken ret = Token.createRange();
- ret.ranges = new int[len];
- int wp = 0;
- if (tok.ranges[0] > 0) {
- ret.ranges[wp++] = 0;
- ret.ranges[wp++] = tok.ranges[0]-1;
- }
- for (int i = 1; i < tok.ranges.length-2; i += 2) {
- ret.ranges[wp++] = tok.ranges[i]+1;
- ret.ranges[wp++] = tok.ranges[i+1]-1;
- }
- if (last != UTF16_MAX) {
- ret.ranges[wp++] = last+1;
- ret.ranges[wp] = UTF16_MAX;
- }
- ret.setCompacted();
- return ret;
- }
-
- synchronized RangeToken getCaseInsensitiveToken() {
- if (this.icaseCache != null)
- return this.icaseCache;
-
- RangeToken uppers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
- for (int i = 0; i < this.ranges.length; i += 2) {
- for (int ch = this.ranges[i]; ch <= this.ranges[i+1]; ch ++) {
- if (ch > 0xffff)
- uppers.addRange(ch, ch);
- else {
- char uch = Character.toUpperCase((char)ch);
- uppers.addRange(uch, uch);
- }
- }
- }
- RangeToken lowers = this.type == Token.RANGE ? Token.createRange() : Token.createNRange();
- for (int i = 0; i < uppers.ranges.length; i += 2) {
- for (int ch = uppers.ranges[i]; ch <= uppers.ranges[i+1]; ch ++) {
- if (ch > 0xffff)
- lowers.addRange(ch, ch);
- else {
- char uch = Character.toUpperCase((char)ch);
- lowers.addRange(uch, uch);
- }
- }
- }
- lowers.mergeRanges(uppers);
- lowers.mergeRanges(this);
- lowers.compactRanges();
-
- this.icaseCache = lowers;
- return lowers;
- }
-
- void dumpRanges() {
- System.err.print("RANGE: ");
- if (this.ranges == null)
- System.err.println(" NULL");
- for (int i = 0; i < this.ranges.length; i += 2) {
- System.err.print("["+this.ranges[i]+","+this.ranges[i+1]+"] ");
- }
- System.err.println("");
- }
-
- boolean match(int ch) {
- if (this.map == null) this.createMap();
- boolean ret;
- if (this.type == RANGE) {
- if (ch < MAPSIZE)
- return (this.map[ch/32] & (1<<(ch&0x1f))) != 0;
- ret = false;
- for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
- if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
- return true;
- }
- } else {
- if (ch < MAPSIZE)
- return (this.map[ch/32] & (1<<(ch&0x1f))) == 0;
- ret = true;
- for (int i = this.nonMapIndex; i < this.ranges.length; i += 2) {
- if (this.ranges[i] <= ch && ch <= this.ranges[i+1])
- return false;
- }
- }
- return ret;
- }
-
- private static final int MAPSIZE = 256;
- private void createMap() {
- int asize = MAPSIZE/32; // 32 is the number of bits in `int'.
- int [] map = new int[asize];
- int nonMapIndex = this.ranges.length;
- for (int i = 0; i < asize; ++i) {
- map[i] = 0;
- }
- for (int i = 0; i < this.ranges.length; i += 2) {
- int s = this.ranges[i];
- int e = this.ranges[i+1];
- if (s < MAPSIZE) {
- for (int j = s; j <= e && j < MAPSIZE; j++) {
- map[j/32] |= 1<<(j&0x1f); // s&0x1f : 0-31
- }
- }
- else {
- nonMapIndex = i;
- break;
- }
- if (e >= MAPSIZE) {
- nonMapIndex = i;
- break;
- }
- }
- this.map = map;
- this.nonMapIndex = nonMapIndex;
- //for (int i = 0; i < asize; i ++) System.err.println("Map: "+Integer.toString(this.map[i], 16));
- }
-
- public String toString(int options) {
- String ret;
- if (this.type == RANGE) {
- if (this == Token.token_dot)
- ret = ".";
- else if (this == Token.token_0to9)
- ret = "\\d";
- else if (this == Token.token_wordchars)
- ret = "\\w";
- else if (this == Token.token_spaces)
- ret = "\\s";
- else {
- StringBuffer sb = new StringBuffer();
- sb.append("[");
- for (int i = 0; i < this.ranges.length; i += 2) {
- if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
- if (this.ranges[i] == this.ranges[i+1]) {
- sb.append(escapeCharInCharClass(this.ranges[i]));
- } else {
- sb.append(escapeCharInCharClass(this.ranges[i]));
- sb.append((char)'-');
- sb.append(escapeCharInCharClass(this.ranges[i+1]));
- }
- }
- sb.append("]");
- ret = sb.toString();
- }
- } else {
- if (this == Token.token_not_0to9)
- ret = "\\D";
- else if (this == Token.token_not_wordchars)
- ret = "\\W";
- else if (this == Token.token_not_spaces)
- ret = "\\S";
- else {
- StringBuffer sb = new StringBuffer();
- sb.append("[^");
- for (int i = 0; i < this.ranges.length; i += 2) {
- if ((options & RegularExpression.SPECIAL_COMMA) != 0 && i > 0) sb.append(",");
- if (this.ranges[i] == this.ranges[i+1]) {
- sb.append(escapeCharInCharClass(this.ranges[i]));
- } else {
- sb.append(escapeCharInCharClass(this.ranges[i]));
- sb.append('-');
- sb.append(escapeCharInCharClass(this.ranges[i+1]));
- }
- }
- sb.append("]");
- ret = sb.toString();
- }
- }
- return ret;
- }
-
- private static String escapeCharInCharClass(int ch) {
- String ret;
- switch (ch) {
- case '[': case ']': case '-': case '^':
- case ',': case '\\':
- ret = "\\"+(char)ch;
- break;
- case '\f': ret = "\\f"; break;
- case '\n': ret = "\\n"; break;
- case '\r': ret = "\\r"; break;
- case '\t': ret = "\\t"; break;
- case 0x1b: ret = "\\e"; break;
- //case 0x0b: ret = "\\v"; break;
- default:
- if (ch < 0x20) {
- String pre = "0"+Integer.toHexString(ch);
- ret = "\\x"+pre.substring(pre.length()-2, pre.length());
- } else if (ch >= 0x10000) {
- String pre = "0"+Integer.toHexString(ch);
- ret = "\\v"+pre.substring(pre.length()-6, pre.length());
- } else
- ret = ""+(char)ch;
- }
- return ret;
- }
-
-}