diff options
Diffstat (limited to 'java/src/com/android/i18n/addressinput/FieldVerifier.java')
-rw-r--r-- | java/src/com/android/i18n/addressinput/FieldVerifier.java | 552 |
1 files changed, 260 insertions, 292 deletions
diff --git a/java/src/com/android/i18n/addressinput/FieldVerifier.java b/java/src/com/android/i18n/addressinput/FieldVerifier.java index bd9c5e5..9dee8de 100644 --- a/java/src/com/android/i18n/addressinput/FieldVerifier.java +++ b/java/src/com/android/i18n/addressinput/FieldVerifier.java @@ -20,6 +20,7 @@ import com.android.i18n.addressinput.LookupKey.ScriptType; import java.util.EnumSet; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Pattern; @@ -31,328 +32,295 @@ import java.util.regex.Pattern; * and only provides format and match verification for the postal code field. */ public class FieldVerifier { - // Node data values are delimited by this symbol. - private static final String DATA_DELIMITER = "~"; - // Keys are built up using this delimiter: eg data/US, data/US/CA. - private static final String KEY_DELIMITER = "/"; + // Node data values are delimited by this symbol. + private static final String DATA_DELIMITER = "~"; + // Keys are built up using this delimiter: eg data/US, data/US/CA. + private static final String KEY_DELIMITER = "/"; - private String mId; - private DataSource mDataSource; + private static final FormatInterpreter FORMAT_INTERPRETER = + new FormatInterpreter(new FormOptions.Builder().build()); - private Set<AddressField> mPossibleFields; - private Set<AddressField> mRequired; - // Known values. Can be either a key, a name in Latin, or a name in native script. - private Map<String, String> mCandidateValues; + // Package-private so it can be accessed by tests. + String id; + private DataSource dataSource; - // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since - // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the - // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations - // exist. - private String[] mKeys; - // Names in Latin. These are only populated if the native/local names are in a script other than - // latin. - private String[] mLatinNames; - // Names in native script. - private String[] mLocalNames; + // Package-private so they can be accessed by tests. + Set<AddressField> possiblyUsedFields; + Set<AddressField> required; + // Known values. Can be either a key, a name in Latin, or a name in native script. + private Map<String, String> candidateValues; - // Pattern representing the format of a postal code number. - private Pattern mFormat; - // Defines the valid range of a postal code number. - private Pattern mMatch; + // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since + // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the + // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations + // exist. Package-private so it can be accessed by tests. + String[] keys; + // Names in Latin. These are only populated if the native/local names are in a script other than + // latin. + private String[] latinNames; + // Names in native script. + private String[] localNames; - /** - * Creates the root field verifier for a particular data source. - */ - public FieldVerifier(DataSource dataSource) { - mDataSource = dataSource; - populateRootVerifier(); - } + // Pattern representing the format of a postal code number. + private Pattern format; + // Defines the valid range of a postal code number. + private Pattern match; + + /** + * Creates the root field verifier for a particular data source. + */ + public FieldVerifier(DataSource dataSource) { + this.dataSource = dataSource; + populateRootVerifier(); + } + + /** + * Creates a field verifier based on its parent and on the new data for this node supplied by + * nodeData (which may be null). + * + * Package-private so it can be accessed by tests. + */ + FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) { + // Most information is inherited from the parent. + possiblyUsedFields = parent.possiblyUsedFields; + required = parent.required; + dataSource = parent.dataSource; + format = parent.format; + match = parent.match; + // Here we add in any overrides from this particular node as well as information such as + // localNames, latinNames and keys. + populate(nodeData); + // candidateValues should never be inherited from the parent, but built up from the + // localNames in this node. + candidateValues = Util.buildNameToKeyMap(keys, localNames, latinNames); + } - /** - * Creates a field verifier based on its parent and on the new data for this node supplied by - * nodeData (which may be null). - */ - private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) { - // Most information is inherited from the parent. - mPossibleFields = parent.mPossibleFields; - mRequired = parent.mRequired; - mDataSource = parent.mDataSource; - mFormat = parent.mFormat; - mMatch = parent.mMatch; - // Here we add in any overrides from this particular node as well as information such as - // localNames, latinNames and keys. - populate(nodeData); - // candidateValues should never be inherited from the parent, but built up from the - // localNames in this node. - mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames); + /** + * Sets possiblyUsedFields, required, keys and candidateValues for the root field verifier. + */ + private void populateRootVerifier() { + id = "data"; + // Keys come from the countries under "data". + AddressVerificationNodeData rootNode = dataSource.getDefaultData("data"); + if (rootNode.containsKey(AddressDataKey.COUNTRIES)) { + keys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER); } + // candidateValues is just the set of keys. + candidateValues = Util.buildNameToKeyMap(keys, null, null); - /** - * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This - * is a little messy at the moment since not all the appropriate information is actually under - * the root "data" node in the metadata. For example, "possibleFields" and "required" are not - * present there. - */ - private void populateRootVerifier() { - mId = "data"; - // Keys come from the countries under "data". - AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data"); - if (rootNode.containsKey(AddressDataKey.COUNTRIES)) { - mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER); - } - // candidateValues is just the set of keys. - mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null); + // TODO: Investigate if these need to be set here. The country level population already + // handles the fallback, the question is if validation can be done without a country level + // validator being created. + // Copy "possiblyUsedFields" and "required" from the defaults here for bootstrapping. + possiblyUsedFields = new HashSet<AddressField>(); + required = new HashSet<AddressField>(); + populatePossibleAndRequired("ZZ"); + } - // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping. - // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this - // information instead. - AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ"); - mPossibleFields = new HashSet<AddressField>(); - if (defaultZZ.containsKey(AddressDataKey.FMT)) { - mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT)); - } - mRequired = new HashSet<AddressField>(); - if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) { - mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE)); - } + /** + * Populates this verifier with data from the node data passed in and from RegionDataConstants. + * The node data may be null. + */ + private void populate(AddressVerificationNodeData nodeData) { + if (nodeData == null) { + return; + } + if (nodeData.containsKey(AddressDataKey.ID)) { + id = nodeData.get(AddressDataKey.ID); + } + if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) { + keys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER); + } + if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) { + latinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER); + } + if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) { + localNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER); + } + if (nodeData.containsKey(AddressDataKey.XZIP)) { + format = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE); + } + if (nodeData.containsKey(AddressDataKey.ZIP)) { + // This key has two different meanings, depending on whether this is a country-level key + // or not. + if (isCountryKey()) { + format = Pattern.compile(nodeData.get(AddressDataKey.ZIP), + Pattern.CASE_INSENSITIVE); + } else { + match = Pattern.compile(nodeData.get(AddressDataKey.ZIP), + Pattern.CASE_INSENSITIVE); + } + } + // If there are latin names but no local names, and there are the same number of latin names + // as there are keys, then we assume the local names are the same as the keys. + if (keys != null && localNames == null && latinNames != null && + keys.length == latinNames.length) { + localNames = keys; } - /** - * Populates this verifier with data from the node data passed in. This may be null. - */ - private void populate(AddressVerificationNodeData nodeData) { - if (nodeData == null) { - return; - } - if (nodeData.containsKey(AddressDataKey.ID)) { - mId = nodeData.get(AddressDataKey.ID); - } - if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) { - mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER); - } - if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) { - mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER); - } - if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) { - mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER); - } - if (nodeData.containsKey(AddressDataKey.FMT)) { - mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT)); - } - if (nodeData.containsKey(AddressDataKey.REQUIRE)) { - mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE)); - } - if (nodeData.containsKey(AddressDataKey.XZIP)) { - mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE); - } - if (nodeData.containsKey(AddressDataKey.ZIP)) { - // This key has two different meanings, depending on whether this is a country-level key - // or not. - if (isCountryKey()) { - mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP), - Pattern.CASE_INSENSITIVE); - } else { - mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP), - Pattern.CASE_INSENSITIVE); - } - } - // If there are latin names but no local names, and there are the same number of latin names - // as there are keys, then we assume the local names are the same as the keys. - if (mKeys != null && mLocalNames == null && mLatinNames != null && - mKeys.length == mLatinNames.length) { - mLocalNames = mKeys; - } + // These fields are populated from RegionDataConstants so that the metadata server can be + // updated without needing to be in sync with clients. + if (isCountryKey()) { + populatePossibleAndRequired(id.split(KEY_DELIMITER)[1]); } + } - FieldVerifier refineVerifier(String sublevel) { - if (Util.trimToNull(sublevel) == null) { - return new FieldVerifier(this, null); - } - // If the parent node didn't exist, then the subLevelName will start with "null". - String subLevelName = mId + KEY_DELIMITER + sublevel; - // For names with no Latin equivalent, we can look up the sublevel name directly. - AddressVerificationNodeData nodeData = mDataSource.get(subLevelName); + private void populatePossibleAndRequired(String regionCode) { + List<AddressField> possible = FORMAT_INTERPRETER.getAddressFieldOrder(regionCode); + possiblyUsedFields = convertAddressFieldsToPossiblyUsedSet(possible); + required = FormatInterpreter.getRequiredFields(regionCode); + } + + FieldVerifier refineVerifier(String sublevel) { + if (Util.trimToNull(sublevel) == null) { + return new FieldVerifier(this, null); + } + // If the parent node didn't exist, then the subLevelName will start with "null". + String subLevelName = id + KEY_DELIMITER + sublevel; + // For names with no Latin equivalent, we can look up the sublevel name directly. + AddressVerificationNodeData nodeData = dataSource.get(subLevelName); + if (nodeData != null) { + return new FieldVerifier(this, nodeData); + } + // If that failed, then we try to look up the local name equivalent of this latin name. + // First check these exist. + if (latinNames == null) { + return new FieldVerifier(this, null); + } + for (int n = 0; n < latinNames.length; n++) { + if (latinNames[n].equalsIgnoreCase(sublevel)) { + // We found a match - we should try looking up a key with the local name at the same + // index. + subLevelName = id + KEY_DELIMITER + localNames[n]; + nodeData = dataSource.get(subLevelName); if (nodeData != null) { - return new FieldVerifier(this, nodeData); + return new FieldVerifier(this, nodeData); } - // If that failed, then we try to look up the local name equivalent of this latin name. - // First check these exist. - if (mLatinNames == null) { - return new FieldVerifier(this, null); - } - for (int n = 0; n < mLatinNames.length; n++) { - if (mLatinNames[n].equalsIgnoreCase(sublevel)) { - // We found a match - we should try looking up a key with the local name at the same - // index. - subLevelName = mId + KEY_DELIMITER + mLocalNames[n]; - nodeData = mDataSource.get(subLevelName); - if (nodeData != null) { - return new FieldVerifier(this, nodeData); - } - } - } - // No sub-verifiers were found. - return new FieldVerifier(this, null); + } } + // No sub-verifiers were found. + return new FieldVerifier(this, null); + } - /** - * Returns the ID of this verifier. - */ - @Override - public String toString() { - return mId; - } + /** + * Returns the ID of this verifier. + */ + @Override + public String toString() { + return id; + } - /** - * Checks a value in a particular script for a particular field to see if it causes the problem - * specified. If so, this problem is added to the AddressProblems collection passed in. Returns - * true if no problem was found. - */ - protected boolean check(ScriptType script, AddressProblemType problem, AddressField field, - String value, AddressProblems problems) { - boolean problemFound = false; + /** + * Checks a value in a particular script for a particular field to see if it causes the problem + * specified. If so, this problem is added to the AddressProblems collection passed in. Returns + * true if no problem was found. + */ + protected boolean check(ScriptType script, AddressProblemType problem, AddressField field, + String value, AddressProblems problems) { + boolean problemFound = false; - String trimmedValue = Util.trimToNull(value); - switch (problem) { - case USING_UNUSED_FIELD: - if (trimmedValue != null && !mPossibleFields.contains(field)) { - problemFound = true; - } - break; - case MISSING_REQUIRED_FIELD: - if (mRequired.contains(field) && trimmedValue == null) { - problemFound = true; - } - break; - case UNKNOWN_VALUE: - // An empty string will never be an UNKNOWN_VALUE. It is invalid - // only when it appears in a required field (In that case it will - // be reported as MISSING_REQUIRED_FIELD). - if (trimmedValue == null) { - break; - } - problemFound = !isKnownInScript(script, trimmedValue); - break; - case UNRECOGNIZED_FORMAT: - if (trimmedValue != null && mFormat != null && - !mFormat.matcher(trimmedValue).matches()) { - problemFound = true; - } - break; - case MISMATCHING_VALUE: - if (trimmedValue != null && mMatch != null && - !mMatch.matcher(trimmedValue).lookingAt()) { - problemFound = true; - } - break; - default: - throw new RuntimeException("Unknown problem: " + problem); + String trimmedValue = Util.trimToNull(value); + switch (problem) { + case USING_UNUSED_FIELD: + if (trimmedValue != null && !possiblyUsedFields.contains(field)) { + problemFound = true; } - if (problemFound) { - problems.add(field, problem); - } - return !problemFound; - } - - /** - * Checks the value of a particular field in a particular script against the known values for - * this field. If script is null, it checks both the local and the latin values. Otherwise it - * checks only the values in the script specified. - */ - private boolean isKnownInScript(ScriptType script, String value) { - String trimmedValue = Util.trimToNull(value); - Util.checkNotNull(trimmedValue); - if (script == null) { - return (mCandidateValues == null || - mCandidateValues.containsKey(trimmedValue.toLowerCase())); + break; + case MISSING_REQUIRED_FIELD: + if (required.contains(field) && trimmedValue == null) { + problemFound = true; } - // Otherwise, if we know the script, we want to restrict the candidates to only names in - // that script. - String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames; - Set<String> candidates = new HashSet<String>(); - if (namesToConsider != null) { - for (String name : namesToConsider) { - candidates.add(name.toLowerCase()); - } + break; + case UNKNOWN_VALUE: + // An empty string will never be an UNKNOWN_VALUE. It is invalid + // only when it appears in a required field (In that case it will + // be reported as MISSING_REQUIRED_FIELD). + if (trimmedValue == null) { + break; } - if (mKeys != null) { - for (String name : mKeys) { - candidates.add(name.toLowerCase()); - } + problemFound = !isKnownInScript(script, trimmedValue); + break; + case UNRECOGNIZED_FORMAT: + if (trimmedValue != null && format != null && + !format.matcher(trimmedValue).matches()) { + problemFound = true; } - - if (candidates.size() == 0 || trimmedValue == null) { - return true; + break; + case MISMATCHING_VALUE: + if (trimmedValue != null && match != null && + !match.matcher(trimmedValue).lookingAt()) { + problemFound = true; } - - return candidates.contains(value.toLowerCase()); + break; + default: + throw new RuntimeException("Unknown problem: " + problem); } + if (problemFound) { + problems.add(field, problem); + } + return !problemFound; + } - /** - * Parses the value of the "fmt" key in the data to see which fields are used for a particular - * country. Returns a list of all fields found. Country is always assumed to be present. Skips - * characters that indicate new-lines in the format information, as well as any characters not - * escaped with "%". - */ - private static Set<AddressField> parseAddressFields(String value) { - EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY); - boolean escaped = false; - for (char c : value.toCharArray()) { - if (escaped) { - escaped = false; - if (c == 'n') { - continue; - } - AddressField f = AddressField.of(c); - if (f == null) { - throw new RuntimeException( - "Unrecognized character '" + c + "' in format pattern: " + value); - } - result.add(f); - } else if (c == '%') { - escaped = true; - } - } - // These fields are not mentioned in the metadata at the moment since there is an effort to - // move away from STREET_ADDRESS and use these fields instead. This means they have to be - // removed here. - result.remove(AddressField.ADDRESS_LINE_1); - result.remove(AddressField.ADDRESS_LINE_2); - - return result; + /** + * Checks the value of a particular field in a particular script against the known values for + * this field. If script is null, it checks both the local and the latin values. Otherwise it + * checks only the values in the script specified. + */ + private boolean isKnownInScript(ScriptType script, String value) { + String trimmedValue = Util.trimToNull(value); + Util.checkNotNull(trimmedValue); + if (script == null) { + return (candidateValues == null || + candidateValues.containsKey(trimmedValue.toLowerCase())); + } + // Otherwise, if we know the script, we want to restrict the candidates to only names in + // that script. + String[] namesToConsider = (script == ScriptType.LATIN) ? latinNames : localNames; + Set<String> candidates = new HashSet<String>(); + if (namesToConsider != null) { + for (String name : namesToConsider) { + candidates.add(name.toLowerCase()); + } + } + if (keys != null) { + for (String name : keys) { + candidates.add(name.toLowerCase()); + } } - /** - * Parses the value of the "required" key in the data. Adds country as well as any other field - * mentioned in the string. - */ - private static Set<AddressField> parseRequireString(String value) { - // Country is always required - EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY); + if (candidates.size() == 0 || trimmedValue == null) { + return true; + } - for (char c : value.toCharArray()) { - AddressField f = AddressField.of(c); - if (f == null) { - throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: " - + value); - } - result.add(f); - } - // These fields are not mentioned in the metadata at the moment since there is an effort to - // move away from STREET_ADDRESS and use these fields instead. This means they have to be - // removed here. - result.remove(AddressField.ADDRESS_LINE_1); - result.remove(AddressField.ADDRESS_LINE_2); + return candidates.contains(value.toLowerCase()); + } - return result; + /** + * Converts a list of address fields to a set of possibly used fields. Adds country and handles + * street address. + */ + private static Set<AddressField> convertAddressFieldsToPossiblyUsedSet( + List<AddressField> fields) { + // COUNTRY is never unexpected. + EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY); + for (AddressField field : fields) { + // Replace ADDRESS_LINE with STREET_ADDRESS because that's what the validation expects. + if (field == AddressField.ADDRESS_LINE_1 || + field == AddressField.ADDRESS_LINE_2) { + result.add(AddressField.STREET_ADDRESS); + } else { + result.add(field); + } } + return result; + } - /** - * Returns true if this key represents a country. We assume all keys with only one delimiter are - * at the country level (such as "data/US"). - */ - private boolean isCountryKey() { - Util.checkNotNull(mId, "Cannot use null as key"); - return mId.split(KEY_DELIMITER).length == 2; - } + /** + * Returns true if this key represents a country. We assume all keys with only one delimiter are + * at the country level (such as "data/US"). + */ + private boolean isCountryKey() { + Util.checkNotNull(id, "Cannot use null as key"); + return id.split(KEY_DELIMITER).length == 2; + } } |