1 files changed, 260 insertions, 292 deletions
diff --git a/java/src/com/android/i18n/addressinput/FieldVerifier.java b/java/src/com/android/i18n/addressinput/FieldVerifier.java
index bd9c5e5..9dee8de 100644
--- a/java/src/com/android/i18n/addressinput/FieldVerifier.java
+++ b/java/src/com/android/i18n/addressinput/FieldVerifier.java
@@ -20,6 +20,7 @@ import com.android.i18n.addressinput.LookupKey.ScriptType;
 
 import java.util.EnumSet;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.regex.Pattern;
@@ -31,328 +32,295 @@ import java.util.regex.Pattern;
  * and only provides format and match verification for the postal code field.
  */
 public class FieldVerifier {
-    // Node data values are delimited by this symbol.
-    private static final String DATA_DELIMITER = "~";
-    // Keys are built up using this delimiter: eg data/US, data/US/CA.
-    private static final String KEY_DELIMITER = "/";
+  // Node data values are delimited by this symbol.
+  private static final String DATA_DELIMITER = "~";
+  // Keys are built up using this delimiter: eg data/US, data/US/CA.
+  private static final String KEY_DELIMITER = "/";
 
-    private String mId;
-    private DataSource mDataSource;
+  private static final FormatInterpreter FORMAT_INTERPRETER =
+      new FormatInterpreter(new FormOptions.Builder().build());
 
-    private Set<AddressField> mPossibleFields;
-    private Set<AddressField> mRequired;
-    // Known values. Can be either a key, a name in Latin, or a name in native script.
-    private Map<String, String> mCandidateValues;
+  // Package-private so it can be accessed by tests.
+  String id;
+  private DataSource dataSource;
 
-    // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since
-    // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the
-    // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations
-    // exist.
-    private String[] mKeys;
-    // Names in Latin. These are only populated if the native/local names are in a script other than
-    // latin.
-    private String[] mLatinNames;
-    // Names in native script.
-    private String[] mLocalNames;
+  // Package-private so they can be accessed by tests.
+  Set<AddressField> possiblyUsedFields;
+  Set<AddressField> required;
+  // Known values. Can be either a key, a name in Latin, or a name in native script.
+  private Map<String, String> candidateValues;
 
-    // Pattern representing the format of a postal code number.
-    private Pattern mFormat;
-    // Defines the valid range of a postal code number.
-    private Pattern mMatch;
+  // Keys for the subnodes of this verifier. For example, a key for the US would be CA, since
+  // there is a sub-verifier with the ID "data/US/CA". Keys may be the local names of the
+  // locations in the next level of the hierarchy, or the abbreviations if suitable abbreviations
+  // exist. Package-private so it can be accessed by tests.
+  String[] keys;
+  // Names in Latin. These are only populated if the native/local names are in a script other than
+  // Latin.
+  private String[] latinNames;
+  // Names in native script.
+  private String[] localNames;
 
-    /**
-     * Creates the root field verifier for a particular data source.
-     */
-    public FieldVerifier(DataSource dataSource) {
-        mDataSource = dataSource;
-        populateRootVerifier();
-    }
+  // Pattern representing the format of a postal code number.
+  private Pattern format;
+  // Defines the valid range of a postal code number.
+  private Pattern match;
+
+  /**
+   * Creates the root field verifier for a particular data source.
+   */
+  public FieldVerifier(DataSource dataSource) {
+    this.dataSource = dataSource;
+    populateRootVerifier();
+  }
+
+  /**
+   * Creates a field verifier based on its parent and on the new data for this node supplied by
+   * nodeData (which may be null).
+   *
+   * Package-private so it can be accessed by tests.
+   */
+  FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) {
+    // Most information is inherited from the parent.
+    possiblyUsedFields = parent.possiblyUsedFields;
+    required = parent.required;
+    dataSource = parent.dataSource;
+    format = parent.format;
+    match = parent.match;
+    // Here we add in any overrides from this particular node as well as information such as
+    // localNames, latinNames and keys.
+    populate(nodeData);
+    // candidateValues should never be inherited from the parent, but built up from the
+    // localNames in this node.
+    candidateValues = Util.buildNameToKeyMap(keys, localNames, latinNames);
+  }
 
-    /**
-     * Creates a field verifier based on its parent and on the new data for this node supplied by
-     * nodeData (which may be null).
-     */
-    private FieldVerifier(FieldVerifier parent, AddressVerificationNodeData nodeData) {
-        // Most information is inherited from the parent.
-        mPossibleFields = parent.mPossibleFields;
-        mRequired = parent.mRequired;
-        mDataSource = parent.mDataSource;
-        mFormat = parent.mFormat;
-        mMatch = parent.mMatch;
-        // Here we add in any overrides from this particular node as well as information such as
-        // localNames, latinNames and keys.
-        populate(nodeData);
-        // candidateValues should never be inherited from the parent, but built up from the
-        // localNames in this node.
-        mCandidateValues = Util.buildNameToKeyMap(mKeys, mLocalNames, mLatinNames);
+  /**
+   * Sets possiblyUsedFields, required, keys and candidateValues for the root field verifier.
+   */
+  private void populateRootVerifier() {
+    id = "data";
+    // Keys come from the countries under "data".
+    AddressVerificationNodeData rootNode = dataSource.getDefaultData("data");
+    if (rootNode.containsKey(AddressDataKey.COUNTRIES)) {
+      keys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER);
     }
+    // candidateValues is just the set of keys.
+    candidateValues = Util.buildNameToKeyMap(keys, null, null);
 
-    /**
-     * Sets possibleFieldsUsed, required, keys and candidateValues for the root field verifier. This
-     * is a little messy at the moment since not all the appropriate information is actually under
-     * the root "data" node in the metadata. For example, "possibleFields" and "required" are not
-     * present there.
-     */
-    private void populateRootVerifier() {
-        mId = "data";
-        // Keys come from the countries under "data".
-        AddressVerificationNodeData rootNode = mDataSource.getDefaultData("data");
-        if (rootNode.containsKey(AddressDataKey.COUNTRIES)) {
-            mKeys = rootNode.get(AddressDataKey.COUNTRIES).split(DATA_DELIMITER);
-        }
-        // candidateValues is just the set of keys.
-        mCandidateValues = Util.buildNameToKeyMap(mKeys, null, null);
+    // TODO: Investigate whether these need to be set here. The country-level population already
+    // handles the fallback; the question is whether validation can be done without a
+    // country-level validator being created.
+    // Copy "possiblyUsedFields" and "required" from the defaults here for bootstrapping.
+    possiblyUsedFields = new HashSet<AddressField>();
+    required = new HashSet<AddressField>();
+    populatePossibleAndRequired("ZZ");
+  }
 
-        // Copy "possibleFieldsUsed" and "required" from the defaults here for bootstrapping.
-        // TODO: Investigate a cleaner way of doing this - maybe we should populate "data" with this
-        // information instead.
-        AddressVerificationNodeData defaultZZ = mDataSource.getDefaultData("data/ZZ");
-        mPossibleFields = new HashSet<AddressField>();
-        if (defaultZZ.containsKey(AddressDataKey.FMT)) {
-            mPossibleFields = parseAddressFields(defaultZZ.get(AddressDataKey.FMT));
-        }
-        mRequired = new HashSet<AddressField>();
-        if (defaultZZ.containsKey(AddressDataKey.REQUIRE)) {
-            mRequired = parseRequireString(defaultZZ.get(AddressDataKey.REQUIRE));
-        }
+  /**
+   * Populates this verifier with data from the node data passed in and from RegionDataConstants.
+   * The node data may be null.
+   */
+  private void populate(AddressVerificationNodeData nodeData) {
+    if (nodeData == null) {
+      return;
+    }
+    if (nodeData.containsKey(AddressDataKey.ID)) {
+      id = nodeData.get(AddressDataKey.ID);
+    }
+    if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) {
+      keys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER);
+    }
+    if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) {
+      latinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER);
+    }
+    if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) {
+      localNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER);
+    }
+    if (nodeData.containsKey(AddressDataKey.XZIP)) {
+      format = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE);
+    }
+    if (nodeData.containsKey(AddressDataKey.ZIP)) {
+      // This key has two different meanings, depending on whether this is a country-level key
+      // or not.
+      if (isCountryKey()) {
+        format = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
+            Pattern.CASE_INSENSITIVE);
+      } else {
+        match = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
+            Pattern.CASE_INSENSITIVE);
+      }
+    }
+    // If there are latin names but no local names, and there are the same number of latin names
+    // as there are keys, then we assume the local names are the same as the keys.
+    if (keys != null && localNames == null && latinNames != null &&
+        keys.length == latinNames.length) {
+      localNames = keys;
     }
 
-    /**
-     * Populates this verifier with data from the node data passed in. This may be null.
-     */
-    private void populate(AddressVerificationNodeData nodeData) {
-        if (nodeData == null) {
-            return;
-        }
-        if (nodeData.containsKey(AddressDataKey.ID)) {
-            mId = nodeData.get(AddressDataKey.ID);
-        }
-        if (nodeData.containsKey(AddressDataKey.SUB_KEYS)) {
-            mKeys = nodeData.get(AddressDataKey.SUB_KEYS).split(DATA_DELIMITER);
-        }
-        if (nodeData.containsKey(AddressDataKey.SUB_LNAMES)) {
-            mLatinNames = nodeData.get(AddressDataKey.SUB_LNAMES).split(DATA_DELIMITER);
-        }
-        if (nodeData.containsKey(AddressDataKey.SUB_NAMES)) {
-            mLocalNames = nodeData.get(AddressDataKey.SUB_NAMES).split(DATA_DELIMITER);
-        }
-        if (nodeData.containsKey(AddressDataKey.FMT)) {
-            mPossibleFields = parseAddressFields(nodeData.get(AddressDataKey.FMT));
-        }
-        if (nodeData.containsKey(AddressDataKey.REQUIRE)) {
-            mRequired = parseRequireString(nodeData.get(AddressDataKey.REQUIRE));
-        }
-        if (nodeData.containsKey(AddressDataKey.XZIP)) {
-            mFormat = Pattern.compile(nodeData.get(AddressDataKey.XZIP), Pattern.CASE_INSENSITIVE);
-        }
-        if (nodeData.containsKey(AddressDataKey.ZIP)) {
-            // This key has two different meanings, depending on whether this is a country-level key
-            // or not.
-            if (isCountryKey()) {
-                mFormat = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
-                                          Pattern.CASE_INSENSITIVE);
-            } else {
-                mMatch = Pattern.compile(nodeData.get(AddressDataKey.ZIP),
-                                         Pattern.CASE_INSENSITIVE);
-            }
-        }
-        // If there are latin names but no local names, and there are the same number of latin names
-        // as there are keys, then we assume the local names are the same as the keys.
-        if (mKeys != null && mLocalNames == null && mLatinNames != null &&
-            mKeys.length == mLatinNames.length) {
-            mLocalNames = mKeys;
-        }
+    // These fields are populated from RegionDataConstants so that the metadata server can be
+    // updated without needing to be in sync with clients.
+    if (isCountryKey()) {
+      populatePossibleAndRequired(id.split(KEY_DELIMITER)[1]);
     }
+  }
 
-    FieldVerifier refineVerifier(String sublevel) {
-        if (Util.trimToNull(sublevel) == null) {
-            return new FieldVerifier(this, null);
-        }
-        // If the parent node didn't exist, then the subLevelName will start with "null".
-        String subLevelName = mId + KEY_DELIMITER + sublevel;
-        // For names with no Latin equivalent, we can look up the sublevel name directly.
-        AddressVerificationNodeData nodeData = mDataSource.get(subLevelName);
+  private void populatePossibleAndRequired(String regionCode) {
+    List<AddressField> possible = FORMAT_INTERPRETER.getAddressFieldOrder(regionCode);
+    possiblyUsedFields = convertAddressFieldsToPossiblyUsedSet(possible);
+    required = FormatInterpreter.getRequiredFields(regionCode);
+  }
+
+  FieldVerifier refineVerifier(String sublevel) {
+    if (Util.trimToNull(sublevel) == null) {
+      return new FieldVerifier(this, null);
+    }
+    // If the parent node didn't exist, then the subLevelName will start with "null".
+    String subLevelName = id + KEY_DELIMITER + sublevel;
+    // For names with no Latin equivalent, we can look up the sublevel name directly.
+    AddressVerificationNodeData nodeData = dataSource.get(subLevelName);
+    if (nodeData != null) {
+      return new FieldVerifier(this, nodeData);
+    }
+    // If that failed, then we try to look up the local name equivalent of this latin name.
+    // First check these exist.
+    if (latinNames == null) {
+      return new FieldVerifier(this, null);
+    }
+    for (int n = 0; n < latinNames.length; n++) {
+      if (latinNames[n].equalsIgnoreCase(sublevel)) {
+        // We found a match - we should try looking up a key with the local name at the same
+        // index.
+        subLevelName = id + KEY_DELIMITER + localNames[n];
+        nodeData = dataSource.get(subLevelName);
         if (nodeData != null) {
-            return new FieldVerifier(this, nodeData);
+          return new FieldVerifier(this, nodeData);
         }
-        // If that failed, then we try to look up the local name equivalent of this latin name.
-        // First check these exist.
-        if (mLatinNames == null) {
-            return new FieldVerifier(this, null);
-        }
-        for (int n = 0; n < mLatinNames.length; n++) {
-            if (mLatinNames[n].equalsIgnoreCase(sublevel)) {
-                // We found a match - we should try looking up a key with the local name at the same
-                // index.
-                subLevelName = mId + KEY_DELIMITER + mLocalNames[n];
-                nodeData = mDataSource.get(subLevelName);
-                if (nodeData != null) {
-                    return new FieldVerifier(this, nodeData);
-                }
-            }
-        }
-        // No sub-verifiers were found.
-        return new FieldVerifier(this, null);
+      }
     }
+    // No sub-verifiers were found.
+    return new FieldVerifier(this, null);
+  }
 
-    /**
-     * Returns the ID of this verifier.
-     */
-    @Override
-    public String toString() {
-        return mId;
-    }
+  /**
+   * Returns the ID of this verifier.
+   */
+  @Override
+  public String toString() {
+    return id;
+  }
 
-    /**
-     * Checks a value in a particular script for a particular field to see if it causes the problem
-     * specified. If so, this problem is added to the AddressProblems collection passed in. Returns
-     * true if no problem was found.
-     */
-    protected boolean check(ScriptType script, AddressProblemType problem, AddressField field,
-            String value, AddressProblems problems) {
-        boolean problemFound = false;
+  /**
+   * Checks a value in a particular script for a particular field to see if it causes the problem
+   * specified. If so, this problem is added to the AddressProblems collection passed in. Returns
+   * true if no problem was found.
+   */
+  protected boolean check(ScriptType script, AddressProblemType problem, AddressField field,
+      String value, AddressProblems problems) {
+    boolean problemFound = false;
 
-        String trimmedValue = Util.trimToNull(value);
-        switch (problem) {
-            case USING_UNUSED_FIELD:
-                if (trimmedValue != null && !mPossibleFields.contains(field)) {
-                    problemFound = true;
-                }
-                break;
-            case MISSING_REQUIRED_FIELD:
-                if (mRequired.contains(field) && trimmedValue == null) {
-                    problemFound = true;
-                }
-                break;
-            case UNKNOWN_VALUE:
-                // An empty string will never be an UNKNOWN_VALUE. It is invalid
-                // only when it appears in a required field (In that case it will
-                // be reported as MISSING_REQUIRED_FIELD).
-                if (trimmedValue == null) {
-                    break;
-                }
-                problemFound = !isKnownInScript(script, trimmedValue);
-                break;
-            case UNRECOGNIZED_FORMAT:
-                if (trimmedValue != null && mFormat != null &&
-                        !mFormat.matcher(trimmedValue).matches()) {
-                    problemFound = true;
-                }
-                break;
-            case MISMATCHING_VALUE:
-                if (trimmedValue != null && mMatch != null &&
-                        !mMatch.matcher(trimmedValue).lookingAt()) {
-                    problemFound = true;
-                }
-                break;
-            default:
-                throw new RuntimeException("Unknown problem: " + problem);
+    String trimmedValue = Util.trimToNull(value);
+    switch (problem) {
+      case USING_UNUSED_FIELD:
+        if (trimmedValue != null && !possiblyUsedFields.contains(field)) {
+          problemFound = true;
         }
-        if (problemFound) {
-            problems.add(field, problem);
-        }
-        return !problemFound;
-    }
-
-    /**
-     * Checks the value of a particular field in a particular script against the known values for
-     * this field. If script is null, it checks both the local and the latin values. Otherwise it
-     * checks only the values in the script specified.
-     */
-    private boolean isKnownInScript(ScriptType script, String value) {
-        String trimmedValue = Util.trimToNull(value);
-        Util.checkNotNull(trimmedValue);
-        if (script == null) {
-            return (mCandidateValues == null ||
-                    mCandidateValues.containsKey(trimmedValue.toLowerCase()));
+        break;
+      case MISSING_REQUIRED_FIELD:
+        if (required.contains(field) && trimmedValue == null) {
+          problemFound = true;
         }
-        // Otherwise, if we know the script, we want to restrict the candidates to only names in
-        // that script.
-        String[] namesToConsider = (script == ScriptType.LATIN) ? mLatinNames : mLocalNames;
-        Set<String> candidates = new HashSet<String>();
-        if (namesToConsider != null) {
-            for (String name : namesToConsider) {
-                candidates.add(name.toLowerCase());
-            }
+        break;
+      case UNKNOWN_VALUE:
+        // An empty string will never be an UNKNOWN_VALUE. It is invalid
+        // only when it appears in a required field (In that case it will
+        // be reported as MISSING_REQUIRED_FIELD).
+        if (trimmedValue == null) {
+          break;
         }
-        if (mKeys != null) {
-            for (String name : mKeys) {
-                candidates.add(name.toLowerCase());
-            }
+        problemFound = !isKnownInScript(script, trimmedValue);
+        break;
+      case UNRECOGNIZED_FORMAT:
+        if (trimmedValue != null && format != null &&
+            !format.matcher(trimmedValue).matches()) {
+          problemFound = true;
         }
-
-        if (candidates.size() == 0 || trimmedValue == null) {
-            return true;
+        break;
+      case MISMATCHING_VALUE:
+        if (trimmedValue != null && match != null &&
+            !match.matcher(trimmedValue).lookingAt()) {
+          problemFound = true;
         }
-
-        return candidates.contains(value.toLowerCase());
+        break;
+      default:
+        throw new RuntimeException("Unknown problem: " + problem);
     }
+    if (problemFound) {
+      problems.add(field, problem);
+    }
+    return !problemFound;
+  }
 
-    /**
-     * Parses the value of the "fmt" key in the data to see which fields are used for a particular
-     * country. Returns a list of all fields found. Country is always assumed to be present. Skips
-     * characters that indicate new-lines in the format information, as well as any characters not
-     * escaped with "%".
-     */
-    private static Set<AddressField> parseAddressFields(String value) {
-        EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
-        boolean escaped = false;
-        for (char c : value.toCharArray()) {
-            if (escaped) {
-                escaped = false;
-                if (c == 'n') {
-                    continue;
-                }
-                AddressField f = AddressField.of(c);
-                if (f == null) {
-                    throw new RuntimeException(
-                            "Unrecognized character '" + c + "' in format pattern: " + value);
-                }
-                result.add(f);
-            } else if (c == '%') {
-                escaped = true;
-            }
-        }
-        // These fields are not mentioned in the metadata at the moment since there is an effort to
-        // move away from STREET_ADDRESS and use these fields instead. This means they have to be
-        // removed here.
-        result.remove(AddressField.ADDRESS_LINE_1);
-        result.remove(AddressField.ADDRESS_LINE_2);
-
-        return result;
+  /**
+   * Checks the value of a particular field in a particular script against the known values for
+   * this field. If script is null, it checks the keys and both the local and the Latin names.
+   * Otherwise it checks only the keys and the names in the script specified.
+   */
+  private boolean isKnownInScript(ScriptType script, String value) {
+    String trimmedValue = Util.trimToNull(value);
+    Util.checkNotNull(trimmedValue);
+    if (script == null) {
+      return (candidateValues == null ||
+          candidateValues.containsKey(trimmedValue.toLowerCase()));
+    }
+    // Otherwise, if we know the script, we want to restrict the candidates to only names in
+    // that script.
+    String[] namesToConsider = (script == ScriptType.LATIN) ? latinNames : localNames;
+    Set<String> candidates = new HashSet<String>();
+    if (namesToConsider != null) {
+      for (String name : namesToConsider) {
+        candidates.add(name.toLowerCase());
+      }
+    }
+    if (keys != null) {
+      for (String name : keys) {
+        candidates.add(name.toLowerCase());
+      }
     }
 
-    /**
-     * Parses the value of the "required" key in the data. Adds country as well as any other field
-     * mentioned in the string.
-     */
-    private static Set<AddressField> parseRequireString(String value) {
-        // Country is always required
-        EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
+    if (candidates.size() == 0 || trimmedValue == null) {
+      return true;
+    }
 
-        for (char c : value.toCharArray()) {
-            AddressField f = AddressField.of(c);
-            if (f == null) {
-                throw new RuntimeException("Unrecognized character '" + c + "' in require pattern: "
-                        + value);
-            }
-            result.add(f);
-        }
-        // These fields are not mentioned in the metadata at the moment since there is an effort to
-        // move away from STREET_ADDRESS and use these fields instead. This means they have to be
-        // removed here.
-        result.remove(AddressField.ADDRESS_LINE_1);
-        result.remove(AddressField.ADDRESS_LINE_2);
+    return candidates.contains(value.toLowerCase());
+  }
 
-        return result;
+  /**
+   * Converts a list of address fields to a set of possibly used fields. Always adds COUNTRY and
+   * maps ADDRESS_LINE_1 and ADDRESS_LINE_2 to STREET_ADDRESS.
+   */
+  private static Set<AddressField> convertAddressFieldsToPossiblyUsedSet(
+      List<AddressField> fields) {
+    // COUNTRY is never unexpected.
+    EnumSet<AddressField> result = EnumSet.of(AddressField.COUNTRY);
+    for (AddressField field : fields) {
+      // Replace ADDRESS_LINE with STREET_ADDRESS because that's what the validation expects.
+      if (field == AddressField.ADDRESS_LINE_1 ||
+          field == AddressField.ADDRESS_LINE_2) {
+        result.add(AddressField.STREET_ADDRESS);
+      } else {
+        result.add(field);
+      }
     }
+    return result;
+  }
 
-    /**
-     * Returns true if this key represents a country. We assume all keys with only one delimiter are
-     * at the country level (such as "data/US").
-     */
-    private boolean isCountryKey() {
-        Util.checkNotNull(mId, "Cannot use null as key");
-        return mId.split(KEY_DELIMITER).length == 2;
-    }
+  /**
+   * Returns true if this key represents a country. We assume all keys with only one delimiter are
+   * at the country level (such as "data/US").
+   */
+  private boolean isCountryKey() {
+    Util.checkNotNull(id, "Cannot use null as key");
+    return id.split(KEY_DELIMITER).length == 2;
+  }
 }