1/* 2 * USPostalCodeService.java 3 * Copyright (C) 2006 Amin Ahmad 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 */ 19package org.ahmadsoft.postal; 20 21import java.util.ArrayList; 22import java.util.Iterator; 23import java.util.List; 24 25/** 26 * Provides a postal code service implementation for the United 27 * States of America. 28 * 29 * @author Amin Ahmad 30 */ 31public class USPostalCodeService { 32 private PostalRetrievalStrategy retrievalStrategy; 33 private static boolean firstUse = true; 34 35 public USPostalCodeService() { 36 if (firstUse) { 37 firstUse = false; 38 System.out.println( 39 "U.S. Postal Code Service for Java version 1, Copyright (C) 2006 Amin Ahmad\n\n"+ 40 "U.S. Postal Code Service for Java comes with ABSOLUTELY NO WARRANTY;\n"+ 41 "This program is free software; you can redistribute it and/or modify\n"+ 42 "it under the terms of the GNU General Public License as published by\n"+ 43 "the Free Software Foundation; either version 2 of the License, or\n"+ 44 "any later version." 45 ); 46 } 47 } 48 49 /** 50 * Disposes an instance of this object. Subsequent use of this 51 * object is a logical programming error. 52 * @throws Exception 53 */ 54 public void dispose() throws Exception { 55 56 } 57 58 /** 59 * Initializes an instance of the US Postal Code Service. Initialization 60 * is required prior to use. 61 * 62 * @param retrievalStrategy an initialized postal retrieval strategy. 63 * @throws Exception if an error occurs during initialization. Renders 64 * this service instance unusable. 65 */ 66 public void initialize(PostalRetrievalStrategy retrievalStrategy) throws Exception { 67 this.retrievalStrategy = retrievalStrategy; 68 } 69 70 /** 71 * Returns a list of all recognized candidate cities for a given postal 72 * code. Candidate cities are catogorized as actual, acceptable, and 73 * unacceptable. 74 * 75 * @param postalCode the postal code. 76 * @return a list of all recognized candidate cities for a given postal 77 * code. 78 */ 79 public List getCandidates(int postalCode) { 80 return retrievalStrategy.getCandidates(postalCode); 81 } 82 83 /** 84 * Returns <code>true</code> if the given postal code is within 85 * the given state, or <code>false</code> otherwise. For 86 * example, <code>isPostalCodeIn(85050, "AZ")</code> will return <code>true</code> 87 * because the 85050 postal code is within Arizona, but 88 * <code>isPostalCodeIn(43202, "CA")</code> will return <code>false</code> 89 * because the 43202 is not in California, but rather in Ohio. 90 * 91 * @param postalCode the postal code. 92 * @param stateAbbr the two-digit, upper-case abbreviation for 93 * a state, as specified in <a href="http://www.usps.com/ncsc/lookups/usps_abbreviations.html"> 94 * United States Postal Service - Abbreviations</a>. 95 * @return <code>true</code> if the given postal code is within 96 * the given state, or <code>false</code> otherwise. 97 */ 98 public boolean isPostalCodeIn(int postalCode, String stateAbbr) { 99 List candidates = this.retrievalStrategy.getCandidates(postalCode); 100 for (Iterator i=candidates.iterator();i.hasNext();) { 101 PostalCodeEntry entry = (PostalCodeEntry) i.next(); 102 if (entry.getState().equals(stateAbbr)) { 103 return true; 104 } 105 } 106 return false; 107 } 108 109 /** 110 * Returns the <code>PostalCodeEntry</code> for the actual city 111 * registered with the U.S. Post Office for this postal code. This 112 * is a useful operation because every postal code has an official 113 * city name associated with it, as well as several other names 114 * recognized by the post office as acceptable or unacceptable. 115 * <p> 116 * For example, <code>getActualFor(90064)</code> will return a 117 * <code>PostalCodeEntry</code> for Los Angeles, CA, which is 118 * the official city for the 90064 postal code. Rancho Park, CA 119 * is an acceptable, but not the actual, name. 120 * 121 * @param postalCode 122 * @return the <code>PostalCodeEntry</code> for the actual city 123 * registered with the U.S. Post Office for this postal code, or 124 * <code>null</code> if there is no "actual" candidate for this postal code. 125 * This may occur if the postal code is not yet assigned, or is 126 * out of range. 127 */ 128 public PostalCodeEntry getActualFor(int postalCode) { 129 List candidates = this.retrievalStrategy.getCandidates(postalCode); 130 for (Iterator i=candidates.iterator();i.hasNext();) { 131 PostalCodeEntry entry = (PostalCodeEntry) i.next(); 132 if (entry.getEntryType() == PostalCodeConstants.CITY_ACTUAL) { 133 return entry; 134 } 135 } 136 return null; 137 } 138 139 /** 140 * Performs a match using default matching options well-suited 141 * to common validation. Specifically, ignore capitalization is 142 * <code>true</code>, ignore punctuation is <code>true</code>, 143 * ignore whitespace is <code>false</code>, and the minimum match 144 * level is <code>PostalCodeConstants.CITY_ACCEPTABLE</code>. 145 * 146 * @param city the city to match. 147 * @param postalCode the postal code within which to match the city. 148 * @return the closest matches to the specified city within the specified 149 * postal code. 150 * @see #match(String, int, MatchOptions) 151 */ 152 public List match(String city, int postalCode) { 153 return match(city, postalCode, new MatchOptions(true, true, false, PostalCodeConstants.CITY_ACCEPTABLE)); 154 } 155 156 /** 157 * Returns a list of the closest matches to the specified city 158 * within the specified postal code. The details of the matching 159 * process can be controlled by specifying match options. 160 * <p> 161 * Note: This method does not take the state into consideration. Rather, 162 * use #isPostalCodeIn(int, String) to determine if a postal code 163 * is within a given state. 164 * 165 * @param city the city to match. 166 * @param postalCode the postal code within which to match the city. 167 * @param options parameters to control the matching process. 168 * @return a list of the closest matches to the specified city within 169 * the specfied postal code. 170 * @see MatchOptions 171 */ 172 public List match(String city, int postalCode, MatchOptions options) { 173 List results = new ArrayList(); 174 List candidates = getCandidates(postalCode); 175 176 StringBuffer sbCity = new StringBuffer(city.length()); 177 178 for (int j=0; j<city.length(); ++j) { 179 char c = city.charAt(j); 180 if (options.isIgnoreWhitespace() && Character.isWhitespace(c)) 181 continue; 182 if (options.isIgnorePunctuation() && punctuation.indexOf(c) > -1) 183 continue; 184 if (options.isIgnoreCapitalization()) 185 c = Character.toUpperCase(c); 186 sbCity.append(c); 187 } 188 189 city = sbCity.toString(); 190 191 // Two passes. The first pass determines the minimum value 192 // while the second pass copies all values having that value 193 // into the results array. 194 // 195 196 int min = Integer.MAX_VALUE; 197 for (Iterator i=candidates.iterator();i.hasNext();) { 198 PostalCodeEntry entry = (PostalCodeEntry) i.next(); 199 if (entry.getEntryType() > options.getMaxMatchLevel()) 200 continue; 201 min = Math.min(min, StringUtils.getLevenshteinDistance(entry.getCity(), city)); 202 } 203 for (Iterator i=candidates.iterator();i.hasNext();) { 204 PostalCodeEntry entry = (PostalCodeEntry) i.next(); 205 if (entry.getEntryType() > options.getMaxMatchLevel()) 206 continue; 207 if (min == StringUtils.getLevenshteinDistance(entry.getCity(), city)) { 208 results.add(new MatchResult(entry, min)); 209 } 210 } 211 212 return results; 213 } 214 215 private static final String punctuation = "`~!@#$%^&*()_-+=[{]}\\|;:'\",<.>/?"; 216 217 /** 218 * Specifies options for performing an advanced match 219 * operation. 220 * 221 * @author Amin Ahmad 222 */ 223 public static class MatchOptions { 224 private boolean ignoreWhitespace=false; 225 private boolean ignorePunctuation=true; 226 private boolean ignoreCapitalization=true; 227 private int maxMatchLevel=PostalCodeConstants.CITY_ACCEPTABLE; 228 229 /** 230 * Creates a new match option. 231 * @param ignoreCapitalization 232 * @param ignorePunctuation 233 * @param ignoreWhitespace 234 * @param maxMatchLevel 235 */ 236 public MatchOptions(boolean ignoreCapitalization, boolean ignorePunctuation, boolean ignoreWhitespace, int maxMatchLevel) { 237 super(); 238 this.ignoreCapitalization = ignoreCapitalization; 239 this.ignorePunctuation = ignorePunctuation; 240 this.ignoreWhitespace = ignoreWhitespace; 241 this.maxMatchLevel = maxMatchLevel; 242 } 243 244 /** 245 * Returns <code>true</code> is capitalization should be ignored when computing 246 * the distance between two names, and <code>false</code> otherwise. For example, 247 * if the value were true, then the distace between Dallas and dallas would be 248 * zero, but if the value were false, then the distace would be one. 249 * 250 * @return <code>true</code> is capitalization should be ignored when computing 251 * the distance between two names, and <code>false</code> otherwise. 252 */ 253 public boolean isIgnoreCapitalization() { 254 return ignoreCapitalization; 255 } 256 257 /** 258 * Returns <code>true</code> if punctuation should be ignored when computing the 259 * distance between two names, and <code>false</code> otherwise. The 260 * punctuation characters are defined as follows: 261 * <code>`~!@#$%^&*()_-+=[{]}\|;:'",<.>/?</code> 262 * 263 * @return <code>true</code> if punctuation should be ignored when 264 * computing the distance between two names, and <code>false</code> 265 * otherwise. 266 */ 267 public boolean isIgnorePunctuation() { 268 return ignorePunctuation; 269 } 270 271 /** 272 * Returns <code>true</code> if whitespace should be ignored when computing the 273 * distance between two names, and <code>false otherwise</code>. If, for 274 * example, the value were true, then the match distance between 275 * LOSANGELES and LOS ANGELES would be one, and if the value were false, 276 * the distance would be zero. 277 * 278 * @return <code>true</code> if whitespace should be ignored when computing the 279 * distance between two names, and <code>false otherwise</code>. 280 */ 281 public boolean isIgnoreWhitespace() { 282 return ignoreWhitespace; 283 } 284 285 /** 286 * Returns the maximum match level for a match operation. Only cities within a postal 287 * code that meet the maximum match level are considered for inclusion in the results. 288 * Note that <code>CITY_ACTUAL</code> < <code>CITY_ACCEPTABLE</code> < 289 * <code>CITY_UNACCEPTABLE</code>. 290 * 291 * @return the minimum match level for a match operation. 292 */ 293 public int getMaxMatchLevel() { 294 return maxMatchLevel; 295 } 296 } 297 298 /** 299 * The result of a matching operation. 300 * @author Amin Ahmad 301 */ 302 public static class MatchResult { 303 public int distance; 304 public PostalCodeEntry match; 305 public MatchResult(PostalCodeEntry match, int distance) { 306 super(); 307 this.match = match; 308 this.distance = distance; 309 } 310 public String toString() { 311 return match + ", distance = " + distance; 312 } 313 } 314} 315