View Javadoc
/*
 * $Header: /home/cvs/jakarta-commons/validator/src/share/org/apache/commons/validator/UrlValidator.java,v 1.19 2004/02/21 17:10:29 rleland Exp $
 * $Revision: 1.19 $
 * $Date: 2004/02/21 17:10:29 $
 *
 * ====================================================================
 * Copyright 2001-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.validator;

import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.validator.util.Flags;
import org.apache.oro.text.perl.Perl5Util;

/**
 * <p>Validates URLs.</p>
 * Behaviour of validation is modified by passing in options:
 * <ul>
 * <li>ALLOW_2_SLASHES - [FALSE]  Allows double '/' characters in the path
 * component.</li>
 * <li>NO_FRAGMENTS - [FALSE]  By default fragments are allowed, if this option is
 * included then fragments are flagged as illegal.</li>
 * <li>ALLOW_ALL_SCHEMES - [FALSE] By default only http, https, and ftp are
 * considered valid schemes.  Enabling this option will let any scheme pass validation.</li>
 * </ul>
 *
 * <p>Originally based on a php script by Debbie Dyer, validation.php v1.2b, Date: 03/07/02,
 * http://javascript.internet.com. However, this validation now bears little resemblance
 * to the php original.</p>
 * <pre>
 *   Example of usage:
 *   Construct a UrlValidator with valid schemes of "http", and "https".
 *
 *    String[] schemes = {"http","https"};
 *    UrlValidator urlValidator = new UrlValidator(schemes);
 *    if (urlValidator.isValid("ftp://foo.bar.com/file")) {
 *       System.out.println("url is valid");
 *    } else {
 *       System.out.println("url is invalid");
 *    }
 *
 *    prints "url is invalid"
 *   If instead the default constructor is used.
 *
 *    UrlValidator urlValidator = new UrlValidator();
 *    if (urlValidator.isValid("ftp://foo.bar.com/file")) {
 *       System.out.println("url is valid");
 *    } else {
 *       System.out.println("url is invalid");
 *    }
 *
 *   prints out "url is valid"
 *  </pre>
 *
 * @see
 * <a href='http://www.ietf.org/rfc/rfc2396.txt' >
 *  Uniform Resource Identifiers (URI): Generic Syntax
 * </a>
 *
 * @since Validator 1.1
 */
public class UrlValidator implements Serializable {

    /**
     * Allows all validly formatted schemes to pass validation instead of supplying a
     * set of valid schemes.
     */
    public static final int ALLOW_ALL_SCHEMES = 1 << 0;

    /**
     * Allow two slashes in the path component of the URL.
     */
    public static final int ALLOW_2_SLASHES = 1 << 1;

    /**
     * Enabling this option disallows any URL fragments.
     */
    public static final int NO_FRAGMENTS = 1 << 2;

    private static final String ALPHA_CHARS = "a-zA-Z";

    private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";

    private static final String SPECIAL_CHARS = ";/@&=,.?:+$";

    /** Character class matching anything that is neither whitespace nor a special character. */
    private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";

    private static final String SCHEME_CHARS = ALPHA_CHARS;

    // Drop numeric, and  "+-." for now
    private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";

    /** One host label: a non-empty run of valid characters. */
    private static final String ATOM = VALID_CHARS + '+';

    /**
     * This expression derived/taken from the BNF for URI (RFC2396).
     * Capture groups used by this class: 2 = scheme, 4 = authority, 5 = path,
     * 7 = query, 9 = fragment.
     */
    private static final String URL_PATTERN =
            "/^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?/";

    /**
     * Schema/Protocol (ie. http:, ftp:, file:, etc).
     */
    private static final int PARSE_URL_SCHEME = 2;

    /**
     * Includes hostname/ip and port number.
     */
    private static final int PARSE_URL_AUTHORITY = 4;

    private static final int PARSE_URL_PATH = 5;

    private static final int PARSE_URL_QUERY = 7;

    private static final int PARSE_URL_FRAGMENT = 9;

    /**
     * Protocol (ie. http:, ftp:,https:). Only the first character is checked.
     */
    private static final String SCHEME_PATTERN = "/^[" + SCHEME_CHARS + "]/";

    /**
     * Splits an authority into its parts.
     * Capture groups: 1 = host name or IP, 2 = port including the leading ':',
     * 3 = anything left over.
     */
    private static final String AUTHORITY_PATTERN =
            "/^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?/";

    private static final int PARSE_AUTHORITY_HOST_IP = 1;

    private static final int PARSE_AUTHORITY_PORT = 2;

    /**
     * Should always be empty.
     */
    private static final int PARSE_AUTHORITY_EXTRA = 3;

    /** A path must start with '/' and contain only the listed characters. */
    private static final String PATH_PATTERN =
            "/^(/[-a-zA-Z0-9_:@&?=+,.!/~*'%$]*)$/";

    private static final String QUERY_PATTERN = "/^(.*)$/";

    /** One or more characters in the octal range 000-177 (7-bit ASCII). */
    private static final String LEGAL_ASCII_PATTERN = "/^[\\000-\\177]+$/";

    private static final String IP_V4_DOMAIN_PATTERN =
            "/^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$/";

    /** One or more atoms separated by single dots. */
    private static final String DOMAIN_PATTERN =
            "/^" + ATOM + "(\\." + ATOM + ")*$/";

    private static final String PORT_PATTERN = "/^:(\\d{1,5})$/";

    private static final String ATOM_PATTERN = "/(" + ATOM + ")/";

    private static final String ALPHA_PATTERN = "/^[" + ALPHA_CHARS + "]/";

    /**
     * Holds the set of current validation options.
     */
    private Flags options = null;

    /**
     * The set of schemes that are allowed to be in a URL.
     */
    private Set allowedSchemes = new HashSet();

    /**
     * If no schemes are provided, default to this set.
     */
    protected String[] defaultSchemes = {"http", "https", "ftp"};

    /**
     * Create a UrlValidator with default properties.
     */
    public UrlValidator() {
        this(null);
    }

    /**
     * Create a UrlValidator that accepts only the given schemes and uses no options.
     * @param schemes Pass in one or more url schemes to consider valid, passing in
     *        a null will default to "http,https,ftp" being valid.
     *        If a non-null schemes is specified then all valid schemes must
     *        be specified.
     */
    public UrlValidator(String[] schemes) {
        this(schemes, 0);
    }

    /**
     * Initialize a UrlValidator with the given validation options.
     * @param options The options should be set using the public constants declared in
     * this class.  To set multiple options you simply add them together.  For example,
     * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options.
     */
    public UrlValidator(int options) {
        this(null, options);
    }

    /**
     * Behaviour of validation is modified by passing in options:
     * @param schemes The set of valid schemes; <code>null</code> selects
     * {@link #defaultSchemes}. Ignored when ALLOW_ALL_SCHEMES is set.
     * @param options The options should be set using the public constants declared in
     * this class.  To set multiple options you simply add them together.  For example,
     * ALLOW_2_SLASHES + NO_FRAGMENTS enables both of those options.
     */
    public UrlValidator(String[] schemes, int options) {
        this.options = new Flags(options);

        // With every scheme allowed there is no whitelist to build.
        if (this.options.isOn(ALLOW_ALL_SCHEMES)) {
            return;
        }

        String[] effectiveSchemes = (schemes == null) ? this.defaultSchemes : schemes;
        this.allowedSchemes.addAll(Arrays.asList(effectiveSchemes));
    }

    /**
     * <p>Checks if a field has a valid url address.</p>
     *
     * @param value The value validation is being performed on.  A <code>null</code>
     * value is considered invalid.
     * @return true if the url is valid.
     */
    public boolean isValid(String value) {
        if (value == null) {
            return false;
        }

        // Reject anything outside 7-bit ASCII (and the empty string) up front.
        Perl5Util asciiMatcher = new Perl5Util();
        if (!asciiMatcher.match(LEGAL_ASCII_PATTERN, value)) {
            return false;
        }

        // Check the whole url address structure
        Perl5Util urlMatcher = new Perl5Util();
        if (!urlMatcher.match(URL_PATTERN, value)) {
            return false;
        }

        // Each component is validated in turn; the first failure short-circuits.
        return isValidScheme(urlMatcher.group(PARSE_URL_SCHEME))
                && isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY))
                && isValidPath(urlMatcher.group(PARSE_URL_PATH))
                && isValidQuery(urlMatcher.group(PARSE_URL_QUERY))
                && isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT));
    }

    /**
     * Validate scheme. The scheme must start with a letter and, unless the
     * ALLOW_ALL_SCHEMES option is on, must be one of the allowed schemes
     * collected by the constructor.
     * @param scheme The scheme to validate.  A <code>null</code> value is considered
     * invalid.
     * @return true if valid.
     */
    protected boolean isValidScheme(String scheme) {
        if (scheme == null) {
            return false;
        }

        Perl5Util schemeMatcher = new Perl5Util();
        if (!schemeMatcher.match(SCHEME_PATTERN, scheme)) {
            return false;
        }

        return this.options.isOn(ALLOW_ALL_SCHEMES)
                || this.allowedSchemes.contains(scheme);
    }

    /**
     * Returns true if the authority is properly formatted.  An authority is the combination
     * of hostname and port.  A <code>null</code> authority value is considered invalid.
     * The host part must be either a dotted IPv4 address whose four components are
     * each at most 255, or a dot-separated host name (see the checks in
     * <code>hasValidDomainStructure</code>). Host names may have any number of labels.
     * @param authority The authority to validate.
     * @return true if the authority is valid.
     */
    protected boolean isValidAuthority(String authority) {
        if (authority == null) {
            return false;
        }

        Perl5Util authorityMatcher = new Perl5Util();
        if (!authorityMatcher.match(AUTHORITY_PATTERN, authority)) {
            return false;
        }

        // check if authority is IP address or hostname
        String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
        Perl5Util ipV4Matcher = new Perl5Util();

        if (ipV4Matcher.match(IP_V4_DOMAIN_PATTERN, hostIP)) {
            // this is an IP address so check components
            if (!hasValidIpV4Octets(ipV4Matcher)) {
                return false;
            }
        } else {
            // Not an IP address, so it has to be a well formed host name.
            Perl5Util domainMatcher = new Perl5Util();
            if (!domainMatcher.match(DOMAIN_PATTERN, hostIP)
                    || !hasValidDomainStructure(hostIP)) {
                return false;
            }
        }

        // A port, when present, must be ':' followed by one to five digits.
        String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
        if (port != null) {
            Perl5Util portMatcher = new Perl5Util();
            if (!portMatcher.match(PORT_PATTERN, port)) {
                return false;
            }
        }

        // Anything after host and port makes the authority invalid.
        String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
        return GenericValidator.isBlankOrNull(extra);
    }

    /**
     * Checks the four components captured by a successful IP_V4_DOMAIN_PATTERN match.
     * @param ipV4Matcher matcher holding the result of that match.
     * @return true if every component is present and not greater than 255.
     */
    private static boolean hasValidIpV4Octets(Perl5Util ipV4Matcher) {
        for (int i = 1; i <= 4; i++) {
            String ipSegment = ipV4Matcher.group(i);
            if (ipSegment == null || ipSegment.length() <= 0) {
                return false;
            }

            try {
                if (Integer.parseInt(ipSegment) > 255) {
                    return false;
                }
            } catch (NumberFormatException e) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks the labels of a host name that already matched DOMAIN_PATTERN: there
     * must be at least two labels, and the last one (the top level domain) must be
     * two to four characters long and start with a letter.
     * @param host the host name, made of atoms separated by single dots.
     * @return true if the host name passes these checks.
     */
    private static boolean hasValidDomainStructure(String host) {
        Perl5Util atomMatcher = new Perl5Util();
        String remaining = host;
        String topLevel = null;
        int segmentCount = 0;

        // Consume one label per iteration. Only the most recent label is kept, so
        // the number of labels in the host is not limited.
        while (atomMatcher.match(ATOM_PATTERN, remaining)) {
            topLevel = atomMatcher.group(1);
            // Skip the label plus the dot that follows it.
            int consumed = topLevel.length() + 1;
            remaining = (consumed >= remaining.length()) ? "" : remaining.substring(consumed);
            segmentCount++;
        }

        // Make sure there's a host name preceding the top level domain.
        if (topLevel == null || segmentCount < 2) {
            return false;
        }

        if (topLevel.length() < 2 || topLevel.length() > 4) {
            return false;
        }

        // rightmost hostname will never start with a digit:
        // first letter of top level must be a alpha
        Perl5Util alphaMatcher = new Perl5Util();
        return alphaMatcher.match(ALPHA_PATTERN, topLevel.substring(0, 1));
    }

    /**
     * Returns true if the path is valid.  A <code>null</code> value is considered invalid.
     * A valid path starts with '/', does not end with '/', contains "//" only when
     * ALLOW_2_SLASHES is on, and does not contain more ".." sequences than it has
     * preceding path levels to climb out of.
     * @param path The path to validate.
     * @return true if the path is valid.
     */
    protected boolean isValidPath(String path) {
        if (path == null) {
            return false;
        }

        Perl5Util pathMatcher = new Perl5Util();
        if (!pathMatcher.match(PATH_PATTERN, path)) {
            return false;
        }

        if (path.endsWith("/")) {
            return false;
        }

        int slash2Count = countToken("//", path);
        if (this.options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) {
            return false;
        }

        int slashCount = countToken("/", path);
        int dot2Count = countToken("..", path);
        if (dot2Count > 0 && (slashCount - slash2Count - 1) <= dot2Count) {
            return false;
        }

        return true;
    }

    /**
     * Returns true if the query is null or it's a properly formatted query string.
     * @param query The query string to validate.
     * @return true if the query is valid.
     */
    protected boolean isValidQuery(String query) {
        if (query == null) {
            return true;
        }

        Perl5Util queryMatcher = new Perl5Util();
        return queryMatcher.match(QUERY_PATTERN, query);
    }

    /**
     * Returns true if the given fragment is null or fragments are allowed.
     * @param fragment The fragment to validate.
     * @return true if the fragment is valid.
     */
    protected boolean isValidFragment(String fragment) {
        if (fragment == null) {
            return true;
        }

        return this.options.isOff(NO_FRAGMENTS);
    }

    /**
     * Returns the number of times the token appears in the target.  Overlapping
     * occurrences are counted individually, so "///" contains "//" twice.
     * @param token Token value to be counted.  An empty token is counted as
     * occurring zero times.
     * @param target Target value to count tokens in.
     * @return the number of occurrences.
     */
    protected int countToken(String token, String target) {
        // indexOf never returns -1 for an empty token, so the scan below would
        // never finish.
        if (token.length() == 0) {
            return 0;
        }

        int count = 0;
        int tokenIndex = target.indexOf(token);
        while (tokenIndex != -1) {
            count++;
            // Resume one character further on so overlapping matches are found.
            tokenIndex = target.indexOf(token, tokenIndex + 1);
        }
        return count;
    }
}

This page was automatically generated by Maven