001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020 package org.apache.directory.shared.ldap.util;
021
022
023 import java.io.ByteArrayOutputStream;
024 import java.io.File;
025 import java.io.FileFilter;
026 import java.io.OutputStreamWriter;
027 import java.io.UnsupportedEncodingException;
028 import java.lang.reflect.Method;
029 import java.nio.charset.Charset;
030 import java.util.ArrayList;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.regex.Pattern;
035 import java.util.regex.PatternSyntaxException;
036
037 import javax.naming.InvalidNameException;
038
039 import org.apache.directory.shared.asn1.codec.binary.Hex;
040 import org.apache.directory.shared.i18n.I18n;
041 import org.apache.directory.shared.ldap.entry.client.ClientBinaryValue;
042 import org.apache.directory.shared.ldap.entry.client.ClientStringValue;
043 import org.apache.directory.shared.ldap.schema.syntaxCheckers.UuidSyntaxChecker;
044
045
046 /**
047 * Various string manipulation methods that are more efficient than chaining
048 * string operations: all is done in the same buffer without creating a bunch of
049 * string objects.
050 *
051 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
052 * @version $Rev: 920081 $
053 */
054 public class StringTools
055 {
056 /** Holds the default charset, which JDK 1.5 does not provide directly; initially null. NOTE(review): the code that assigns it is outside this part of the file - confirm how it is initialised. */
057 static String defaultCharset = null;
058
059
060
061 // ~ Static fields/initializers
062 // -----------------------------------------------------------------
063
064 /** Upper-case hexadecimal digits as ASCII bytes, indexed by nibble value (0 to 15) */
065 private static final byte[] HEX_CHAR = new byte[]
066 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
067
068 private static final int UTF8_MULTI_BYTES_MASK = 0x0080; // 1000 0000 - NOTE(review): presumably the "not a 7-bit ASCII byte" test; the code using these UTF8_* constants is outside this chunk
069
070 private static final int UTF8_TWO_BYTES_MASK = 0x00E0; // 1110 0000 - presumably used as ( b & MASK ) == UTF8_TWO_BYTES
071
072 private static final int UTF8_TWO_BYTES = 0x00C0; // 1100 0000 - bit pattern 110x xxxx
073
074 private static final int UTF8_THREE_BYTES_MASK = 0x00F0; // 1111 0000
075
076 private static final int UTF8_THREE_BYTES = 0x00E0; // 1110 0000 - bit pattern 1110 xxxx
077
078 private static final int UTF8_FOUR_BYTES_MASK = 0x00F8; // 1111 1000
079
080 private static final int UTF8_FOUR_BYTES = 0x00F0; // 1111 0000 - bit pattern 1111 0xxx
081
082 private static final int UTF8_FIVE_BYTES_MASK = 0x00FC; // 1111 1100
083
084 private static final int UTF8_FIVE_BYTES = 0x00F8; // 1111 1000 - bit pattern 1111 10xx
085
086 private static final int UTF8_SIX_BYTES_MASK = 0x00FE; // 1111 1110
087
088 private static final int UTF8_SIX_BYTES = 0x00FC; // 1111 1100 - bit pattern 1111 110x
089
090 /** <alpha> ::= [0x41-0x5A] | [0x61-0x7A] : 128 flags indexed by character code, true for 'A'-'Z' and 'a'-'z'; an index above 0x7F is out of bounds */
091 public static final boolean[] ALPHA =
092 {
093 false, false, false, false, false, false, false, false,
094 false, false, false, false, false, false, false, false,
095 false, false, false, false, false, false, false, false,
096 false, false, false, false, false, false, false, false,
097 false, false, false, false, false, false, false, false,
098 false, false, false, false, false, false, false, false,
099 false, false, false, false, false, false, false, false,
100 false, false, false, false, false, false, false, false,
101 false, true, true, true, true, true, true, true,
102 true, true, true, true, true, true, true, true,
103 true, true, true, true, true, true, true, true,
104 true, true, true, false, false, false, false, false,
105 false, true, true, true, true, true, true, true,
106 true, true, true, true, true, true, true, true,
107 true, true, true, true, true, true, true, true,
108 true, true, true, false, false, false, false, false
109 };
110
111 /** <alpha-lower-case> ::= [0x61-0x7A] : 128 flags indexed by character code, true for 'a'-'z' only; an index above 0x7F is out of bounds */
112 public static final boolean[] ALPHA_LOWER_CASE =
113 {
114 false, false, false, false, false, false, false, false,
115 false, false, false, false, false, false, false, false,
116 false, false, false, false, false, false, false, false,
117 false, false, false, false, false, false, false, false,
118 false, false, false, false, false, false, false, false,
119 false, false, false, false, false, false, false, false,
120 false, false, false, false, false, false, false, false,
121 false, false, false, false, false, false, false, false,
122 false, false, false, false, false, false, false, false,
123 false, false, false, false, false, false, false, false,
124 false, false, false, false, false, false, false, false,
125 false, false, false, false, false, false, false, false,
126 false, true, true, true, true, true, true, true,
127 true, true, true, true, true, true, true, true,
128 true, true, true, true, true, true, true, true,
129 true, true, true, false, false, false, false, false
130 };
131
132 /** <alpha-upper-case> ::= [0x41-0x5A] : 128 flags indexed by character code, true for 'A'-'Z' only; an index above 0x7F is out of bounds */
133 public static final boolean[] ALPHA_UPPER_CASE =
134 {
135 false, false, false, false, false, false, false, false,
136 false, false, false, false, false, false, false, false,
137 false, false, false, false, false, false, false, false,
138 false, false, false, false, false, false, false, false,
139 false, false, false, false, false, false, false, false,
140 false, false, false, false, false, false, false, false,
141 false, false, false, false, false, false, false, false,
142 false, false, false, false, false, false, false, false,
143 false, true, true, true, true, true, true, true,
144 true, true, true, true, true, true, true, true,
145 true, true, true, true, true, true, true, true,
146 true, true, true, false, false, false, false, false,
147 false, false, false, false, false, false, false, false,
148 false, false, false, false, false, false, false, false,
149 false, false, false, false, false, false, false, false,
150 false, false, false, false, false, false, false, false,
151 };
152
153 /** <alpha> | <digit> : 128 flags indexed by character code, true for 'A'-'Z', 'a'-'z' and '0'-'9'; an index above 0x7F is out of bounds */
154 public static final boolean[] ALPHA_DIGIT =
155 {
156 false, false, false, false, false, false, false, false,
157 false, false, false, false, false, false, false, false,
158 false, false, false, false, false, false, false, false,
159 false, false, false, false, false, false, false, false,
160 false, false, false, false, false, false, false, false,
161 false, false, false, false, false, false, false, false,
162 true, true, true, true, true, true, true, true,
163 true, true, false, false, false, false, false, false,
164 false, true, true, true, true, true, true, true,
165 true, true, true, true, true, true, true, true,
166 true, true, true, true, true, true, true, true,
167 true, true, true, false, false, false, false, false,
168 false, true, true, true, true, true, true, true,
169 true, true, true, true, true, true, true, true,
170 true, true, true, true, true, true, true, true,
171 true, true, true, false, false, false, false, false
172 };
173
174 /** <alpha> | <digit> | '-' : 128 flags indexed by character code, true for letters, digits and the hyphen (0x2D); an index above 0x7F is out of bounds */
175 public static final boolean[] CHAR =
176 {
177 false, false, false, false, false, false, false, false,
178 false, false, false, false, false, false, false, false,
179 false, false, false, false, false, false, false, false,
180 false, false, false, false, false, false, false, false,
181 false, false, false, false, false, false, false, false,
182 false, false, false, false, false, true, false, false,
183 true, true, true, true, true, true, true, true,
184 true, true, false, false, false, false, false, false,
185 false, true, true, true, true, true, true, true,
186 true, true, true, true, true, true, true, true,
187 true, true, true, true, true, true, true, true,
188 true, true, true, false, false, false, false, false,
189 false, true, true, true, true, true, true, true,
190 true, true, true, true, true, true, true, true,
191 true, true, true, true, true, true, true, true,
192 true, true, true, false, false, false, false, false
193 };
194
195 /** %01-%27 %2B-%5B %5D-%7F : 128 flags indexed by character code, false only for NUL, '(', ')', '*' and '\'; an index above 0x7F is out of bounds */
196 private static final boolean[] UNICODE_SUBSET =
197 {
198 false, true, true, true, true, true, true, true, // '\0'
199 true, true, true, true, true, true, true, true,
200 true, true, true, true, true, true, true, true,
201 true, true, true, true, true, true, true, true,
202 true, true, true, true, true, true, true, true,
203 false, false, false, true, true, true, true, true, // '(', ')', '*'
204 true, true, true, true, true, true, true, true,
205 true, true, true, true, true, true, true, true,
206 true, true, true, true, true, true, true, true,
207 true, true, true, true, true, true, true, true,
208 true, true, true, true, true, true, true, true,
209 true, true, true, true, false, true, true, true, // '\'
210 true, true, true, true, true, true, true, true,
211 true, true, true, true, true, true, true, true,
212 true, true, true, true, true, true, true, true,
213 true, true, true, true, true, true, true, true,
214 };
215
216 /** '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' : 128 flags indexed by character code, true for 0x30-0x39 only; an index above 0x7F is out of bounds */
217 private static final boolean[] DIGIT =
218 {
219 false, false, false, false, false, false, false, false,
220 false, false, false, false, false, false, false, false,
221 false, false, false, false, false, false, false, false,
222 false, false, false, false, false, false, false, false,
223 false, false, false, false, false, false, false, false,
224 false, false, false, false, false, false, false, false,
225 true, true, true, true, true, true, true, true,
226 true, true, false, false, false, false, false, false,
227 false, false, false, false, false, false, false, false,
228 false, false, false, false, false, false, false, false,
229 false, false, false, false, false, false, false, false,
230 false, false, false, false, false, false, false, false,
231 false, false, false, false, false, false, false, false,
232 false, false, false, false, false, false, false, false,
233 false, false, false, false, false, false, false, false,
234 false, false, false, false, false, false, false, false
235 };
236
237 /** <hex> ::= [0x30-0x39] | [0x41-0x46] | [0x61-0x66] : 128 flags indexed by character code, true for '0'-'9', 'A'-'F' and 'a'-'f'; an index above 0x7F is out of bounds */
238 private static final boolean[] HEX =
239 {
240 false, false, false, false, false, false, false, false,
241 false, false, false, false, false, false, false, false,
242 false, false, false, false, false, false, false, false,
243 false, false, false, false, false, false, false, false,
244 false, false, false, false, false, false, false, false,
245 false, false, false, false, false, false, false, false,
246 true, true, true, true, true, true, true, true,
247 true, true, false, false, false, false, false, false,
248 false, true, true, true, true, true, true, false,
249 false, false, false, false, false, false, false, false,
250 false, false, false, false, false, false, false, false,
251 false, false, false, false, false, false, false, false,
252 false, true, true, true, true, true, true, false,
253 false, false, false, false, false, false, false, false,
254 false, false, false, false, false, false, false, false,
255 false, false, false, false, false, false, false, false };
256
257 /** 128 flags indexed by character code, true when the character is one of those named in the per-row comments below (letters, digits, space and ' ( ) + , - . / : = ?); an index above 0x7F is out of bounds */
258 private static final boolean[] IS_PRINTABLE_CHAR =
259 {
260 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
261 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
262 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
263 false, false, false, false, false, false, false, false, // ---, ---, ---, ---, ---, ---, ---, ---
264 true, false, false, false, false, false, false, true, // ' ', ---, ---, ---, ---, ---, ---, "'"
265 true, true, false, true, true, true, true, true, // '(', ')', ---, '+', ',', '-', '.', '/'
266 true, true, true, true, true, true, true, true, // '0', '1', '2', '3', '4', '5', '6', '7',
267 true, true, true, false, false, true, false, true, // '8', '9', ':', ---, ---, '=', ---, '?'
268 false, true, true, true, true, true, true, true, // ---, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
269 true, true, true, true, true, true, true, true, // 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O'
270 true, true, true, true, true, true, true, true, // 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W'
271 true, true, true, false, false, false, false, false, // 'X', 'Y', 'Z', ---, ---, ---, ---, ---
272 false, true, true, true, true, true, true, true, // ---, 'a', 'b', 'c', 'd', 'e', 'f', 'g'
273 true, true, true, true, true, true, true, true, // 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o'
274 true, true, true, true, true, true, true, true, // 'p', 'q', 'r', 's', 't', 'u', 'v', 'w'
275 true, true, true, false, false, false, false, false // 'x', 'y', 'z', ---, ---, ---, ---, ---
276 };
277
278
279 /** Numeric value (0 to 15) of each hexadecimal digit character, -1 for every other character; the table holds 112 entries (codes 0x00 to 0x6F), so an index above 0x6F is out of bounds */
280 private static final byte[] HEX_VALUE =
281 {
282 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00 -> 0F
283 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10 -> 1F
284 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20 -> 2F
285 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30 -> 3F ( 0, 1,2, 3, 4,5, 6, 7, 8, 9 )
286 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40 -> 4F ( A, B, C, D, E, F )
287 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50 -> 5F
288 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 // 60 -> 6F ( a, b, c, d, e, f )
289 };
290
291 /** lowerCase = 'a' .. 'z', '0'..'9', '-' : maps letters of either case to lower case, digits and '-' to themselves and every other code to 0; 192 entries (codes 0x00 to 0xBF), so a larger index is out of bounds */
292 private static final char[] LOWER_CASE =
293 {
294 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0,
296 0, 0, 0, 0, 0, 0, 0, 0,
297 0, 0, 0, 0, 0, 0, 0, 0,
298 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, '-', 0, 0,
300 '0', '1', '2', '3', '4', '5', '6', '7',
301 '8', '9', 0, 0, 0, 0, 0, 0,
302 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
303 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
304 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
305 'x', 'y', 'z', 0, 0, 0, 0, 0,
306 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
307 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
308 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
309 'x', 'y', 'z', 0, 0, 0, 0, 0,
310 0, 0, 0, 0, 0, 0, 0, 0,
311 0, 0, 0, 0, 0, 0, 0, 0,
312 0, 0, 0, 0, 0, 0, 0, 0,
313 0, 0, 0, 0, 0, 0, 0, 0,
314 0, 0, 0, 0, 0, 0, 0, 0,
315 0, 0, 0, 0, 0, 0, 0, 0,
316 0, 0, 0, 0, 0, 0, 0, 0,
317 0, 0, 0, 0, 0, 0, 0, 0
318 };
319
320 private static final char[] TO_LOWER_CASE = // 256 entries (codes 0x00 to 0xFF): 'A'-'Z' map to 'a'-'z', every other code maps to itself
321 {
322 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
323 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
324 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
325 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
326 ' ', 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, '\'',
327 '(', ')', 0x2A, '+', ',', '-', '.', '/',
328 '0', '1', '2', '3', '4', '5', '6', '7',
329 '8', '9', ':', 0x3B, 0x3C, '=', 0x3E, '?',
330 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
331 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
332 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
333 'x', 'y', 'z', 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
334 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
335 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
336 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
337 'x', 'y', 'z', 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
338 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
339 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
340 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
341 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
342 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
343 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
344 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
345 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
346 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
347 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
348 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
349 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
350 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
351 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
352 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
353 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
354 };
355
356
357 /** upperCase = 'A' .. 'Z', '0'..'9', '-' : maps letters of either case to upper case, digits and '-' to themselves and every other code (including '.') to 0; 192 entries (codes 0x00 to 0xBF), so a larger index is out of bounds */
358 private static final char[] UPPER_CASE =
359 {
360 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 0, 0, 0, 0, 0, 0, 0,
364 0, 0, 0, 0, 0, 0, 0, 0,
365 0, 0, 0, 0, 0, '-', 0, 0,
366 '0', '1', '2', '3', '4', '5', '6', '7',
367 '8', '9', 0, 0, 0, 0, 0, 0,
368 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
369 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
370 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
371 'X', 'Y', 'Z', 0, 0, 0, 0, 0,
372 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G',
373 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
374 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
375 'X', 'Y', 'Z', 0, 0, 0, 0, 0,
376 0, 0, 0, 0, 0, 0, 0, 0,
377 0, 0, 0, 0, 0, 0, 0, 0,
378 0, 0, 0, 0, 0, 0, 0, 0,
379 0, 0, 0, 0, 0, 0, 0, 0,
380 0, 0, 0, 0, 0, 0, 0, 0,
381 0, 0, 0, 0, 0, 0, 0, 0,
382 0, 0, 0, 0, 0, 0, 0, 0,
383 0, 0, 0, 0, 0, 0, 0, 0
384 };
385
386 private static final int CHAR_ONE_BYTE_MASK = 0xFFFFFF80; // every bit above the low 7 - NOTE(review): presumably a value with none of these bits set fits in one encoded byte; the code using the CHAR_*_MASK constants is outside this chunk
387
388 private static final int CHAR_TWO_BYTES_MASK = 0xFFFFF800; // every bit above the low 11
389
390 private static final int CHAR_THREE_BYTES_MASK = 0xFFFF0000; // every bit above the low 16
391
392 private static final int CHAR_FOUR_BYTES_MASK = 0xFFE00000; // every bit above the low 21
393
394 private static final int CHAR_FIVE_BYTES_MASK = 0xFC000000; // every bit above the low 26
395
396 private static final int CHAR_SIX_BYTES_MASK = 0x80000000; // the sign bit only
397
398 public static final int NOT_EQUAL = -1; // NOTE(review): presumably a "no match" result of a comparison helper; its users are not visible in this chunk
399
400 // The following methods are taken from org.apache.commons.lang.StringUtils
401
402 /**
403 * The empty String <code>""</code>, exposed as a named constant.
404 *
405 * @since 2.0
406 */
407 public static final String EMPTY = "";
408
409 /**
410 * A zero-length byte array.
411 */
412 public static final byte[] EMPTY_BYTES = new byte[]
413 {};
414
415 /**
416 * A zero-length String array.
417 */
418 public static final String[] EMPTY_STRINGS = new String[]
419 {};
420
421 /**
422 * Trims several consecutive characters into one.
423 *
424 * @param str
425 * the string to trim consecutive characters of
426 * @param ch
427 * the character to trim down
428 * @return the newly trimmed down string
429 */
430 public static final String trimConsecutiveToOne( String str, char ch )
431 {
432 if ( ( null == str ) || ( str.length() == 0 ) )
433 {
434 return "";
435 }
436
437 char[] buffer = str.toCharArray();
438 char[] newbuf = new char[buffer.length];
439 int pos = 0;
440 boolean same = false;
441
442 for ( int i = 0; i < buffer.length; i++ )
443 {
444 char car = buffer[i];
445
446 if ( car == ch )
447 {
448 if ( same )
449 {
450 continue;
451 }
452 else
453 {
454 same = true;
455 newbuf[pos++] = car;
456 }
457 }
458 else
459 {
460 same = false;
461 newbuf[pos++] = car;
462 }
463 }
464
465 return new String( newbuf, 0, pos );
466 }
467
468
469 /**
470 * A deep trim of a string remove whitespace from the ends as well as
471 * excessive whitespace within the inside of the string between
472 * non-whitespace characters. A deep trim reduces internal whitespace down
473 * to a single space to perserve the whitespace separated tokenization order
474 * of the String.
475 *
476 * @param string the string to deep trim.
477 * @return the trimmed string.
478 */
479 public static final String deepTrim( String string )
480 {
481 return deepTrim( string, false );
482 }
483
484
485 /**
486 * This does the same thing as a trim but we also lowercase the string while
487 * performing the deep trim within the same buffer. This saves us from
488 * having to create multiple String and StringBuffer objects and is much
489 * more efficient.
490 *
491 * @see StringTools#deepTrim( String )
492 */
493 public static final String deepTrimToLower( String string )
494 {
495 return deepTrim( string, true );
496 }
497
498
499 /**
500 * Put common code to deepTrim(String) and deepTrimToLower here.
501 *
502 * @param str the string to deep trim
503 * @param toLowerCase how to normalize for case: upper or lower
504 * @return the deep trimmed string
505 * @see StringTools#deepTrim( String )
506 *
507 * TODO Replace the toCharArray() by substring manipulations
508 */
509 public static final String deepTrim( String str, boolean toLowerCase )
510 {
511 if ( ( null == str ) || ( str.length() == 0 ) )
512 {
513 return "";
514 }
515
516 char ch;
517 char[] buf = str.toCharArray();
518 char[] newbuf = new char[buf.length];
519 boolean wsSeen = false;
520 boolean isStart = true;
521 int pos = 0;
522
523 for ( int i = 0; i < str.length(); i++ )
524 {
525 ch = buf[i];
526
527 // filter out all uppercase characters
528 if ( toLowerCase )
529 {
530 if ( Character.isUpperCase( ch ) )
531 {
532 ch = Character.toLowerCase( ch );
533 }
534 }
535
536 // Check to see if we should add space
537 if ( Character.isWhitespace( ch ) )
538 {
539 // If the buffer has had characters added already check last
540 // added character. Only append a spc if last character was
541 // not whitespace.
542 if ( wsSeen )
543 {
544 continue;
545 }
546 else
547 {
548 wsSeen = true;
549
550 if ( isStart )
551 {
552 isStart = false;
553 }
554 else
555 {
556 newbuf[pos++] = ch;
557 }
558 }
559 }
560 else
561 {
562 // Add all non-whitespace
563 wsSeen = false;
564 isStart = false;
565 newbuf[pos++] = ch;
566 }
567 }
568
569 return ( pos == 0 ? "" : new String( newbuf, 0, ( wsSeen ? pos - 1 : pos ) ) );
570 }
571
572 /**
573 * Truncates large Strings showing a portion of the String's head and tail
574 * with the center cut out and replaced with '...'. Also displays the total
575 * length of the truncated string so size of '...' can be interpreted.
576 * Useful for large strings in UIs or hex dumps to log files.
577 *
578 * @param str the string to truncate
579 * @param head the amount of the head to display
580 * @param tail the amount of the tail to display
581 * @return the center truncated string
582 */
583 public static final String centerTrunc( String str, int head, int tail )
584 {
585 StringBuffer buf = null;
586
587 // Return as-is if String is smaller than or equal to the head plus the
588 // tail plus the number of characters added to the trunc representation
589 // plus the number of digits in the string length.
590 if ( str.length() <= ( head + tail + 7 + str.length() / 10 ) )
591 {
592 return str;
593 }
594
595 buf = new StringBuffer();
596 buf.append( '[' ).append( str.length() ).append( "][" );
597 buf.append( str.substring( 0, head ) ).append( "..." );
598 buf.append( str.substring( str.length() - tail ) );
599 buf.append( ']' );
600 return buf.toString();
601 }
602
603
604 /**
605 * Gets a hex string from byte array.
606 *
607 * @param res
608 * the byte array
609 * @return the hex string representing the binary values in the array
610 */
611 public static final String toHexString( byte[] res )
612 {
613 StringBuffer buf = new StringBuffer( res.length << 1 );
614
615 for ( int ii = 0; ii < res.length; ii++ )
616 {
617 String digit = Integer.toHexString( 0xFF & res[ii] );
618
619 if ( digit.length() == 1 )
620 {
621 digit = '0' + digit;
622 }
623
624 buf.append( digit );
625 }
626 return buf.toString().toUpperCase();
627 }
628
629 /**
630 * Rewrote the toLowercase method to improve performances.
631 * In Ldap, attributesType are supposed to use ASCII chars :
632 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only.
633 *
634 * @param value The String to lowercase
635 * @return The lowercase string
636 */
637 public static final String toLowerCase( String value )
638 {
639 if ( ( null == value ) || ( value.length() == 0 ) )
640 {
641 return "";
642 }
643
644 char[] chars = value.toCharArray();
645
646 for ( int i = 0; i < chars.length; i++ )
647 {
648 chars[i] = TO_LOWER_CASE[ chars[i] ];
649 }
650
651 return new String( chars );
652 }
653
654 /**
655 * Rewrote the toLowercase method to improve performances.
656 * In Ldap, attributesType are supposed to use ASCII chars :
657 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only.
658 *
659 * @param value The String to uppercase
660 * @return The uppercase string
661 */
662 public static final String toUpperCase( String value )
663 {
664 if ( ( null == value ) || ( value.length() == 0 ) )
665 {
666 return "";
667 }
668
669 char[] chars = value.toCharArray();
670
671 for ( int i = 0; i < chars.length; i++ )
672 {
673 chars[i] = UPPER_CASE[ chars[i] ];
674 }
675
676 return new String( chars );
677 }
678
679 /**
680 * Get byte array from hex string
681 *
682 * @param hexString
683 * the hex string to convert to a byte array
684 * @return the byte form of the hex string.
685 */
686 public static final byte[] toByteArray( String hexString )
687 {
688 int arrLength = hexString.length() >> 1;
689 byte buf[] = new byte[arrLength];
690
691 for ( int ii = 0; ii < arrLength; ii++ )
692 {
693 int index = ii << 1;
694
695 String l_digit = hexString.substring( index, index + 2 );
696 buf[ii] = ( byte ) Integer.parseInt( l_digit, 16 );
697 }
698
699 return buf;
700 }
701
702
703 /**
704 * This method is used to insert HTML block dynamically
705 *
706 * @param source the HTML code to be processes
707 * @param replaceNl if true '\n' will be replaced by <br>
708 * @param replaceTag if true '<' will be replaced by < and '>' will be replaced
709 * by >
710 * @param replaceQuote if true '\"' will be replaced by "
711 * @return the formated html block
712 */
713 public static final String formatHtml( String source, boolean replaceNl, boolean replaceTag,
714 boolean replaceQuote )
715 {
716 StringBuffer buf = new StringBuffer();
717 int len = source.length();
718
719 for ( int ii = 0; ii < len; ii++ )
720 {
721 char ch = source.charAt( ii );
722
723 switch ( ch )
724 {
725 case '\"':
726 if ( replaceQuote )
727 {
728 buf.append( """ );
729 }
730 else
731 {
732 buf.append( ch );
733 }
734 break;
735
736 case '<':
737 if ( replaceTag )
738 {
739 buf.append( "<" );
740 }
741 else
742 {
743 buf.append( ch );
744 }
745 break;
746
747 case '>':
748 if ( replaceTag )
749 {
750 buf.append( ">" );
751 }
752 else
753 {
754 buf.append( ch );
755 }
756 break;
757
758 case '\n':
759 if ( replaceNl )
760 {
761 if ( replaceTag )
762 {
763 buf.append( "<br>" );
764 }
765 else
766 {
767 buf.append( "<br>" );
768 }
769 }
770 else
771 {
772 buf.append( ch );
773 }
774 break;
775
776 case '\r':
777 break;
778
779 case '&':
780 buf.append( "&" );
781 break;
782
783 default:
784 buf.append( ch );
785 break;
786 }
787 }
788
789 return buf.toString();
790 }
791
792
793 /**
794 * Creates a regular expression from an LDAP substring assertion filter
795 * specification.
796 *
797 * @param initialPattern
798 * the initial fragment before wildcards
799 * @param anyPattern
800 * fragments surrounded by wildcards if any
801 * @param finalPattern
802 * the final fragment after last wildcard if any
803 * @return the regular expression for the substring match filter
804 * @throws PatternSyntaxException
805 * if a syntactically correct regular expression cannot be
806 * compiled
807 */
808 public static final Pattern getRegex( String initialPattern, String[] anyPattern, String finalPattern )
809 throws PatternSyntaxException
810 {
811 StringBuffer buf = new StringBuffer();
812
813 if ( initialPattern != null )
814 {
815 buf.append( '^' ).append( Pattern.quote( initialPattern ) );
816 }
817
818 if ( anyPattern != null )
819 {
820 for ( int i = 0; i < anyPattern.length; i++ )
821 {
822 buf.append( ".*" ).append( Pattern.quote( anyPattern[i] ) );
823 }
824 }
825
826 if ( finalPattern != null )
827 {
828 buf.append( ".*" ).append( Pattern.quote( finalPattern ) );
829 }
830 else
831 {
832 buf.append( ".*" );
833 }
834
835 return Pattern.compile( buf.toString() );
836 }
837
838
839 /**
840 * Generates a regular expression from an LDAP substring match expression by
841 * parsing out the supplied string argument.
842 *
843 * @param ldapRegex
844 * the substring match expression
845 * @return the regular expression for the substring match filter
846 * @throws PatternSyntaxException
847 * if a syntactically correct regular expression cannot be
848 * compiled
849 */
850 public static final Pattern getRegex( String ldapRegex ) throws PatternSyntaxException
851 {
852 if ( ldapRegex == null )
853 {
854 throw new PatternSyntaxException( I18n.err( I18n.ERR_04429 ), "null", -1 );
855 }
856
857 List<String> any = new ArrayList<String>();
858 String remaining = ldapRegex;
859 int index = remaining.indexOf( '*' );
860
861 if ( index == -1 )
862 {
863 throw new PatternSyntaxException( I18n.err( I18n.ERR_04430 ), remaining, -1 );
864 }
865
866 String initialPattern = null;
867
868 if ( remaining.charAt( 0 ) != '*' )
869 {
870 initialPattern = remaining.substring( 0, index );
871 }
872
873 remaining = remaining.substring( index + 1, remaining.length() );
874
875 while ( ( index = remaining.indexOf( '*' ) ) != -1 )
876 {
877 any.add( remaining.substring( 0, index ) );
878 remaining = remaining.substring( index + 1, remaining.length() );
879 }
880
881 String finalPattern = null;
882 if ( !remaining.endsWith( "*" ) && remaining.length() > 0 )
883 {
884 finalPattern = remaining;
885 }
886
887 if ( any.size() > 0 )
888 {
889 String[] anyStrs = new String[any.size()];
890
891 for ( int i = 0; i < anyStrs.length; i++ )
892 {
893 anyStrs[i] = any.get( i );
894 }
895
896 return getRegex( initialPattern, anyStrs, finalPattern );
897 }
898
899 return getRegex( initialPattern, null, finalPattern );
900 }
901
902
903 /**
904 * Splits apart a OS separator delimited set of paths in a string into
905 * multiple Strings. File component path strings are returned within a List
906 * in the order they are found in the composite path string. Optionally, a
907 * file filter can be used to filter out path strings to control the
908 * components returned. If the filter is null all path components are
909 * returned.
910 *
911 * @param paths
912 * a set of paths delimited using the OS path separator
913 * @param filter
914 * a FileFilter used to filter the return set
915 * @return the filter accepted path component Strings in the order
916 * encountered
917 */
918 public static final List<String> getPaths( String paths, FileFilter filter )
919 {
920 int start = 0;
921 int stop = -1;
922 String path = null;
923 List<String> list = new ArrayList<String>();
924
925 // Abandon with no values if paths string is null
926 if ( paths == null || paths.trim().equals( "" ) )
927 {
928 return list;
929 }
930
931 final int max = paths.length() - 1;
932
933 // Loop spliting string using OS path separator: terminate
934 // when the start index is at the end of the paths string.
935 while ( start < max )
936 {
937 stop = paths.indexOf( File.pathSeparatorChar, start );
938
939 // The is no file sep between the start and the end of the string
940 if ( stop == -1 )
941 {
942 // If we have a trailing path remaining without ending separator
943 if ( start < max )
944 {
945 // Last path is everything from start to the string's end
946 path = paths.substring( start );
947
948 // Protect against consecutive separators side by side
949 if ( !path.trim().equals( "" ) )
950 {
951 // If filter is null add path, if it is not null add the
952 // path only if the filter accepts the path component.
953 if ( filter == null || filter.accept( new File( path ) ) )
954 {
955 list.add( path );
956 }
957 }
958 }
959
960 break; // Exit loop no more path components left!
961 }
962
963 // There is a separator between start and the end if we got here!
964 // start index is now at 0 or the index of last separator + 1
965 // stop index is now at next separator in front of start index
966 path = paths.substring( start, stop );
967
968 // Protect against consecutive separators side by side
969 if ( !path.trim().equals( "" ) )
970 {
971 // If filter is null add path, if it is not null add the path
972 // only if the filter accepts the path component.
973 if ( filter == null || filter.accept( new File( path ) ) )
974 {
975 list.add( path );
976 }
977 }
978
979 // Advance start index past separator to start of next path comp
980 start = stop + 1;
981 }
982
983 return list;
984 }
985
986
987 // ~ Methods
988 // ------------------------------------------------------------------------------------
989
990 /**
991 * Helper function that dump a byte in hex form
992 *
993 * @param octet The byte to dump
994 * @return A string representation of the byte
995 */
996 public static final String dumpByte( byte octet )
997 {
998 return new String( new byte[]
999 { '0', 'x', HEX_CHAR[( octet & 0x00F0 ) >> 4], HEX_CHAR[octet & 0x000F] } );
1000 }
1001
1002
1003 /**
1004 * Helper function that returns a char from an hex
1005 *
1006 * @param hex The hex to dump
1007 * @return A char representation of the hex
1008 */
1009 public static final char dumpHex( byte hex )
1010 {
1011 return ( char ) HEX_CHAR[hex & 0x000F];
1012 }
1013
1014
1015 /**
1016 * Helper function that dump an array of bytes in hex form
1017 *
1018 * @param buffer The bytes array to dump
1019 * @return A string representation of the array of bytes
1020 */
1021 public static final String dumpBytes( byte[] buffer )
1022 {
1023 if ( buffer == null )
1024 {
1025 return "";
1026 }
1027
1028 StringBuffer sb = new StringBuffer();
1029
1030 for ( int i = 0; i < buffer.length; i++ )
1031 {
1032 sb.append( "0x" ).append( ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] ) ).append(
1033 ( char ) ( HEX_CHAR[buffer[i] & 0x000F] ) ).append( " " );
1034 }
1035
1036 return sb.toString();
1037 }
1038
1039 /**
1040 *
1041 * Helper method to render an object which can be a String or a byte[]
1042 *
1043 * @return A string representing the object
1044 */
1045 public static String dumpObject( Object object )
1046 {
1047 if ( object != null )
1048 {
1049 if ( object instanceof String )
1050 {
1051 return (String) object;
1052 }
1053 else if ( object instanceof byte[] )
1054 {
1055 return dumpBytes( ( byte[] ) object );
1056 }
1057 else if ( object instanceof ClientStringValue )
1058 {
1059 return ( ( ClientStringValue ) object ).get();
1060 }
1061 else if ( object instanceof ClientBinaryValue )
1062 {
1063 return dumpBytes( ( ( ClientBinaryValue ) object ).get() );
1064 }
1065 else
1066 {
1067 return "<unknown type>";
1068 }
1069 }
1070 else
1071 {
1072 return "";
1073 }
1074 }
1075
1076 /**
1077 * Helper function that dump an array of bytes in hex pair form,
1078 * without '0x' and space chars
1079 *
1080 * @param buffer The bytes array to dump
1081 * @return A string representation of the array of bytes
1082 */
1083 public static final String dumpHexPairs( byte[] buffer )
1084 {
1085 if ( buffer == null )
1086 {
1087 return "";
1088 }
1089
1090 char[] str = new char[buffer.length << 1];
1091
1092 for ( int i = 0, pos = 0; i < buffer.length; i++ )
1093 {
1094 str[pos++] = ( char ) ( HEX_CHAR[( buffer[i] & 0x00F0 ) >> 4] );
1095 str[pos++] = ( char ) ( HEX_CHAR[buffer[i] & 0x000F] );
1096 }
1097
1098 return new String( str );
1099 }
1100
1101 /**
1102 * Return the Unicode char which is coded in the bytes at position 0.
1103 *
 * @param bytes The byte[] representation of a Unicode string.
1105 * @return The first char found.
1106 */
1107 public static final char bytesToChar( byte[] bytes )
1108 {
1109 return bytesToChar( bytes, 0 );
1110 }
1111
1112
1113 /**
1114 * Count the number of bytes needed to return an Unicode char. This can be
1115 * from 1 to 6.
1116 *
1117 * @param bytes The bytes to read
1118 * @param pos Position to start counting. It must be a valid start of a
1119 * encoded char !
1120 * @return The number of bytes to create a char, or -1 if the encoding is
1121 * wrong. TODO : Should stop after the third byte, as a char is only
1122 * 2 bytes long.
1123 */
1124 public static final int countBytesPerChar( byte[] bytes, int pos )
1125 {
1126 if ( bytes == null )
1127 {
1128 return -1;
1129 }
1130
1131 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 )
1132 {
1133 return 1;
1134 }
1135 else if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES )
1136 {
1137 return 2;
1138 }
1139 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES )
1140 {
1141 return 3;
1142 }
1143 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES )
1144 {
1145 return 4;
1146 }
1147 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1148 {
1149 return 5;
1150 }
1151 else if ( ( bytes[pos] & UTF8_SIX_BYTES_MASK ) == UTF8_SIX_BYTES )
1152 {
1153 return 6;
1154 }
1155 else
1156 {
1157 return -1;
1158 }
1159 }
1160
1161
1162 /**
1163 * Return the number of bytes that hold an Unicode char.
1164 *
1165 * @param car The character to be decoded
1166 * @return The number of bytes to hold the char. TODO : Should stop after
1167 * the third byte, as a char is only 2 bytes long.
1168 */
1169 public static final int countNbBytesPerChar( char car )
1170 {
1171 if ( ( car & CHAR_ONE_BYTE_MASK ) == 0 )
1172 {
1173 return 1;
1174 }
1175 else if ( ( car & CHAR_TWO_BYTES_MASK ) == 0 )
1176 {
1177 return 2;
1178 }
1179 else if ( ( car & CHAR_THREE_BYTES_MASK ) == 0 )
1180 {
1181 return 3;
1182 }
1183 else if ( ( car & CHAR_FOUR_BYTES_MASK ) == 0 )
1184 {
1185 return 4;
1186 }
1187 else if ( ( car & CHAR_FIVE_BYTES_MASK ) == 0 )
1188 {
1189 return 5;
1190 }
1191 else if ( ( car & CHAR_SIX_BYTES_MASK ) == 0 )
1192 {
1193 return 6;
1194 }
1195 else
1196 {
1197 return -1;
1198 }
1199 }
1200
1201
1202 /**
1203 * Count the number of bytes included in the given char[].
1204 *
1205 * @param chars The char array to decode
1206 * @return The number of bytes in the char array
1207 */
1208 public static final int countBytes( char[] chars )
1209 {
1210 if ( chars == null )
1211 {
1212 return 0;
1213 }
1214
1215 int nbBytes = 0;
1216 int currentPos = 0;
1217
1218 while ( currentPos < chars.length )
1219 {
1220 int nbb = countNbBytesPerChar( chars[currentPos] );
1221
1222 // If the number of bytes necessary to encode a character is
1223 // above 3, we will need two UTF-16 chars
1224 currentPos += ( nbb < 4 ? 1 : 2 );
1225 nbBytes += nbb;
1226 }
1227
1228 return nbBytes;
1229 }
1230
1231
1232 /**
1233 * Return the Unicode char which is coded in the bytes at the given
1234 * position.
1235 *
 * @param bytes The byte[] representation of a Unicode string.
1237 * @param pos The current position to start decoding the char
1238 * @return The decoded char, or -1 if no char can be decoded TODO : Should
1239 * stop after the third byte, as a char is only 2 bytes long.
1240 */
1241 public static final char bytesToChar( byte[] bytes, int pos )
1242 {
1243 if ( bytes == null )
1244 {
1245 return ( char ) -1;
1246 }
1247
1248 if ( ( bytes[pos] & UTF8_MULTI_BYTES_MASK ) == 0 )
1249 {
1250 return ( char ) bytes[pos];
1251 }
1252 else
1253 {
1254 if ( ( bytes[pos] & UTF8_TWO_BYTES_MASK ) == UTF8_TWO_BYTES )
1255 {
1256 // Two bytes char
1257 return ( char ) ( ( ( bytes[pos] & 0x1C ) << 6 ) + // 110x-xxyy
1258 // 10zz-zzzz
1259 // ->
1260 // 0000-0xxx
1261 // 0000-0000
1262 ( ( bytes[pos] & 0x03 ) << 6 ) + // 110x-xxyy 10zz-zzzz
1263 // -> 0000-0000
1264 // yy00-0000
1265 ( bytes[pos + 1] & 0x3F ) // 110x-xxyy 10zz-zzzz -> 0000-0000
1266 // 00zz-zzzz
1267 ); // -> 0000-0xxx yyzz-zzzz (07FF)
1268 }
1269 else if ( ( bytes[pos] & UTF8_THREE_BYTES_MASK ) == UTF8_THREE_BYTES )
1270 {
1271 // Three bytes char
1272 return ( char ) (
1273 // 1110-tttt 10xx-xxyy 10zz-zzzz -> tttt-0000-0000-0000
1274 ( ( bytes[pos] & 0x0F ) << 12 ) +
1275 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-xxxx-0000-0000
1276 ( ( bytes[pos + 1] & 0x3C ) << 6 ) +
1277 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-yy00-0000
1278 ( ( bytes[pos + 1] & 0x03 ) << 6 ) +
1279 // 1110-tttt 10xx-xxyy 10zz-zzzz -> 0000-0000-00zz-zzzz
1280 ( bytes[pos + 2] & 0x3F )
1281 // -> tttt-xxxx yyzz-zzzz (FF FF)
1282 );
1283 }
1284 else if ( ( bytes[pos] & UTF8_FOUR_BYTES_MASK ) == UTF8_FOUR_BYTES )
1285 {
1286 // Four bytes char
1287 return ( char ) (
1288 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 000t-tt00
1289 // 0000-0000 0000-0000
1290 ( ( bytes[pos] & 0x07 ) << 18 ) +
1291 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-00uu
1292 // 0000-0000 0000-0000
1293 ( ( bytes[pos + 1] & 0x30 ) << 16 ) +
1294 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1295 // vvvv-0000 0000-0000
1296 ( ( bytes[pos + 1] & 0x0F ) << 12 ) +
1297 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1298 // 0000-xxxx 0000-0000
1299 ( ( bytes[pos + 2] & 0x3C ) << 6 ) +
1300 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1301 // 0000-0000 yy00-0000
1302 ( ( bytes[pos + 2] & 0x03 ) << 6 ) +
1303 // 1111-0ttt 10uu-vvvv 10xx-xxyy 10zz-zzzz -> 0000-0000
1304 // 0000-0000 00zz-zzzz
1305 ( bytes[pos + 3] & 0x3F )
1306 // -> 000t-ttuu vvvv-xxxx yyzz-zzzz (1FFFFF)
1307 );
1308 }
1309 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1310 {
1311 // Five bytes char
1312 return ( char ) (
1313 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1314 // 0000-00tt 0000-0000 0000-0000 0000-0000
1315 ( ( bytes[pos] & 0x03 ) << 24 ) +
1316 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1317 // 0000-0000 uuuu-uu00 0000-0000 0000-0000
1318 ( ( bytes[pos + 1] & 0x3F ) << 18 ) +
1319 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1320 // 0000-0000 0000-00vv 0000-0000 0000-0000
1321 ( ( bytes[pos + 2] & 0x30 ) << 12 ) +
1322 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1323 // 0000-0000 0000-0000 wwww-0000 0000-0000
1324 ( ( bytes[pos + 2] & 0x0F ) << 12 ) +
1325 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1326 // 0000-0000 0000-0000 0000-xxxx 0000-0000
1327 ( ( bytes[pos + 3] & 0x3C ) << 6 ) +
1328 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1329 // 0000-0000 0000-0000 0000-0000 yy00-0000
1330 ( ( bytes[pos + 3] & 0x03 ) << 6 ) +
1331 // 1111-10tt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz ->
1332 // 0000-0000 0000-0000 0000-0000 00zz-zzzz
1333 ( bytes[pos + 4] & 0x3F )
1334 // -> 0000-00tt uuuu-uuvv wwww-xxxx yyzz-zzzz (03 FF FF FF)
1335 );
1336 }
1337 else if ( ( bytes[pos] & UTF8_FIVE_BYTES_MASK ) == UTF8_FIVE_BYTES )
1338 {
1339 // Six bytes char
1340 return ( char ) (
1341 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1342 // ->
1343 // 0s00-0000 0000-0000 0000-0000 0000-0000
1344 ( ( bytes[pos] & 0x01 ) << 30 ) +
1345 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1346 // ->
1347 // 00tt-tttt 0000-0000 0000-0000 0000-0000
1348 ( ( bytes[pos + 1] & 0x3F ) << 24 ) +
1349 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1350 // 10zz-zzzz ->
1351 // 0000-0000 uuuu-uu00 0000-0000 0000-0000
1352 ( ( bytes[pos + 2] & 0x3F ) << 18 ) +
1353 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1354 // 10zz-zzzz ->
1355 // 0000-0000 0000-00vv 0000-0000 0000-0000
1356 ( ( bytes[pos + 3] & 0x30 ) << 12 ) +
1357 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1358 // 10zz-zzzz ->
1359 // 0000-0000 0000-0000 wwww-0000 0000-0000
1360 ( ( bytes[pos + 3] & 0x0F ) << 12 ) +
1361 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1362 // 10zz-zzzz ->
1363 // 0000-0000 0000-0000 0000-xxxx 0000-0000
1364 ( ( bytes[pos + 4] & 0x3C ) << 6 ) +
1365 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy
1366 // 10zz-zzzz ->
1367 // 0000-0000 0000-0000 0000-0000 yy00-0000
1368 ( ( bytes[pos + 4] & 0x03 ) << 6 ) +
1369 // 1111-110s 10tt-tttt 10uu-uuuu 10vv-wwww 10xx-xxyy 10zz-zzzz
1370 // ->
1371 // 0000-0000 0000-0000 0000-0000 00zz-zzzz
1372 ( bytes[pos + 5] & 0x3F )
1373 // -> 0stt-tttt uuuu-uuvv wwww-xxxx yyzz-zzzz (7F FF FF FF)
1374 );
1375 }
1376 else
1377 {
1378 return ( char ) -1;
1379 }
1380 }
1381 }
1382
1383
1384 /**
 * Encode the given char into its UTF-8 byte sequence (one, two or three
 * bytes long).
1387 *
1388 * @param car The character to be transformed to an array of bytes
1389 *
1390 * @return The byte array representing the char
1391 *
1392 * TODO : Should stop after the third byte, as a char is only 2 bytes long.
1393 */
1394 public static final byte[] charToBytes( char car )
1395 {
1396 byte[] bytes = new byte[countNbBytesPerChar( car )];
1397
1398 if ( car <= 0x7F )
1399 {
1400 // Single byte char
1401 bytes[0] = ( byte ) car;
1402 return bytes;
1403 }
1404 else if ( car <= 0x7FF )
1405 {
1406 // two bytes char
1407 bytes[0] = ( byte ) ( 0x00C0 + ( ( car & 0x07C0 ) >> 6 ) );
1408 bytes[1] = ( byte ) ( 0x0080 + ( car & 0x3F ) );
1409 }
1410 else
1411 {
1412 // Three bytes char
1413 bytes[0] = ( byte ) ( 0x00E0 + ( ( car & 0xF000 ) >> 12 ) );
1414 bytes[1] = ( byte ) ( 0x0080 + ( ( car & 0x0FC0 ) >> 6 ) );
1415 bytes[2] = ( byte ) ( 0x0080 + ( car & 0x3F ) );
1416 }
1417
1418 return bytes;
1419 }
1420
1421
1422 /**
1423 * Count the number of chars included in the given byte[].
1424 *
1425 * @param bytes The byte array to decode
1426 * @return The number of char in the byte array
1427 */
1428 public static final int countChars( byte[] bytes )
1429 {
1430 if ( bytes == null )
1431 {
1432 return 0;
1433 }
1434
1435 int nbChars = 0;
1436 int currentPos = 0;
1437
1438 while ( currentPos < bytes.length )
1439 {
1440 currentPos += countBytesPerChar( bytes, currentPos );
1441 nbChars++;
1442 }
1443
1444 return nbChars;
1445 }
1446
1447
1448 /**
1449 * Check if a text is present at the current position in a buffer.
1450 *
1451 * @param bytes The buffer which contains the data
1452 * @param index Current position in the buffer
1453 * @param text The text we want to check
 * @return the position in the buffer just after the matched text, or
 * <code>NOT_EQUAL</code> if the text is not present at the given position.
1455 */
1456 public static final int areEquals( byte[] bytes, int index, String text )
1457 {
1458 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 )
1459 || ( text == null ) )
1460 {
1461 return NOT_EQUAL;
1462 }
1463 else
1464 {
1465 try
1466 {
1467 byte[] data = text.getBytes( "UTF-8" );
1468
1469 return areEquals( bytes, index, data );
1470 }
1471 catch ( UnsupportedEncodingException uee )
1472 {
1473 // if this happens something is really strange
1474 throw new RuntimeException( uee );
1475 }
1476 }
1477 }
1478
1479
1480 /**
1481 * Check if a text is present at the current position in a buffer.
1482 *
1483 * @param chars The buffer which contains the data
1484 * @param index Current position in the buffer
1485 * @param text The text we want to check
 * @return the position in the buffer just after the matched text, or
 * <code>NOT_EQUAL</code> if the text is not present at the given position.
1487 */
1488 public static final int areEquals( char[] chars, int index, String text )
1489 {
1490 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 )
1491 || ( text == null ) )
1492 {
1493 return NOT_EQUAL;
1494 }
1495 else
1496 {
1497 char[] data = text.toCharArray();
1498
1499 return areEquals( chars, index, data );
1500 }
1501 }
1502
1503
1504 /**
1505 * Check if a text is present at the current position in a buffer.
1506 *
1507 * @param chars The buffer which contains the data
1508 * @param index Current position in the buffer
1509 * @param chars2 The text we want to check
 * @return the position in the buffer just after the matched text, or
 * <code>NOT_EQUAL</code> if the text is not present at the given position.
1511 */
1512 public static final int areEquals( char[] chars, int index, char[] chars2 )
1513 {
1514 if ( ( chars == null ) || ( chars.length == 0 ) || ( chars.length <= index ) || ( index < 0 )
1515 || ( chars2 == null ) || ( chars2.length == 0 )
1516 || ( chars2.length > ( chars.length + index ) ) )
1517 {
1518 return NOT_EQUAL;
1519 }
1520 else
1521 {
1522 for ( int i = 0; i < chars2.length; i++ )
1523 {
1524 if ( chars[index++] != chars2[i] )
1525 {
1526 return NOT_EQUAL;
1527 }
1528 }
1529
1530 return index;
1531 }
1532 }
1533
1534 /**
1535 * Check if a text is present at the current position in another string.
1536 *
1537 * @param string The string which contains the data
1538 * @param index Current position in the string
1539 * @param text The text we want to check
1540 * @return <code>true</code> if the string contains the text.
1541 */
1542 public static final boolean areEquals( String string, int index, String text )
1543 {
1544 if ( ( string == null ) || ( text == null ) )
1545 {
1546 return false;
1547 }
1548
1549 int length1 = string.length();
1550 int length2 = text.length();
1551
1552 if ( ( length1 == 0 ) || ( length1 <= index ) || ( index < 0 )
1553 || ( length2 == 0 ) || ( length2 > ( length1 + index ) ) )
1554 {
1555 return false;
1556 }
1557 else
1558 {
1559 return string.substring( index ).startsWith( text );
1560 }
1561 }
1562
1563
1564 /**
1565 * Check if a text is present at the current position in a buffer.
1566 *
1567 * @param bytes The buffer which contains the data
1568 * @param index Current position in the buffer
1569 * @param bytes2 The text we want to check
 * @return the position in the buffer just after the matched text, or
 * <code>NOT_EQUAL</code> if the text is not present at the given position.
1571 */
1572 public static final int areEquals( byte[] bytes, int index, byte[] bytes2 )
1573 {
1574
1575 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( bytes.length <= index ) || ( index < 0 )
1576 || ( bytes2 == null ) || ( bytes2.length == 0 )
1577 || ( bytes2.length > ( bytes.length + index ) ) )
1578 {
1579 return NOT_EQUAL;
1580 }
1581 else
1582 {
1583 for ( int i = 0; i < bytes2.length; i++ )
1584 {
1585 if ( bytes[index++] != bytes2[i] )
1586 {
1587 return NOT_EQUAL;
1588 }
1589 }
1590
1591 return index;
1592 }
1593 }
1594
1595
1596 /**
1597 * Test if the current character is equal to a specific character. This
1598 * function works only for character between 0 and 127, as it does compare a
1599 * byte and a char (which is 16 bits wide)
1600 *
1601 * @param byteArray
1602 * The buffer which contains the data
1603 * @param index
1604 * Current position in the buffer
1605 * @param car
1606 * The character we want to compare with the current buffer
1607 * position
1608 * @return <code>true</code> if the current character equals the given
1609 * character.
1610 */
1611 public static final boolean isCharASCII( byte[] byteArray, int index, char car )
1612 {
1613 if ( ( byteArray == null ) || ( byteArray.length == 0 ) || ( index < 0 ) || ( index >= byteArray.length ) )
1614 {
1615 return false;
1616 }
1617 else
1618 {
1619 return ( ( byteArray[index] == car ) ? true : false );
1620 }
1621 }
1622
1623
1624 /**
1625 * Test if the current character is equal to a specific character.
1626 *
1627 * @param chars
1628 * The buffer which contains the data
1629 * @param index
1630 * Current position in the buffer
1631 * @param car
1632 * The character we want to compare with the current buffer
1633 * position
1634 * @return <code>true</code> if the current character equals the given
1635 * character.
1636 */
1637 public static final boolean isCharASCII( char[] chars, int index, char car )
1638 {
1639 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
1640 {
1641 return false;
1642 }
1643 else
1644 {
1645 return ( ( chars[index] == car ) ? true : false );
1646 }
1647 }
1648
1649 /**
1650 * Test if the current character is equal to a specific character.
1651 *
1652 * @param string The String which contains the data
1653 * @param index Current position in the string
1654 * @param car The character we want to compare with the current string
1655 * position
1656 * @return <code>true</code> if the current character equals the given
1657 * character.
1658 */
1659 public static final boolean isCharASCII( String string, int index, char car )
1660 {
1661 if ( string == null )
1662 {
1663 return false;
1664 }
1665
1666 int length = string.length();
1667
1668 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1669 {
1670 return false;
1671 }
1672 else
1673 {
1674 return string.charAt( index ) == car;
1675 }
1676 }
1677
1678
1679 /**
1680 * Test if the current character is equal to a specific character.
1681 *
1682 * @param string The String which contains the data
1683 * @param index Current position in the string
1684 * @param car The character we want to compare with the current string
1685 * position
1686 * @return <code>true</code> if the current character equals the given
1687 * character.
1688 */
1689 public static final boolean isICharASCII( String string, int index, char car )
1690 {
1691 if ( string == null )
1692 {
1693 return false;
1694 }
1695
1696 int length = string.length();
1697
1698 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1699 {
1700 return false;
1701 }
1702 else
1703 {
1704 return ( ( string.charAt( index ) | 0x20 ) & car ) == car;
1705 }
1706 }
1707
1708
1709 /**
1710 * Test if the current character is equal to a specific character.
1711 *
 * @param bytes The byte array which contains the data
 * @param index Current position in the byte array
1714 * @param car The character we want to compare with the current string
1715 * position
1716 * @return <code>true</code> if the current character equals the given
1717 * character.
1718 */
1719 public static final boolean isICharASCII( byte[] bytes, int index, char car )
1720 {
1721 if ( bytes == null )
1722 {
1723 return false;
1724 }
1725
1726 int length = bytes.length;
1727
1728 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1729 {
1730 return false;
1731 }
1732 else
1733 {
1734 return ( ( bytes[ index ] | 0x20 ) & car ) == car;
1735 }
1736 }
1737
1738
1739 /**
1740 * Test if the current character is a bit, ie 0 or 1.
1741 *
1742 * @param string
1743 * The String which contains the data
1744 * @param index
1745 * Current position in the string
1746 * @return <code>true</code> if the current character is a bit (0 or 1)
1747 */
1748 public static final boolean isBit( String string, int index )
1749 {
1750 if ( string == null )
1751 {
1752 return false;
1753 }
1754
1755 int length = string.length();
1756
1757 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1758 {
1759 return false;
1760 }
1761 else
1762 {
1763 char c = string.charAt( index );
1764 return ( ( c == '0' ) || ( c == '1' ) );
1765 }
1766 }
1767
1768
1769 /**
 * Get the character at a given position in a string, checking for limits
 *
 * @param string The string which contains the data
 * @param index Current position in the string
 * @return The character at the given position, or '\0' if something went wrong
1775 */
1776 public static final char charAt( String string, int index )
1777 {
1778 if ( string == null )
1779 {
1780 return '\0';
1781 }
1782
1783 int length = string.length();
1784
1785 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1786 {
1787 return '\0';
1788 }
1789 else
1790 {
1791 return string.charAt( index ) ;
1792 }
1793 }
1794
1795
1796 /**
1797 * Translate two chars to an hex value. The chars must be
1798 * in [a-fA-F0-9]
1799 *
1800 * @param high The high value
1801 * @param low The low value
1802 * @return A byte representation of the two chars
1803 */
1804 public static byte getHexValue( char high, char low )
1805 {
1806 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) )
1807 {
1808 return -1;
1809 }
1810
1811 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] );
1812 }
1813
1814
1815 /**
1816 * Translate two bytes to an hex value. The bytes must be
1817 * in [0-9a-fA-F]
1818 *
1819 * @param high The high value
1820 * @param low The low value
1821 * @return A byte representation of the two bytes
1822 */
1823 public static byte getHexValue( byte high, byte low )
1824 {
1825 if ( ( high > 127 ) || ( low > 127 ) || ( high < 0 ) | ( low < 0 ) )
1826 {
1827 return -1;
1828 }
1829
1830 return (byte)( ( HEX_VALUE[high] << 4 ) | HEX_VALUE[low] );
1831 }
1832
1833
1834 /**
 * Return an hex value from a single char
1836 * The char must be in [0-9a-fA-F]
1837 *
1838 * @param c The char we want to convert
1839 * @return A byte between 0 and 15
1840 */
1841 public static byte getHexValue( char c )
1842 {
1843 if ( ( c > 127 ) || ( c < 0 ) )
1844 {
1845 return -1;
1846 }
1847
1848 return HEX_VALUE[c];
1849 }
1850
1851 /**
1852 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1853 * [0x41-0x46] | [0x61-0x66]
1854 *
1855 * @param bytes The buffer which contains the data
1856 * @param index Current position in the buffer
1857 * @return <code>true</code> if the current character is a Hex Char
1858 */
1859 public static final boolean isHex( byte[] bytes, int index )
1860 {
1861 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
1862 {
1863 return false;
1864 }
1865 else
1866 {
1867 byte c = bytes[index];
1868
1869 if ( ( ( c | 0x7F ) != 0x7F ) || ( HEX[c] == false ) )
1870 {
1871 return false;
1872 }
1873 else
1874 {
1875 return true;
1876 }
1877 }
1878 }
1879
1880
1881 /**
1882 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1883 * [0x41-0x46] | [0x61-0x66]
1884 *
1885 * @param chars The buffer which contains the data
1886 * @param index Current position in the buffer
1887 * @return <code>true</code> if the current character is a Hex Char
1888 */
1889 public static final boolean isHex( char[] chars, int index )
1890 {
1891 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
1892 {
1893 return false;
1894 }
1895 else
1896 {
1897 char c = chars[index];
1898
1899 if ( ( c > 127 ) || ( HEX[c] == false ) )
1900 {
1901 return false;
1902 }
1903 else
1904 {
1905 return true;
1906 }
1907 }
1908 }
1909
1910 /**
1911 * Check if the current character is an Hex Char <hex> ::= [0x30-0x39] |
1912 * [0x41-0x46] | [0x61-0x66]
1913 *
1914 * @param string The string which contains the data
1915 * @param index Current position in the string
1916 * @return <code>true</code> if the current character is a Hex Char
1917 */
1918 public static final boolean isHex( String string, int index )
1919 {
1920 if ( string == null )
1921 {
1922 return false;
1923 }
1924
1925 int length = string.length();
1926
1927 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
1928 {
1929 return false;
1930 }
1931 else
1932 {
1933 char c = string.charAt( index );
1934
1935 if ( ( c > 127 ) || ( HEX[c] == false ) )
1936 {
1937 return false;
1938 }
1939 else
1940 {
1941 return true;
1942 }
1943 }
1944 }
1945
1946
1947 /**
1948 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
1949 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
1950 *
1951 * @param bytes The buffer which contains the data
1952 * @return <code>true</code> if the current character is a Digit
1953 */
1954 public static final boolean isDigit( byte[] bytes )
1955 {
1956 if ( ( bytes == null ) || ( bytes.length == 0 ) )
1957 {
1958 return false;
1959 }
1960 else
1961 {
1962 return ( ( ( ( bytes[0] | 0x7F ) != 0x7F ) || !DIGIT[bytes[0]] ) ? false : true );
1963 }
1964 }
1965
1966
1967 /**
1968 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
1969 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
1970 *
1971 * @param car the character to test
1972 *
1973 * @return <code>true</code> if the character is a Digit
1974 */
1975 public static final boolean isDigit( char car )
1976 {
1977 return ( car >= '0' ) && ( car <= '9' );
1978 }
1979
1980
1981 /**
1982 * Test if the current byte is an Alpha character :
1983 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A]
1984 *
1985 * @param c The byte to test
1986 *
1987 * @return <code>true</code> if the byte is an Alpha
1988 * character
1989 */
1990 public static final boolean isAlpha( byte c )
1991 {
1992 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] );
1993 }
1994
1995
1996 /**
1997 * Test if the current character is an Alpha character :
1998 * <alpha> ::= [0x41-0x5A] | [0x61-0x7A]
1999 *
2000 * @param c The char to test
2001 *
2002 * @return <code>true</code> if the character is an Alpha
2003 * character
2004 */
2005 public static final boolean isAlpha( char c )
2006 {
2007 return ( ( c > 0 ) && ( c <= 127 ) && ALPHA[c] );
2008 }
2009
2010
2011 /**
2012 * Test if the current character is an Alpha character : <alpha> ::=
2013 * [0x41-0x5A] | [0x61-0x7A]
2014 *
2015 * @param bytes The buffer which contains the data
2016 * @param index Current position in the buffer
2017 * @return <code>true</code> if the current character is an Alpha
2018 * character
2019 */
2020 public static final boolean isAlphaASCII( byte[] bytes, int index )
2021 {
2022 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2023 {
2024 return false;
2025 }
2026 else
2027 {
2028 byte c = bytes[index];
2029
2030 if ( ( ( c | 0x7F ) != 0x7F ) || ( ALPHA[c] == false ) )
2031 {
2032 return false;
2033 }
2034 else
2035 {
2036 return true;
2037 }
2038 }
2039 }
2040
2041
2042 /**
2043 * Test if the current character is an Alpha character : <alpha> ::=
2044 * [0x41-0x5A] | [0x61-0x7A]
2045 *
2046 * @param chars The buffer which contains the data
2047 * @param index Current position in the buffer
2048 * @return <code>true</code> if the current character is an Alpha
2049 * character
2050 */
2051 public static final boolean isAlphaASCII( char[] chars, int index )
2052 {
2053 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2054 {
2055 return false;
2056 }
2057 else
2058 {
2059 char c = chars[index];
2060
2061 if ( ( c > 127 ) || ( ALPHA[c] == false ) )
2062 {
2063 return false;
2064 }
2065 else
2066 {
2067 return true;
2068 }
2069 }
2070 }
2071
2072
2073 /**
2074 * Test if the current character is an Alpha character : <alpha> ::=
2075 * [0x41-0x5A] | [0x61-0x7A]
2076 *
2077 * @param string The string which contains the data
2078 * @param index Current position in the string
2079 * @return <code>true</code> if the current character is an Alpha
2080 * character
2081 */
2082 public static final boolean isAlphaASCII( String string, int index )
2083 {
2084 if ( string == null )
2085 {
2086 return false;
2087 }
2088
2089 int length = string.length();
2090
2091 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2092 {
2093 return false;
2094 }
2095 else
2096 {
2097 char c = string.charAt( index );
2098
2099 if ( ( c > 127 ) || ( ALPHA[c] == false ) )
2100 {
2101 return false;
2102 }
2103 else
2104 {
2105 return true;
2106 }
2107 }
2108 }
2109
2110
2111 /**
2112 * Test if the current character is a lowercased Alpha character : <br/>
2113 * <alpha> ::= [0x61-0x7A]
2114 *
2115 * @param string The string which contains the data
2116 * @param index Current position in the string
2117 * @return <code>true</code> if the current character is a lower Alpha
2118 * character
2119 */
2120 public static final boolean isAlphaLowercaseASCII( String string, int index )
2121 {
2122 if ( string == null )
2123 {
2124 return false;
2125 }
2126
2127 int length = string.length();
2128
2129 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2130 {
2131 return false;
2132 }
2133 else
2134 {
2135 char c = string.charAt( index );
2136
2137 if ( ( c > 127 ) || ( ALPHA_LOWER_CASE[c] == false ) )
2138 {
2139 return false;
2140 }
2141 else
2142 {
2143 return true;
2144 }
2145 }
2146 }
2147
2148
2149 /**
 * Test if the current character is an uppercased Alpha character : <br/>
 * <alpha> ::= [0x41-0x5A]
 *
 * @param string The string which contains the data
 * @param index Current position in the string
 * @return <code>true</code> if the current character is an upper Alpha
 * character
2157 */
2158 public static final boolean isAlphaUppercaseASCII( String string, int index )
2159 {
2160 if ( string == null )
2161 {
2162 return false;
2163 }
2164
2165 int length = string.length();
2166
2167 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2168 {
2169 return false;
2170 }
2171 else
2172 {
2173 char c = string.charAt( index );
2174
2175 if ( ( c > 127 ) || ( ALPHA_UPPER_CASE[c] == false ) )
2176 {
2177 return false;
2178 }
2179 else
2180 {
2181 return true;
2182 }
2183 }
2184 }
2185
2186
2187 /**
2188 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2189 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2190 *
2191 * @param bytes The buffer which contains the data
2192 * @param index Current position in the buffer
2193 * @return <code>true</code> if the current character is a Digit
2194 */
2195 public static final boolean isDigit( byte[] bytes, int index )
2196 {
2197 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2198 {
2199 return false;
2200 }
2201 else
2202 {
2203 return ( ( ( ( bytes[index] | 0x7F ) != 0x7F ) || !DIGIT[bytes[index]] ) ? false : true );
2204 }
2205 }
2206
2207
2208 /**
2209 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2210 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2211 *
2212 * @param chars The buffer which contains the data
2213 * @param index Current position in the buffer
2214 * @return <code>true</code> if the current character is a Digit
2215 */
2216 public static final boolean isDigit( char[] chars, int index )
2217 {
2218 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2219 {
2220 return false;
2221 }
2222 else
2223 {
2224 return ( ( ( chars[index] > 127 ) || !DIGIT[chars[index]] ) ? false : true );
2225 }
2226 }
2227
2228
2229 /**
2230 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2231 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2232 *
2233 * @param string The string which contains the data
2234 * @param index Current position in the string
2235 * @return <code>true</code> if the current character is a Digit
2236 */
2237 public static final boolean isDigit( String string, int index )
2238 {
2239 if ( string == null )
2240 {
2241 return false;
2242 }
2243
2244 int length = string.length();
2245
2246 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2247 {
2248 return false;
2249 }
2250 else
2251 {
2252 char c = string.charAt( index );
2253 return ( ( ( c > 127 ) || !DIGIT[c] ) ? false : true );
2254 }
2255 }
2256
2257
2258 /**
2259 * Test if the current character is a digit <digit> ::= '0' | '1' | '2' |
2260 * '3' | '4' | '5' | '6' | '7' | '8' | '9'
2261 *
2262 * @param chars The buffer which contains the data
2263 * @return <code>true</code> if the current character is a Digit
2264 */
2265 public static final boolean isDigit( char[] chars )
2266 {
2267 if ( ( chars == null ) || ( chars.length == 0 ) )
2268 {
2269 return false;
2270 }
2271 else
2272 {
2273 return ( ( ( chars[0] > 127 ) || !DIGIT[chars[0]] ) ? false : true );
2274 }
2275 }
2276
2277
2278 /**
2279 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2280 * 127).
2281 * <char> ::= <alpha> | <digit>
2282 *
2283 * @param string The string which contains the data
2284 * @param index Current position in the string
2285 * @return The position of the next character, if the current one is a CHAR.
2286 */
2287 public static final boolean isAlphaDigit( String string, int index )
2288 {
2289 if ( string == null )
2290 {
2291 return false;
2292 }
2293
2294 int length = string.length();
2295
2296 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2297 {
2298 return false;
2299 }
2300 else
2301 {
2302 char c = string.charAt( index );
2303
2304 if ( ( c > 127 ) || ( ALPHA_DIGIT[c] == false ) )
2305 {
2306 return false;
2307 }
2308 else
2309 {
2310 return true;
2311 }
2312 }
2313 }
2314
2315
2316 /**
2317 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2318 * 127). <char> ::= <alpha> | <digit> | '-'
2319 *
2320 * @param bytes The buffer which contains the data
2321 * @param index Current position in the buffer
2322 * @return The position of the next character, if the current one is a CHAR.
2323 */
2324 public static final boolean isAlphaDigitMinus( byte[] bytes, int index )
2325 {
2326 if ( ( bytes == null ) || ( bytes.length == 0 ) || ( index < 0 ) || ( index >= bytes.length ) )
2327 {
2328 return false;
2329 }
2330 else
2331 {
2332 byte c = bytes[index];
2333
2334 if ( ( ( c | 0x7F ) != 0x7F ) || ( CHAR[c] == false ) )
2335 {
2336 return false;
2337 }
2338 else
2339 {
2340 return true;
2341 }
2342 }
2343 }
2344
2345
2346 /**
2347 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2348 * 127). <char> ::= <alpha> | <digit> | '-'
2349 *
2350 * @param chars The buffer which contains the data
2351 * @param index Current position in the buffer
2352 * @return The position of the next character, if the current one is a CHAR.
2353 */
2354 public static final boolean isAlphaDigitMinus( char[] chars, int index )
2355 {
2356 if ( ( chars == null ) || ( chars.length == 0 ) || ( index < 0 ) || ( index >= chars.length ) )
2357 {
2358 return false;
2359 }
2360 else
2361 {
2362 char c = chars[index];
2363
2364 if ( ( c > 127 ) || ( CHAR[c] == false ) )
2365 {
2366 return false;
2367 }
2368 else
2369 {
2370 return true;
2371 }
2372 }
2373 }
2374
2375
2376 /**
2377 * Check if the current character is an 7 bits ASCII CHAR (between 0 and
2378 * 127). <char> ::= <alpha> | <digit> | '-'
2379 *
2380 * @param string The string which contains the data
2381 * @param index Current position in the string
2382 * @return The position of the next character, if the current one is a CHAR.
2383 */
2384 public static final boolean isAlphaDigitMinus( String string, int index )
2385 {
2386 if ( string == null )
2387 {
2388 return false;
2389 }
2390
2391 int length = string.length();
2392
2393 if ( ( length == 0 ) || ( index < 0 ) || ( index >= length ) )
2394 {
2395 return false;
2396 }
2397 else
2398 {
2399 char c = string.charAt( index );
2400
2401 if ( ( c > 127 ) || ( CHAR[c] == false ) )
2402 {
2403 return false;
2404 }
2405 else
2406 {
2407 return true;
2408 }
2409 }
2410 }
2411
2412
2413 // Empty checks
2414 // -----------------------------------------------------------------------
2415 /**
2416 * <p>
2417 * Checks if a String is empty ("") or null.
2418 * </p>
2419 *
2420 * <pre>
2421 * StringUtils.isEmpty(null) = true
2422 * StringUtils.isEmpty("") = true
2423 * StringUtils.isEmpty(" ") = false
2424 * StringUtils.isEmpty("bob") = false
2425 * StringUtils.isEmpty(" bob ") = false
2426 * </pre>
2427 *
2428 * <p>
2429 * NOTE: This method changed in Lang version 2.0. It no longer trims the
2430 * String. That functionality is available in isBlank().
2431 * </p>
2432 *
2433 * @param str the String to check, may be null
2434 * @return <code>true</code> if the String is empty or null
2435 */
2436 public static final boolean isEmpty( String str )
2437 {
2438 return str == null || str.length() == 0;
2439 }
2440
2441
2442 /**
2443 * Checks if a bytes array is empty or null.
2444 *
2445 * @param bytes The bytes array to check, may be null
2446 * @return <code>true</code> if the bytes array is empty or null
2447 */
2448 public static final boolean isEmpty( byte[] bytes )
2449 {
2450 return bytes == null || bytes.length == 0;
2451 }
2452
2453
2454 /**
2455 * <p>
2456 * Checks if a String is not empty ("") and not null.
2457 * </p>
2458 *
2459 * <pre>
2460 * StringUtils.isNotEmpty(null) = false
2461 * StringUtils.isNotEmpty("") = false
2462 * StringUtils.isNotEmpty(" ") = true
2463 * StringUtils.isNotEmpty("bob") = true
2464 * StringUtils.isNotEmpty(" bob ") = true
2465 * </pre>
2466 *
2467 * @param str the String to check, may be null
2468 * @return <code>true</code> if the String is not empty and not null
2469 */
2470 public static final boolean isNotEmpty( String str )
2471 {
2472 return str != null && str.length() > 0;
2473 }
2474
2475
2476 /**
2477 * <p>
2478 * Removes spaces (char <= 32) from both start and ends of this String,
2479 * handling <code>null</code> by returning <code>null</code>.
2480 * </p>
2481 * Trim removes start and end characters <= 32.
2482 *
2483 * <pre>
2484 * StringUtils.trim(null) = null
2485 * StringUtils.trim("") = ""
2486 * StringUtils.trim(" ") = ""
2487 * StringUtils.trim("abc") = "abc"
2488 * StringUtils.trim(" abc ") = "abc"
2489 * </pre>
2490 *
2491 * @param str the String to be trimmed, may be null
2492 * @return the trimmed string, <code>null</code> if null String input
2493 */
2494 public static final String trim( String str )
2495 {
2496 return ( isEmpty( str ) ? "" : str.trim() );
2497 }
2498
2499
2500 /**
2501 * <p>
2502 * Removes spaces (char <= 32) from both start and ends of this bytes
2503 * array, handling <code>null</code> by returning <code>null</code>.
2504 * </p>
2505 * Trim removes start and end characters <= 32.
2506 *
2507 * <pre>
2508 * StringUtils.trim(null) = null
2509 * StringUtils.trim("") = ""
2510 * StringUtils.trim(" ") = ""
2511 * StringUtils.trim("abc") = "abc"
2512 * StringUtils.trim(" abc ") = "abc"
2513 * </pre>
2514 *
2515 * @param bytes the byte array to be trimmed, may be null
2516 *
2517 * @return the trimmed byte array
2518 */
2519 public static final byte[] trim( byte[] bytes )
2520 {
2521 if ( isEmpty( bytes ) )
2522 {
2523 return EMPTY_BYTES;
2524 }
2525
2526 int start = trimLeft( bytes, 0 );
2527 int end = trimRight( bytes, bytes.length - 1 );
2528
2529 int length = end - start + 1;
2530
2531 if ( length != 0 )
2532 {
2533 byte[] newBytes = new byte[end - start + 1];
2534
2535 System.arraycopy( bytes, start, newBytes, 0, length );
2536
2537 return newBytes;
2538 }
2539 else
2540 {
2541 return EMPTY_BYTES;
2542 }
2543 }
2544
2545
2546 /**
2547 * <p>
2548 * Removes spaces (char <= 32) from start of this String, handling
2549 * <code>null</code> by returning <code>null</code>.
2550 * </p>
2551 * Trim removes start characters <= 32.
2552 *
2553 * <pre>
2554 * StringUtils.trimLeft(null) = null
2555 * StringUtils.trimLeft("") = ""
2556 * StringUtils.trimLeft(" ") = ""
2557 * StringUtils.trimLeft("abc") = "abc"
2558 * StringUtils.trimLeft(" abc ") = "abc "
2559 * </pre>
2560 *
2561 * @param str the String to be trimmed, may be null
2562 * @return the trimmed string, <code>null</code> if null String input
2563 */
2564 public static final String trimLeft( String str )
2565 {
2566 if ( isEmpty( str ) )
2567 {
2568 return "";
2569 }
2570
2571 int start = 0;
2572 int end = str.length();
2573
2574 while ( ( start < end ) && ( str.charAt( start ) == ' ' ) )
2575 {
2576 start++;
2577 }
2578
2579 return ( start == 0 ? str : str.substring( start ) );
2580 }
2581
2582
2583 /**
2584 * <p>
2585 * Removes spaces (char <= 32) from start of this array, handling
2586 * <code>null</code> by returning <code>null</code>.
2587 * </p>
2588 * Trim removes start characters <= 32.
2589 *
2590 * <pre>
2591 * StringUtils.trimLeft(null) = null
2592 * StringUtils.trimLeft("") = ""
2593 * StringUtils.trimLeft(" ") = ""
2594 * StringUtils.trimLeft("abc") = "abc"
2595 * StringUtils.trimLeft(" abc ") = "abc "
2596 * </pre>
2597 *
2598 * @param chars the chars array to be trimmed, may be null
2599 * @return the position of the first char which is not a space, or the last
2600 * position of the array.
2601 */
2602 public static final int trimLeft( char[] chars, int pos )
2603 {
2604 if ( chars == null )
2605 {
2606 return pos;
2607 }
2608
2609 while ( ( pos < chars.length ) && ( chars[pos] == ' ' ) )
2610 {
2611 pos++;
2612 }
2613
2614 return pos;
2615 }
2616
2617
2618 /**
2619 * <p>
2620 * Removes spaces (char <= 32) from a position in this array, handling
2621 * <code>null</code> by returning <code>null</code>.
2622 * </p>
2623 * Trim removes start characters <= 32.
2624 *
2625 * <pre>
2626 * StringUtils.trimLeft(null) = null
2627 * StringUtils.trimLeft("",...) = ""
2628 * StringUtils.trimLeft(" ",...) = ""
2629 * StringUtils.trimLeft("abc",...) = "abc"
2630 * StringUtils.trimLeft(" abc ",...) = "abc "
2631 * </pre>
2632 *
2633 * @param string the string to be trimmed, may be null
2634 * @param pos The starting position
2635 */
2636 public static final void trimLeft( String string, Position pos )
2637 {
2638 if ( string == null )
2639 {
2640 return;
2641 }
2642
2643 int length = string.length();
2644
2645 while ( ( pos.start < length ) && ( string.charAt( pos.start ) == ' ' ) )
2646 {
2647 pos.start++;
2648 }
2649
2650 pos.end = pos.start;
2651
2652 return;
2653 }
2654
2655
2656 /**
2657 * <p>
2658 * Removes spaces (char <= 32) from a position in this array, handling
2659 * <code>null</code> by returning <code>null</code>.
2660 * </p>
2661 * Trim removes start characters <= 32.
2662 *
2663 * <pre>
2664 * StringUtils.trimLeft(null) = null
2665 * StringUtils.trimLeft("",...) = ""
2666 * StringUtils.trimLeft(" ",...) = ""
2667 * StringUtils.trimLeft("abc",...) = "abc"
2668 * StringUtils.trimLeft(" abc ",...) = "abc "
2669 * </pre>
2670 *
2671 * @param bytes the byte array to be trimmed, may be null
2672 * @param pos The starting position
2673 */
2674 public static final void trimLeft( byte[] bytes, Position pos )
2675 {
2676 if ( bytes == null )
2677 {
2678 return;
2679 }
2680
2681 int length = bytes.length;
2682
2683 while ( ( pos.start < length ) && ( bytes[ pos.start ] == ' ' ) )
2684 {
2685 pos.start++;
2686 }
2687
2688 pos.end = pos.start;
2689
2690 return;
2691 }
2692
2693
2694 /**
2695 * <p>
2696 * Removes spaces (char <= 32) from start of this array, handling
2697 * <code>null</code> by returning <code>null</code>.
2698 * </p>
2699 * Trim removes start characters <= 32.
2700 *
2701 * <pre>
2702 * StringUtils.trimLeft(null) = null
2703 * StringUtils.trimLeft("") = ""
2704 * StringUtils.trimLeft(" ") = ""
2705 * StringUtils.trimLeft("abc") = "abc"
2706 * StringUtils.trimLeft(" abc ") = "abc "
2707 * </pre>
2708 *
2709 * @param bytes the byte array to be trimmed, may be null
2710 * @return the position of the first byte which is not a space, or the last
2711 * position of the array.
2712 */
2713 public static final int trimLeft( byte[] bytes, int pos )
2714 {
2715 if ( bytes == null )
2716 {
2717 return pos;
2718 }
2719
2720 while ( ( pos < bytes.length ) && ( bytes[pos] == ' ' ) )
2721 {
2722 pos++;
2723 }
2724
2725 return pos;
2726 }
2727
2728
2729 /**
2730 * <p>
2731 * Removes spaces (char <= 32) from end of this String, handling
2732 * <code>null</code> by returning <code>null</code>.
2733 * </p>
2734 * Trim removes start characters <= 32.
2735 *
2736 * <pre>
2737 * StringUtils.trimRight(null) = null
2738 * StringUtils.trimRight("") = ""
2739 * StringUtils.trimRight(" ") = ""
2740 * StringUtils.trimRight("abc") = "abc"
2741 * StringUtils.trimRight(" abc ") = " abc"
2742 * </pre>
2743 *
2744 * @param str the String to be trimmed, may be null
2745 * @return the trimmed string, <code>null</code> if null String input
2746 */
2747 public static final String trimRight( String str )
2748 {
2749 if ( isEmpty( str ) )
2750 {
2751 return "";
2752 }
2753
2754 int length = str.length();
2755 int end = length;
2756
2757 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) )
2758 {
2759 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) )
2760 {
2761 break;
2762 }
2763
2764 end--;
2765 }
2766
2767 return ( end == length ? str : str.substring( 0, end ) );
2768 }
2769
2770 /**
2771 * <p>
2772 * Removes spaces (char <= 32) from end of this String, handling
2773 * <code>null</code> by returning <code>null</code>.
2774 * </p>
2775 * Trim removes start characters <= 32.
2776 *
2777 * <pre>
2778 * StringUtils.trimRight(null) = null
2779 * StringUtils.trimRight("") = ""
2780 * StringUtils.trimRight(" ") = ""
2781 * StringUtils.trimRight("abc") = "abc"
2782 * StringUtils.trimRight(" abc ") = " abc"
2783 * </pre>
2784 *
2785 * @param str the String to be trimmed, may be null
2786 * @param escapedSpace The last escaped space, if any
2787 * @return the trimmed string, <code>null</code> if null String input
2788 */
2789 public static final String trimRight( String str, int escapedSpace )
2790 {
2791 if ( isEmpty( str ) )
2792 {
2793 return "";
2794 }
2795
2796 int length = str.length();
2797 int end = length;
2798
2799 while ( ( end > 0 ) && ( str.charAt( end - 1 ) == ' ' ) && ( end > escapedSpace ) )
2800 {
2801 if ( ( end > 1 ) && ( str.charAt( end - 2 ) == '\\' ) )
2802 {
2803 break;
2804 }
2805
2806 end--;
2807 }
2808
2809 return ( end == length ? str : str.substring( 0, end ) );
2810 }
2811
2812
2813 /**
2814 * <p>
2815 * Removes spaces (char <= 32) from end of this array, handling
2816 * <code>null</code> by returning <code>null</code>.
2817 * </p>
2818 * Trim removes start characters <= 32.
2819 *
2820 * <pre>
2821 * StringUtils.trimRight(null) = null
2822 * StringUtils.trimRight("") = ""
2823 * StringUtils.trimRight(" ") = ""
2824 * StringUtils.trimRight("abc") = "abc"
2825 * StringUtils.trimRight(" abc ") = " abc"
2826 * </pre>
2827 *
2828 * @param chars the chars array to be trimmed, may be null
2829 * @return the position of the first char which is not a space, or the last
2830 * position of the array.
2831 */
2832 public static final int trimRight( char[] chars, int pos )
2833 {
2834 if ( chars == null )
2835 {
2836 return pos;
2837 }
2838
2839 while ( ( pos >= 0 ) && ( chars[pos - 1] == ' ' ) )
2840 {
2841 pos--;
2842 }
2843
2844 return pos;
2845 }
2846
2847
2848 /**
2849 * <p>
2850 * Removes spaces (char <= 32) from end of this string, handling
2851 * <code>null</code> by returning <code>null</code>.
2852 * </p>
2853 * Trim removes start characters <= 32.
2854 *
2855 * <pre>
2856 * StringUtils.trimRight(null) = null
2857 * StringUtils.trimRight("") = ""
2858 * StringUtils.trimRight(" ") = ""
2859 * StringUtils.trimRight("abc") = "abc"
2860 * StringUtils.trimRight(" abc ") = " abc"
2861 * </pre>
2862 *
2863 * @param string the string to be trimmed, may be null
2864 * @return the position of the first char which is not a space, or the last
2865 * position of the string.
2866 */
2867 public static final String trimRight( String string, Position pos )
2868 {
2869 if ( string == null )
2870 {
2871 return "";
2872 }
2873
2874 while ( ( pos.end >= 0 ) && ( string.charAt( pos.end - 1 ) == ' ' ) )
2875 {
2876 if ( ( pos.end > 1 ) && ( string.charAt( pos.end - 2 ) == '\\' ) )
2877 {
2878 break;
2879 }
2880
2881 pos.end--;
2882 }
2883
2884 return ( pos.end == string.length() ? string : string.substring( 0, pos.end ) );
2885 }
2886
2887
2888 /**
2889 * <p>
2890 * Removes spaces (char <= 32) from end of this string, handling
2891 * <code>null</code> by returning <code>null</code>.
2892 * </p>
2893 * Trim removes start characters <= 32.
2894 *
2895 * <pre>
2896 * StringUtils.trimRight(null) = null
2897 * StringUtils.trimRight("") = ""
2898 * StringUtils.trimRight(" ") = ""
2899 * StringUtils.trimRight("abc") = "abc"
2900 * StringUtils.trimRight(" abc ") = " abc"
2901 * </pre>
2902 *
2903 * @param bytes the byte array to be trimmed, may be null
2904 * @return the position of the first char which is not a space, or the last
2905 * position of the byte array.
2906 */
2907 public static final String trimRight( byte[] bytes, Position pos )
2908 {
2909 if ( bytes == null )
2910 {
2911 return "";
2912 }
2913
2914 while ( ( pos.end >= 0 ) && ( bytes[pos.end - 1] == ' ' ) )
2915 {
2916 if ( ( pos.end > 1 ) && ( bytes[pos.end - 2] == '\\' ) )
2917 {
2918 break;
2919 }
2920
2921 pos.end--;
2922 }
2923
2924 if ( pos.end == bytes.length )
2925 {
2926 return StringTools.utf8ToString( bytes );
2927 }
2928 else
2929 {
2930 return StringTools.utf8ToString( bytes, pos.end );
2931 }
2932 }
2933
2934
2935 /**
2936 * <p>
2937 * Removes spaces (char <= 32) from end of this array, handling
2938 * <code>null</code> by returning <code>null</code>.
2939 * </p>
2940 * Trim removes start characters <= 32.
2941 *
2942 * <pre>
2943 * StringUtils.trimRight(null) = null
2944 * StringUtils.trimRight("") = ""
2945 * StringUtils.trimRight(" ") = ""
2946 * StringUtils.trimRight("abc") = "abc"
2947 * StringUtils.trimRight(" abc ") = " abc"
2948 * </pre>
2949 *
2950 * @param bytes the byte array to be trimmed, may be null
2951 * @return the position of the first char which is not a space, or the last
2952 * position of the array.
2953 */
2954 public static final int trimRight( byte[] bytes, int pos )
2955 {
2956 if ( bytes == null )
2957 {
2958 return pos;
2959 }
2960
2961 while ( ( pos >= 0 ) && ( bytes[pos] == ' ' ) )
2962 {
2963 pos--;
2964 }
2965
2966 return pos;
2967 }
2968
2969
2970 // Case conversion
2971 // -----------------------------------------------------------------------
2972 /**
2973 * <p>
2974 * Converts a String to upper case as per {@link String#toUpperCase()}.
2975 * </p>
2976 * <p>
2977 * A <code>null</code> input String returns <code>null</code>.
2978 * </p>
2979 *
2980 * <pre>
2981 * StringUtils.upperCase(null) = null
2982 * StringUtils.upperCase("") = ""
2983 * StringUtils.upperCase("aBc") = "ABC"
2984 * </pre>
2985 *
2986 * @param str the String to upper case, may be null
2987 * @return the upper cased String, <code>null</code> if null String input
2988 */
2989 public static final String upperCase( String str )
2990 {
2991 if ( str == null )
2992 {
2993 return null;
2994 }
2995
2996 return str.toUpperCase();
2997 }
2998
2999
3000 /**
3001 * <p>
3002 * Converts a String to lower case as per {@link String#toLowerCase()}.
3003 * </p>
3004 * <p>
3005 * A <code>null</code> input String returns <code>null</code>.
3006 * </p>
3007 *
3008 * <pre>
3009 * StringUtils.lowerCase(null) = null
3010 * StringUtils.lowerCase("") = ""
3011 * StringUtils.lowerCase("aBc") = "abc"
3012 * </pre>
3013 *
3014 * @param str the String to lower case, may be null
3015 * @return the lower cased String, <code>null</code> if null String input
3016 */
3017 public static final String lowerCase( String str )
3018 {
3019 if ( str == null )
3020 {
3021 return null;
3022 }
3023
3024 return str.toLowerCase();
3025 }
3026
3027
3028 /**
3029 * Rewrote the toLowercase method to improve performances.
3030 * In Ldap, attributesType are supposed to use ASCII chars :
3031 * 'a'-'z', 'A'-'Z', '0'-'9', '.' and '-' only. We will take
3032 * care of any other chars either.
3033 *
3034 * @param str The String to lowercase
3035 * @return The lowercase string
3036 */
3037 public static final String lowerCaseAscii( String str )
3038 {
3039 if ( str == null )
3040 {
3041 return null;
3042 }
3043
3044 char[] chars = str.toCharArray();
3045 int pos = 0;
3046
3047 for ( char c:chars )
3048 {
3049 chars[pos++] = TO_LOWER_CASE[c];
3050 }
3051
3052 return new String( chars );
3053 }
3054
3055
3056 // Equals
3057 // -----------------------------------------------------------------------
3058 /**
3059 * <p>
3060 * Compares two Strings, returning <code>true</code> if they are equal.
3061 * </p>
3062 * <p>
3063 * <code>null</code>s are handled without exceptions. Two
3064 * <code>null</code> references are considered to be equal. The comparison
3065 * is case sensitive.
3066 * </p>
3067 *
3068 * <pre>
3069 * StringUtils.equals(null, null) = true
3070 * StringUtils.equals(null, "abc") = false
3071 * StringUtils.equals("abc", null) = false
3072 * StringUtils.equals("abc", "abc") = true
3073 * StringUtils.equals("abc", "ABC") = false
3074 * </pre>
3075 *
3076 * @see java.lang.String#equals(Object)
3077 * @param str1 the first String, may be null
3078 * @param str2 the second String, may be null
3079 * @return <code>true</code> if the Strings are equal, case sensitive, or
3080 * both <code>null</code>
3081 */
3082 public static final boolean equals( String str1, String str2 )
3083 {
3084 return str1 == null ? str2 == null : str1.equals( str2 );
3085 }
3086
3087
3088 /**
3089 * Return an UTF-8 encoded String
3090 *
3091 * @param bytes The byte array to be transformed to a String
3092 * @return A String.
3093 */
3094 public static final String utf8ToString( byte[] bytes )
3095 {
3096 if ( bytes == null )
3097 {
3098 return "";
3099 }
3100
3101 try
3102 {
3103 return new String( bytes, "UTF-8" );
3104 }
3105 catch ( UnsupportedEncodingException uee )
3106 {
3107 // if this happens something is really strange
3108 throw new RuntimeException( uee );
3109 }
3110 }
3111
3112
3113 /**
3114 * Return an UTF-8 encoded String
3115 *
3116 * @param bytes The byte array to be transformed to a String
3117 * @param length The length of the byte array to be converted
3118 * @return A String.
3119 */
3120 public static final String utf8ToString( byte[] bytes, int length )
3121 {
3122 if ( bytes == null )
3123 {
3124 return "";
3125 }
3126
3127 try
3128 {
3129 return new String( bytes, 0, length, "UTF-8" );
3130 }
3131 catch ( UnsupportedEncodingException uee )
3132 {
3133 // if this happens something is really strange
3134 throw new RuntimeException( uee );
3135 }
3136 }
3137
3138
3139 /**
3140 * Return an UTF-8 encoded String
3141 *
3142 * @param bytes The byte array to be transformed to a String
3143 * @param start the starting position in the byte array
3144 * @param length The length of the byte array to be converted
3145 * @return A String.
3146 */
3147 public static final String utf8ToString( byte[] bytes, int start, int length )
3148 {
3149 if ( bytes == null )
3150 {
3151 return "";
3152 }
3153
3154 try
3155 {
3156 return new String( bytes, start, length, "UTF-8" );
3157 }
3158 catch ( UnsupportedEncodingException uee )
3159 {
3160 // if this happens something is really strange
3161 throw new RuntimeException( uee );
3162 }
3163 }
3164
3165
3166 /**
3167 * Return UTF-8 encoded byte[] representation of a String
3168 *
3169 * @param string The string to be transformed to a byte array
3170 * @return The transformed byte array
3171 */
3172 public static final byte[] getBytesUtf8( String string )
3173 {
3174 if ( string == null )
3175 {
3176 return new byte[0];
3177 }
3178
3179 try
3180 {
3181 return string.getBytes( "UTF-8" );
3182 }
3183 catch ( UnsupportedEncodingException uee )
3184 {
3185 // if this happens something is really strange
3186 throw new RuntimeException( uee );
3187 }
3188 }
3189
3190
3191 /**
3192 * Utility method that return a String representation of a list
3193 *
3194 * @param list The list to transform to a string
3195 * @return A csv string
3196 */
3197 public static final String listToString( List<?> list )
3198 {
3199 if ( ( list == null ) || ( list.size() == 0 ) )
3200 {
3201 return "";
3202 }
3203
3204 StringBuilder sb = new StringBuilder();
3205 boolean isFirst = true;
3206
3207 for ( Object elem : list )
3208 {
3209 if ( isFirst )
3210 {
3211 isFirst = false;
3212 }
3213 else
3214 {
3215 sb.append( ", " );
3216 }
3217
3218 sb.append( elem );
3219 }
3220
3221 return sb.toString();
3222 }
3223
3224
3225
3226
3227 /**
3228 * Utility method that return a String representation of a set
3229 *
3230 * @param set The set to transform to a string
3231 * @return A csv string
3232 */
3233 public static final String setToString( Set<?> set )
3234 {
3235 if ( ( set == null ) || ( set.size() == 0 ) )
3236 {
3237 return "";
3238 }
3239
3240 StringBuilder sb = new StringBuilder();
3241 boolean isFirst = true;
3242
3243 for ( Object elem : set )
3244 {
3245 if ( isFirst )
3246 {
3247 isFirst = false;
3248 }
3249 else
3250 {
3251 sb.append( ", " );
3252 }
3253
3254 sb.append( elem );
3255 }
3256
3257 return sb.toString();
3258 }
3259
3260
3261 /**
3262 * Utility method that return a String representation of a list
3263 *
3264 * @param list The list to transform to a string
3265 * @param tabs The tabs to add in ffront of the elements
3266 * @return A csv string
3267 */
3268 public static final String listToString( List<?> list, String tabs )
3269 {
3270 if ( ( list == null ) || ( list.size() == 0 ) )
3271 {
3272 return "";
3273 }
3274
3275 StringBuffer sb = new StringBuffer();
3276
3277 for ( Object elem : list )
3278 {
3279 sb.append( tabs );
3280 sb.append( elem );
3281 sb.append( '\n' );
3282 }
3283
3284 return sb.toString();
3285 }
3286
3287
3288 /**
3289 * Utility method that return a String representation of a map. The elements
3290 * will be represented as "key = value"
3291 *
3292 * @param map The map to transform to a string
3293 * @return A csv string
3294 */
3295 public static final String mapToString( Map<?,?> map )
3296 {
3297 if ( ( map == null ) || ( map.size() == 0 ) )
3298 {
3299 return "";
3300 }
3301
3302 StringBuffer sb = new StringBuffer();
3303 boolean isFirst = true;
3304
3305 for ( Map.Entry<?, ?> entry:map.entrySet() )
3306 {
3307 if ( isFirst )
3308 {
3309 isFirst = false;
3310 }
3311 else
3312 {
3313 sb.append( ", " );
3314 }
3315
3316 sb.append( entry.getKey() );
3317 sb.append( " = '" ).append( entry.getValue() ).append( "'" );
3318 }
3319
3320 return sb.toString();
3321 }
3322
3323
3324 /**
3325 * Utility method that return a String representation of a map. The elements
3326 * will be represented as "key = value"
3327 *
3328 * @param map The map to transform to a string
3329 * @param tabs The tabs to add in ffront of the elements
3330 * @return A csv string
3331 */
3332 public static final String mapToString( Map<?,?> map, String tabs )
3333 {
3334 if ( ( map == null ) || ( map.size() == 0 ) )
3335 {
3336 return "";
3337 }
3338
3339 StringBuffer sb = new StringBuffer();
3340
3341 for ( Map.Entry<?, ?> entry:map.entrySet() )
3342 {
3343 sb.append( tabs );
3344 sb.append( entry.getKey() );
3345
3346 sb.append( " = '" ).append( entry.getValue().toString() ).append( "'\n" );
3347 }
3348
3349 return sb.toString();
3350 }
3351
3352
3353 /**
3354 * Get the default charset
3355 *
3356 * @return The default charset
3357 */
3358 public static final String getDefaultCharsetName()
3359 {
3360 if ( null == defaultCharset )
3361 {
3362 try
3363 {
3364 // Try with jdk 1.5 method, if we are using a 1.5 jdk :)
3365 Method method = Charset.class.getMethod( "defaultCharset", new Class[0] );
3366 defaultCharset = ((Charset) method.invoke( null, new Object[0]) ).name();
3367 }
3368 catch (Exception e)
3369 {
3370 // fall back to old method
3371 defaultCharset = new OutputStreamWriter( new ByteArrayOutputStream() ).getEncoding();
3372 }
3373 }
3374
3375 return defaultCharset;
3376 }
3377
3378
3379 /**
3380 * Decodes values of attributes in the DN encoded in hex into a UTF-8
3381 * String. RFC2253 allows a DN's attribute to be encoded in hex.
3382 * The encoded value starts with a # then is followed by an even
3383 * number of hex characters.
3384 *
3385 * @param str the string to decode
3386 * @return the decoded string
3387 */
3388 public static final String decodeHexString( String str ) throws InvalidNameException
3389 {
3390 if ( str == null || str.length() == 0 )
3391 {
3392 throw new InvalidNameException( I18n.err( I18n.ERR_04431 ) );
3393 }
3394
3395 char[] chars = str.toCharArray();
3396
3397 if ( chars[0] != '#' )
3398 {
3399 throw new InvalidNameException( I18n.err( I18n.ERR_04432, str ) );
3400 }
3401
3402 // the bytes representing the encoded string of hex
3403 // this should be ( length - 1 )/2 in size
3404 byte[] decoded = new byte[ ( chars.length - 1 ) >> 1 ];
3405
3406 for ( int ii = 1, jj = 0 ; ii < chars.length; ii+=2, jj++ )
3407 {
3408 int ch = ( StringTools.HEX_VALUE[chars[ii]] << 4 ) +
3409 StringTools.HEX_VALUE[chars[ii+1]];
3410 decoded[jj] = ( byte ) ch;
3411 }
3412
3413 return StringTools.utf8ToString( decoded );
3414 }
3415
3416
3417 /**
3418 * Decodes sequences of escaped hex within an attribute's value into
3419 * a UTF-8 String. The hex is decoded inline and the complete decoded
3420 * String is returned.
3421 *
3422 * @param str the string containing hex escapes
3423 * @return the decoded string
3424 */
3425 public static final String decodeEscapedHex( String str ) throws InvalidNameException
3426 {
3427 if ( str == null )
3428 {
3429 throw new InvalidNameException( I18n.err( I18n.ERR_04433 ) );
3430 }
3431
3432 int length = str.length();
3433
3434 if ( length == 0 )
3435 {
3436 throw new InvalidNameException( I18n.err( I18n.ERR_04434 ) );
3437 }
3438
3439 // create buffer and add everything before start of scan
3440 StringBuffer buf = new StringBuffer();
3441 ByteBuffer bb = new ByteBuffer();
3442 boolean escaped = false;
3443
3444 // start scaning until we find an escaped series of bytes
3445 for ( int ii = 0; ii < length; ii++ )
3446 {
3447 char c = str.charAt( ii );
3448
3449 if ( !escaped && c == '\\' )
3450 {
3451 // we have the start of a hex escape sequence
3452 if ( isHex( str, ii+1 ) && isHex ( str, ii+2 ) )
3453 {
3454 bb.clear();
3455 int advancedBy = collectEscapedHexBytes( bb, str, ii );
3456 ii+=advancedBy-1;
3457 buf.append( StringTools.utf8ToString( bb.buffer(), bb.position() ) );
3458 escaped = false;
3459 continue;
3460 }
3461 else
3462 {
3463 // It may be an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
3464 escaped = true;
3465 continue;
3466 }
3467 }
3468
3469 if ( escaped )
3470 {
3471 if ( DNUtils.isPairCharOnly( c ) )
3472 {
3473 // It is an escaped char ( ' ', '"', '#', '+', ',', ';', '<', '=', '>', '\' )
3474 // Stores it into the buffer without the '\'
3475 escaped = false;
3476 buf.append( c );
3477 continue;
3478 }
3479 else
3480 {
3481 throw new InvalidNameException( I18n.err( I18n.ERR_04435 ) );
3482 }
3483 }
3484 else
3485 {
3486 buf.append( str.charAt( ii ) );
3487 }
3488 }
3489
3490 if ( escaped )
3491 {
3492 // We should not have a '\' at the end of the string
3493 throw new InvalidNameException( I18n.err( I18n.ERR_04436 ) );
3494 }
3495
3496 return buf.toString();
3497 }
3498
3499
3500 /**
3501 * Convert an escaoed list of bytes to a byte[]
3502 *
3503 * @param str the string containing hex escapes
3504 * @return the converted byte[]
3505 */
3506 public static final byte[] convertEscapedHex( String str ) throws InvalidNameException
3507 {
3508 if ( str == null )
3509 {
3510 throw new InvalidNameException( I18n.err( I18n.ERR_04433 ) );
3511 }
3512
3513 int length = str.length();
3514
3515 if ( length == 0 )
3516 {
3517 throw new InvalidNameException( I18n.err( I18n.ERR_04434 ) );
3518 }
3519
3520 // create buffer and add everything before start of scan
3521 byte[] buf = new byte[ str.length()/3];
3522 int pos = 0;
3523
3524 // start scaning until we find an escaped series of bytes
3525 for ( int i = 0; i < length; i++ )
3526 {
3527 char c = str.charAt( i );
3528
3529 if ( c == '\\' )
3530 {
3531 // we have the start of a hex escape sequence
3532 if ( isHex( str, i+1 ) && isHex ( str, i+2 ) )
3533 {
3534 byte value = ( byte ) ( (StringTools.HEX_VALUE[str.charAt( i+1 )] << 4 ) +
3535 StringTools.HEX_VALUE[str.charAt( i+2 )] );
3536
3537 i+=2;
3538 buf[pos++] = value;
3539 }
3540 }
3541 else
3542 {
3543 throw new InvalidNameException( I18n.err( I18n.ERR_04435 ) );
3544 }
3545 }
3546
3547 return buf;
3548 }
3549
3550
3551 /**
3552 * Collects an hex sequence from a string, and returns the value
3553 * as an integer, after having modified the initial value (the escaped
3554 * hex value is transsformed to the byte it represents).
3555 *
3556 * @param bb the buffer which will contain the unescaped byte
3557 * @param str the initial string with ecaped chars
3558 * @param index the position in the string of the escaped data
3559 * @return the byte as an integer
3560 */
3561 public static int collectEscapedHexBytes( ByteBuffer bb, String str, int index )
3562 {
3563 int advanceBy = 0;
3564
3565 for ( int ii = index; ii < str.length(); ii += 3, advanceBy += 3 )
3566 {
3567 // we have the start of a hex escape sequence
3568 if ( ( str.charAt( ii ) == '\\' ) && isHex( str, ii+1 ) && isHex ( str, ii+2 ) )
3569 {
3570 int bite = ( StringTools.HEX_VALUE[str.charAt( ii+1 )] << 4 ) +
3571 StringTools.HEX_VALUE[str.charAt( ii+2 )];
3572 bb.append( bite );
3573 }
3574 else
3575 {
3576 break;
3577 }
3578 }
3579
3580 return advanceBy;
3581 }
3582
3583
3584 /**
3585 * Thansform an array of ASCII bytes to a string. the byte array should contains
3586 * only values in [0, 127].
3587 *
3588 * @param bytes The byte array to transform
3589 * @return The resulting string
3590 */
3591 public static String asciiBytesToString( byte[] bytes )
3592 {
3593 if ( (bytes == null) || (bytes.length == 0 ) )
3594 {
3595 return "";
3596 }
3597
3598 char[] result = new char[bytes.length];
3599
3600 for ( int i = 0; i < bytes.length; i++ )
3601 {
3602 result[i] = (char)bytes[i];
3603 }
3604
3605 return new String( result );
3606 }
3607
3608
3609 /**
3610 * Build an AttributeType froma byte array. An AttributeType contains
3611 * only chars within [0-9][a-z][A-Z][-.].
3612 *
3613 * @param bytes The bytes containing the AttributeType
3614 * @return The AttributeType as a String
3615 */
3616 public static String getType( byte[] bytes)
3617 {
3618 if ( bytes == null )
3619 {
3620 return null;
3621 }
3622
3623 char[] chars = new char[bytes.length];
3624 int pos = 0;
3625
3626 for ( byte b:bytes )
3627 {
3628 chars[pos++] = (char)b;
3629 }
3630
3631 return new String( chars );
3632 }
3633
3634
3635 /**
3636 *
3637 * Check that a String is a valid IA5String. An IA5String contains only
3638 * char which values is between [0, 7F]
3639 *
3640 * @param str The String to check
3641 * @return <code>true</code> if the string is an IA5String or is empty,
3642 * <code>false</code> otherwise
3643 */
3644 public static boolean isIA5String( String str )
3645 {
3646 if ( ( str == null ) || ( str.length() == 0 ) )
3647 {
3648 return true;
3649 }
3650
3651 // All the chars must be in [0x00, 0x7F]
3652 for ( char c:str.toCharArray() )
3653 {
3654 if ( ( c < 0 ) || ( c > 0x7F ) )
3655 {
3656 return false;
3657 }
3658 }
3659
3660 return true;
3661 }
3662
3663
3664 /**
3665 *
3666 * Check that a String is a valid PrintableString. A PrintableString contains only
3667 * the following set of chars :
3668 * { ' ', ''', '(', ')', '+', '-', '.', '/', [0-9], ':', '=', '?', [A-Z], [a-z]}
3669 *
3670 * @param str The String to check
3671 * @return <code>true</code> if the string is a PrintableString or is empty,
3672 * <code>false</code> otherwise
3673 */
3674 public static boolean isPrintableString( String str )
3675 {
3676 if ( ( str == null ) || ( str.length() == 0 ) )
3677 {
3678 return true;
3679 }
3680
3681 for ( char c:str.toCharArray() )
3682 {
3683 if ( ( c > 127 ) || !IS_PRINTABLE_CHAR[ c ] )
3684 {
3685 return false;
3686 }
3687 }
3688
3689 return true;
3690 }
3691
3692
3693 /**
3694 * Check if the current char is in the unicodeSubset : all chars but
3695 * '\0', '(', ')', '*' and '\'
3696 *
3697 * @param str The string to check
3698 * @param pos Position of the current char
3699 * @return True if the current char is in the unicode subset
3700 */
3701 public static boolean isUnicodeSubset( String str, int pos )
3702 {
3703 if ( ( str == null ) || ( str.length() <= pos ) || ( pos < 0 ) )
3704 {
3705 return false;
3706 }
3707
3708 char c = str.charAt( pos );
3709
3710 return ( ( c > 127 ) || UNICODE_SUBSET[c] );
3711 }
3712
3713
3714 /**
3715 * Check if the current char is in the unicodeSubset : all chars but
3716 * '\0', '(', ')', '*' and '\'
3717 *
3718 * @param c The char to check
3719 * @return True if the current char is in the unicode subset
3720 */
3721 public static boolean isUnicodeSubset( char c )
3722 {
3723 return ( ( c > 127 ) || UNICODE_SUBSET[c] );
3724 }
3725
3726
3727 /**
3728 * converts the bytes of a UUID to string
3729 *
3730 * @param bytes bytes of a UUID
3731 * @return UUID in string format
3732 */
3733 public static String uuidToString( byte[] bytes )
3734 {
3735 if ( bytes == null || bytes.length != 16 )
3736 {
3737 return "Invalid UUID";
3738 }
3739
3740 char[] hex = Hex.encodeHex( bytes );
3741 StringBuffer sb = new StringBuffer();
3742 sb.append( hex, 0, 8 );
3743 sb.append( '-' );
3744 sb.append( hex, 8, 4 );
3745 sb.append( '-' );
3746 sb.append( hex, 12, 4 );
3747 sb.append( '-' );
3748 sb.append( hex, 16, 4 );
3749 sb.append( '-' );
3750 sb.append( hex, 20, 12 );
3751
3752 return sb.toString().toLowerCase();
3753 }
3754
3755
3756 /**
3757 * converts the string representation of an UUID to bytes
3758 *
3759 * @param string the string representation of an UUID
3760 * @return the bytes, null if the the syntax is not valid
3761 */
3762 public static byte[] uuidToBytes( String string )
3763 {
3764 if ( !new UuidSyntaxChecker().isValidSyntax( string ) )
3765 {
3766 return null;
3767 }
3768
3769 char[] chars = string.toCharArray();
3770 byte[] bytes = new byte[16];
3771 bytes[0] = getHexValue( chars[0], chars[1] );
3772 bytes[1] = getHexValue( chars[2], chars[3] );
3773 bytes[2] = getHexValue( chars[4], chars[5] );
3774 bytes[3] = getHexValue( chars[6], chars[7] );
3775
3776 bytes[4] = getHexValue( chars[9], chars[10] );
3777 bytes[5] = getHexValue( chars[11], chars[12] );
3778
3779 bytes[6] = getHexValue( chars[14], chars[15] );
3780 bytes[7] = getHexValue( chars[16], chars[17] );
3781
3782 bytes[8] = getHexValue( chars[19], chars[20] );
3783 bytes[9] = getHexValue( chars[21], chars[22] );
3784
3785 bytes[10] = getHexValue( chars[24], chars[25] );
3786 bytes[11] = getHexValue( chars[26], chars[27] );
3787 bytes[12] = getHexValue( chars[28], chars[29] );
3788 bytes[13] = getHexValue( chars[30], chars[31] );
3789 bytes[14] = getHexValue( chars[32], chars[33] );
3790 bytes[15] = getHexValue( chars[34], chars[35] );
3791
3792 return bytes;
3793 }
3794
3795 }