001 /*
002 * Copyright 2012-2013 UnboundID Corp.
003 * All Rights Reserved.
004 */
005 /*
006 * Copyright (C) 2012-2013 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021 package com.unboundid.util;
022
023
024
025 import java.io.IOException;
026 import java.text.ParseException;
027
028 import static com.unboundid.util.UtilityMessages.*;
029 import static com.unboundid.util.Validator.*;
030
031
032
033 /**
034 * This class provides methods for encoding and decoding data in base32 as
035 * defined in <A HREF="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</A>. It
036 * provides a somewhat compact way of representing binary data using only
037 * printable characters (a subset of ASCII letters and numeric digits selected
038 * to avoid ambiguity, like confusion between the number 1 and the uppercase
039 * letter I, and between the number 0 and the uppercase letter O). It uses a
040 * five-bit encoding mechanism in which every five bytes of raw data is
041 * converted into eight bytes of base32-encoded data.
042 * <BR><BR>
043 * <H2>Example</H2>
044 * The following examples demonstrate the process for base32-encoding raw data,
045 * and for decoding a string containing base32-encoded data back to the raw
046 * data used to create it:
047 * <PRE>
048 * // Base32-encode some raw data:
049 * String base32String = Base32.encode(rawDataBytes);
050 * System.out.println("Base32 encoded representation of the raw data is " +
051 * base32String);
052 *
053 * // Decode a base32 string back to raw data:
054 * try
055 * {
056 * byte[] decodedRawDataBytes = base32.decode(base32String);
057 * }
058 * catch (ParseException pe)
059 * {
060 * System.err.println("The string did not contain valid base32-encoded " +
061 * "data: " + pe.getMessage());
062 * }
063 * </PRE>
064 */
065 @ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
066 public final class Base32
067 {
068 /**
069 * The set of characters in the base32 alphabet.
070 */
071 private static final char[] BASE32_ALPHABET =
072 ("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567").toCharArray();
073
074
075
076 /**
077 * Prevent this class from being instantiated.
078 */
079 private Base32()
080 {
081 // No implementation is required.
082 }
083
084
085
086 /**
087 * Encodes the UTF-8 representation of the provided string in base32 format.
088 *
089 * @param data The raw data to be encoded. It must not be {@code null}.
090 *
091 * @return The base32-encoded representation of the provided data.
092 */
093 public static String encode(final String data)
094 {
095 ensureNotNull(data);
096
097 return encode(StaticUtils.getBytes(data));
098 }
099
100
101
102 /**
103 * Encodes the provided data in base32 format.
104 *
105 * @param data The raw data to be encoded. It must not be {@code null}.
106 *
107 * @return The base32-encoded representation of the provided data.
108 */
109 public static String encode(final byte[] data)
110 {
111 ensureNotNull(data);
112
113 final StringBuilder buffer = new StringBuilder(4*data.length/3+1);
114 encodeInternal(data, 0, data.length, buffer);
115 return buffer.toString();
116 }
117
118
119
120 /**
121 * Appends a base32-encoded version of the contents of the provided buffer
122 * (using a UTF-8 representation) to the given buffer.
123 *
124 * @param data The raw data to be encoded. It must not be {@code null}.
125 * @param buffer The buffer to which the base32-encoded data is to be
126 * written.
127 */
128 public static void encode(final String data, final StringBuilder buffer)
129 {
130 ensureNotNull(data);
131
132 encode(StaticUtils.getBytes(data), buffer);
133 }
134
135
136
137 /**
138 * Appends a base32-encoded version of the contents of the provided buffer
139 * (using a UTF-8 representation) to the given buffer.
140 *
141 * @param data The raw data to be encoded. It must not be {@code null}.
142 * @param buffer The buffer to which the base32-encoded data is to be
143 * written.
144 */
145 public static void encode(final String data, final ByteStringBuffer buffer)
146 {
147 ensureNotNull(data);
148
149 encode(StaticUtils.getBytes(data), buffer);
150 }
151
152
153
154 /**
155 * Appends a base32-encoded representation of the provided data to the given
156 * buffer.
157 *
158 * @param data The raw data to be encoded. It must not be {@code null}.
159 * @param buffer The buffer to which the base32-encoded data is to be
160 * written.
161 */
162 public static void encode(final byte[] data, final StringBuilder buffer)
163 {
164 encodeInternal(data, 0, data.length, buffer);
165 }
166
167
168
169 /**
170 * Appends a base32-encoded representation of the provided data to the given
171 * buffer.
172 *
173 * @param data The array containing the raw data to be encoded. It must
174 * not be {@code null}.
175 * @param off The offset in the array at which the data to encode begins.
176 * @param length The number of bytes to be encoded.
177 * @param buffer The buffer to which the base32-encoded data is to be
178 * written.
179 */
180 public static void encode(final byte[] data, final int off, final int length,
181 final StringBuilder buffer)
182 {
183 encodeInternal(data, off, length, buffer);
184 }
185
186
187
188 /**
189 * Appends a base32-encoded representation of the provided data to the given
190 * buffer.
191 *
192 * @param data The raw data to be encoded. It must not be {@code null}.
193 * @param buffer The buffer to which the base32-encoded data is to be
194 * written.
195 */
196 public static void encode(final byte[] data, final ByteStringBuffer buffer)
197 {
198 encodeInternal(data, 0, data.length, buffer);
199 }
200
201
202
203 /**
204 * Appends a base32-encoded representation of the provided data to the given
205 * buffer.
206 *
207 * @param data The raw data to be encoded. It must not be {@code null}.
208 * @param off The offset in the array at which the data to encode begins.
209 * @param length The number of bytes to be encoded.
210 * @param buffer The buffer to which the base32-encoded data is to be
211 * written.
212 */
213 public static void encode(final byte[] data, final int off, final int length,
214 final ByteStringBuffer buffer)
215 {
216 encodeInternal(data, off, length, buffer);
217 }
218
219
220
221 /**
222 * Appends a base32-encoded representation of the provided data to the given
223 * buffer.
224 *
225 * @param data The raw data to be encoded. It must not be {@code null}.
226 * @param off The offset in the array at which the data to encode begins.
227 * @param length The number of bytes to be encoded.
228 * @param buffer The buffer to which the base32-encoded data is to be
229 * written.
230 */
231 private static void encodeInternal(final byte[] data, final int off,
232 final int length, final Appendable buffer)
233 {
234 ensureNotNull(data);
235 ensureTrue(data.length >= off);
236 ensureTrue(data.length >= (off+length));
237
238 if (length == 0)
239 {
240 return;
241 }
242
243 try
244 {
245 int pos = off;
246 for (int i=0; i < (length / 5); i++)
247 {
248 final long longValue =
249 (((data[pos++] & 0xFFL) << 32) |
250 ((data[pos++] & 0xFFL) << 24) |
251 ((data[pos++] & 0xFFL) << 16) |
252 ((data[pos++] & 0xFFL) << 8) |
253 (data[pos++] & 0xFFL));
254
255 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
256 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
257 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
258 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
259 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
260 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
261 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
262 buffer.append(BASE32_ALPHABET[(int) (longValue & 0x1FL)]);
263 }
264
265 switch ((off+length) - pos)
266 {
267 case 1:
268 long longValue = ((data[pos] & 0xFFL) << 32);
269 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
270 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
271 buffer.append("======");
272 return;
273
274 case 2:
275 longValue = (((data[pos++] & 0xFFL) << 32) |
276 ((data[pos] & 0xFFL) << 24));
277 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
278 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
279 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
280 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
281 buffer.append("====");
282 return;
283
284 case 3:
285 longValue = (((data[pos++] & 0xFFL) << 32) |
286 ((data[pos++] & 0xFFL) << 24) |
287 ((data[pos] & 0xFFL) << 16));
288 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
289 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
290 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
291 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
292 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
293 buffer.append("===");
294 return;
295
296 case 4:
297 longValue = (((data[pos++] & 0xFFL) << 32) |
298 ((data[pos++] & 0xFFL) << 24) |
299 ((data[pos++] & 0xFFL) << 16) |
300 ((data[pos] & 0xFFL) << 8));
301 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
302 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
303 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
304 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
305 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
306 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
307 buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
308 buffer.append("=");
309 return;
310 }
311 }
312 catch (final IOException ioe)
313 {
314 Debug.debugException(ioe);
315
316 // This should never happen.
317 throw new RuntimeException(ioe.getMessage(), ioe);
318 }
319 }
320
321
322
323 /**
324 * Decodes the contents of the provided base32-encoded string.
325 *
326 * @param data The base32-encoded string to decode. It must not be
327 * {@code null}.
328 *
329 * @return A byte array containing the decoded data.
330 *
331 * @throws ParseException If the contents of the provided string cannot be
332 * parsed as base32-encoded data.
333 */
334 public static byte[] decode(final String data)
335 throws ParseException
336 {
337 ensureNotNull(data);
338
339 final int length = data.length();
340 if (length == 0)
341 {
342 return new byte[0];
343 }
344
345 if ((length % 8) != 0)
346 {
347 throw new ParseException(ERR_BASE32_DECODE_INVALID_LENGTH.get(), length);
348 }
349
350 final ByteStringBuffer buffer = new ByteStringBuffer(5 * (length / 8));
351
352 int stringPos = 0;
353 while (stringPos < length)
354 {
355 long longValue = 0x00;
356 for (int i=0; i < 8; i++)
357 {
358 longValue <<= 5;
359 switch (data.charAt(stringPos++))
360 {
361 case 'A':
362 case 'a':
363 longValue |= 0x00L;
364 break;
365 case 'B':
366 case 'b':
367 longValue |= 0x01L;
368 break;
369 case 'C':
370 case 'c':
371 longValue |= 0x02L;
372 break;
373 case 'D':
374 case 'd':
375 longValue |= 0x03L;
376 break;
377 case 'E':
378 case 'e':
379 longValue |= 0x04L;
380 break;
381 case 'F':
382 case 'f':
383 longValue |= 0x05L;
384 break;
385 case 'G':
386 case 'g':
387 longValue |= 0x06L;
388 break;
389 case 'H':
390 case 'h':
391 longValue |= 0x07L;
392 break;
393 case 'I':
394 case 'i':
395 longValue |= 0x08L;
396 break;
397 case 'J':
398 case 'j':
399 longValue |= 0x09L;
400 break;
401 case 'K':
402 case 'k':
403 longValue |= 0x0AL;
404 break;
405 case 'L':
406 case 'l':
407 longValue |= 0x0BL;
408 break;
409 case 'M':
410 case 'm':
411 longValue |= 0x0CL;
412 break;
413 case 'N':
414 case 'n':
415 longValue |= 0x0DL;
416 break;
417 case 'O':
418 case 'o':
419 longValue |= 0x0EL;
420 break;
421 case 'P':
422 case 'p':
423 longValue |= 0x0FL;
424 break;
425 case 'Q':
426 case 'q':
427 longValue |= 0x10L;
428 break;
429 case 'R':
430 case 'r':
431 longValue |= 0x11L;
432 break;
433 case 'S':
434 case 's':
435 longValue |= 0x12L;
436 break;
437 case 'T':
438 case 't':
439 longValue |= 0x13L;
440 break;
441 case 'U':
442 case 'u':
443 longValue |= 0x14L;
444 break;
445 case 'V':
446 case 'v':
447 longValue |= 0x15L;
448 break;
449 case 'W':
450 case 'w':
451 longValue |= 0x16L;
452 break;
453 case 'X':
454 case 'x':
455 longValue |= 0x17L;
456 break;
457 case 'Y':
458 case 'y':
459 longValue |= 0x18L;
460 break;
461 case 'Z':
462 case 'z':
463 longValue |= 0x19L;
464 break;
465 case '2':
466 longValue |= 0x1AL;
467 break;
468 case '3':
469 longValue |= 0x1BL;
470 break;
471 case '4':
472 longValue |= 0x1CL;
473 break;
474 case '5':
475 longValue |= 0x1DL;
476 break;
477 case '6':
478 longValue |= 0x1EL;
479 break;
480 case '7':
481 longValue |= 0x1FL;
482 break;
483
484 case '=':
485 switch (length - stringPos)
486 {
487 case 0:
488 // The string ended with a single equal sign, so there are
489 // four bytes left.
490 buffer.append((byte) ((longValue >> 32) & 0xFFL));
491 buffer.append((byte) ((longValue >> 24) & 0xFFL));
492 buffer.append((byte) ((longValue >> 16) & 0xFFL));
493 buffer.append((byte) ((longValue >> 8) & 0xFFL));
494 return buffer.toByteArray();
495
496 case 2:
497 // The string ended with three equal signs, so there are three
498 // bytes left.
499 longValue <<= 10;
500 buffer.append((byte) ((longValue >> 32) & 0xFFL));
501 buffer.append((byte) ((longValue >> 24) & 0xFFL));
502 buffer.append((byte) ((longValue >> 16) & 0xFFL));
503 return buffer.toByteArray();
504
505 case 3:
506 // The string ended with four equal signs, so there are two
507 // bytes left.
508 longValue <<= 15;
509 buffer.append((byte) ((longValue >> 32) & 0xFFL));
510 buffer.append((byte) ((longValue >> 24) & 0xFFL));
511 return buffer.toByteArray();
512
513 case 5:
514 // The string ended with six equal signs, so there is one byte
515 // left.
516 longValue <<= 25;
517 buffer.append((byte) ((longValue >> 32) & 0xFFL));
518 return buffer.toByteArray();
519
520 default:
521 throw new ParseException(
522 ERR_BASE32_DECODE_UNEXPECTED_EQUAL.get((stringPos-1)),
523 (stringPos-1));
524 }
525
526 default:
527 throw new ParseException(
528 ERR_BASE32_DECODE_UNEXPECTED_CHAR.get(
529 data.charAt(stringPos-1)),
530 (stringPos-1));
531 }
532 }
533
534 buffer.append((byte) ((longValue >> 32) & 0xFFL));
535 buffer.append((byte) ((longValue >> 24) & 0xFFL));
536 buffer.append((byte) ((longValue >> 16) & 0xFFL));
537 buffer.append((byte) ((longValue >> 8) & 0xFFL));
538 buffer.append((byte) (longValue & 0xFFL));
539 }
540
541 return buffer.toByteArray();
542 }
543
544
545
546 /**
547 * Decodes the contents of the provided base32-encoded string to a string
548 * containing the raw data using the UTF-8 encoding.
549 *
550 * @param data The base32-encoded string to decode. It must not be
551 * {@code null}.
552 *
553 * @return A string containing the decoded data.
554 *
555 * @throws ParseException If the contents of the provided string cannot be
556 * parsed as base32-encoded data using the UTF-8
557 * encoding.
558 */
559 public static String decodeToString(final String data)
560 throws ParseException
561 {
562 ensureNotNull(data);
563
564 final byte[] decodedBytes = decode(data);
565 return StaticUtils.toUTF8String(decodedBytes);
566 }
567 }