001    /*
002     * Copyright 2012-2013 UnboundID Corp.
003     * All Rights Reserved.
004     */
005    /*
006     * Copyright (C) 2012-2013 UnboundID Corp.
007     *
008     * This program is free software; you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (GPLv2 only)
010     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011     * as published by the Free Software Foundation.
012     *
013     * This program is distributed in the hope that it will be useful,
014     * but WITHOUT ANY WARRANTY; without even the implied warranty of
015     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016     * GNU General Public License for more details.
017     *
018     * You should have received a copy of the GNU General Public License
019     * along with this program; if not, see <http://www.gnu.org/licenses>.
020     */
021    package com.unboundid.util;
022    
023    
024    
025    import java.io.IOException;
026    import java.text.ParseException;
027    
028    import static com.unboundid.util.UtilityMessages.*;
029    import static com.unboundid.util.Validator.*;
030    
031    
032    
033    /**
034     * This class provides methods for encoding and decoding data in base32 as
035     * defined in <A HREF="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</A>.  It
036     * provides a somewhat compact way of representing binary data using only
037     * printable characters (a subset of ASCII letters and numeric digits selected
038     * to avoid ambiguity, like confusion between the number 1 and the uppercase
039     * letter I, and between the number 0 and the uppercase letter O).  It uses a
040     * five-bit encoding mechanism in which every five bytes of raw data is
041     * converted into eight bytes of base32-encoded data.
042     * <BR><BR>
043     * <H2>Example</H2>
044     * The following examples demonstrate the process for base32-encoding raw data,
045     * and for decoding a string containing base32-encoded data back to the raw
046     * data used to create it:
047     * <PRE>
048     *   // Base32-encode some raw data:
049     *   String base32String = Base32.encode(rawDataBytes);
050     *   System.out.println("Base32 encoded representation of the raw data is " +
051     *                      base32String);
052     *
053     *   // Decode a base32 string back to raw data:
054     *   try
055     *   {
056     *     byte[] decodedRawDataBytes = base32.decode(base32String);
057     *   }
058     *   catch (ParseException pe)
059     *   {
060     *     System.err.println("The string did not contain valid base32-encoded " +
061     *                        "data:  " + pe.getMessage());
062     *   }
063     * </PRE>
064     */
065    @ThreadSafety(level=ThreadSafetyLevel.COMPLETELY_THREADSAFE)
066    public final class Base32
067    {
068      /**
069       * The set of characters in the base32 alphabet.
070       */
071      private static final char[] BASE32_ALPHABET =
072           ("ABCDEFGHIJKLMNOPQRSTUVWXYZ234567").toCharArray();
073    
074    
075    
076      /**
077       * Prevent this class from being instantiated.
078       */
079      private Base32()
080      {
081        // No implementation is required.
082      }
083    
084    
085    
086      /**
087       * Encodes the UTF-8 representation of the provided string in base32 format.
088       *
089       * @param  data  The raw data to be encoded.  It must not be {@code null}.
090       *
091       * @return  The base32-encoded representation of the provided data.
092       */
093      public static String encode(final String data)
094      {
095        ensureNotNull(data);
096    
097        return encode(StaticUtils.getBytes(data));
098      }
099    
100    
101    
102      /**
103       * Encodes the provided data in base32 format.
104       *
105       * @param  data  The raw data to be encoded.  It must not be {@code null}.
106       *
107       * @return  The base32-encoded representation of the provided data.
108       */
109      public static String encode(final byte[] data)
110      {
111        ensureNotNull(data);
112    
113        final StringBuilder buffer = new StringBuilder(4*data.length/3+1);
114        encodeInternal(data, 0, data.length, buffer);
115        return buffer.toString();
116      }
117    
118    
119    
120      /**
121       * Appends a base32-encoded version of the contents of the provided buffer
122       * (using a UTF-8 representation) to the given buffer.
123       *
124       * @param  data    The raw data to be encoded.  It must not be {@code null}.
125       * @param  buffer  The buffer to which the base32-encoded data is to be
126       *                 written.
127       */
128      public static void encode(final String data, final StringBuilder buffer)
129      {
130        ensureNotNull(data);
131    
132        encode(StaticUtils.getBytes(data), buffer);
133      }
134    
135    
136    
137      /**
138       * Appends a base32-encoded version of the contents of the provided buffer
139       * (using a UTF-8 representation) to the given buffer.
140       *
141       * @param  data    The raw data to be encoded.  It must not be {@code null}.
142       * @param  buffer  The buffer to which the base32-encoded data is to be
143       *                 written.
144       */
145      public static void encode(final String data, final ByteStringBuffer buffer)
146      {
147        ensureNotNull(data);
148    
149        encode(StaticUtils.getBytes(data), buffer);
150      }
151    
152    
153    
154      /**
155       * Appends a base32-encoded representation of the provided data to the given
156       * buffer.
157       *
158       * @param  data    The raw data to be encoded.  It must not be {@code null}.
159       * @param  buffer  The buffer to which the base32-encoded data is to be
160       *                 written.
161       */
162      public static void encode(final byte[] data, final StringBuilder buffer)
163      {
164        encodeInternal(data, 0, data.length, buffer);
165      }
166    
167    
168    
169      /**
170       * Appends a base32-encoded representation of the provided data to the given
171       * buffer.
172       *
173       * @param  data    The array containing the raw data to be encoded.  It must
174       *                 not be {@code null}.
175       * @param  off     The offset in the array at which the data to encode begins.
176       * @param  length  The number of bytes to be encoded.
177       * @param  buffer  The buffer to which the base32-encoded data is to be
178       *                 written.
179       */
180      public static void encode(final byte[] data, final int off, final int length,
181                                final StringBuilder buffer)
182      {
183        encodeInternal(data, off, length, buffer);
184      }
185    
186    
187    
188      /**
189       * Appends a base32-encoded representation of the provided data to the given
190       * buffer.
191       *
192       * @param  data    The raw data to be encoded.  It must not be {@code null}.
193       * @param  buffer  The buffer to which the base32-encoded data is to be
194       *                 written.
195       */
196      public static void encode(final byte[] data, final ByteStringBuffer buffer)
197      {
198        encodeInternal(data, 0, data.length, buffer);
199      }
200    
201    
202    
203      /**
204       * Appends a base32-encoded representation of the provided data to the given
205       * buffer.
206       *
207       * @param  data    The raw data to be encoded.  It must not be {@code null}.
208       * @param  off     The offset in the array at which the data to encode begins.
209       * @param  length  The number of bytes to be encoded.
210       * @param  buffer  The buffer to which the base32-encoded data is to be
211       *                 written.
212       */
213      public static void encode(final byte[] data, final int off, final int length,
214                                final ByteStringBuffer buffer)
215      {
216        encodeInternal(data, off, length, buffer);
217      }
218    
219    
220    
221      /**
222       * Appends a base32-encoded representation of the provided data to the given
223       * buffer.
224       *
225       * @param  data    The raw data to be encoded.  It must not be {@code null}.
226       * @param  off     The offset in the array at which the data to encode begins.
227       * @param  length  The number of bytes to be encoded.
228       * @param  buffer  The buffer to which the base32-encoded data is to be
229       *                 written.
230       */
231      private static void encodeInternal(final byte[] data, final int off,
232                                         final int length, final Appendable buffer)
233      {
234        ensureNotNull(data);
235        ensureTrue(data.length >= off);
236        ensureTrue(data.length >= (off+length));
237    
238        if (length == 0)
239        {
240          return;
241        }
242    
243        try
244        {
245          int pos = off;
246          for (int i=0; i < (length / 5); i++)
247          {
248            final long longValue =
249                 (((data[pos++] & 0xFFL) << 32) |
250                  ((data[pos++] & 0xFFL) << 24) |
251                  ((data[pos++] & 0xFFL) << 16) |
252                  ((data[pos++] & 0xFFL) << 8) |
253                   (data[pos++] & 0xFFL));
254    
255            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
256            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
257            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
258            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
259            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
260            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
261            buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
262            buffer.append(BASE32_ALPHABET[(int) (longValue & 0x1FL)]);
263          }
264    
265          switch ((off+length) - pos)
266          {
267            case 1:
268              long longValue = ((data[pos] & 0xFFL) << 32);
269              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
270              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
271              buffer.append("======");
272              return;
273    
274            case 2:
275              longValue = (((data[pos++] & 0xFFL) << 32) |
276                           ((data[pos] & 0xFFL) << 24));
277              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
278              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
279              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
280              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
281              buffer.append("====");
282              return;
283    
284            case 3:
285              longValue = (((data[pos++] & 0xFFL) << 32) |
286                           ((data[pos++] & 0xFFL) << 24) |
287                           ((data[pos] & 0xFFL) << 16));
288              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
289              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
290              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
291              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
292              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
293              buffer.append("===");
294              return;
295    
296            case 4:
297              longValue = (((data[pos++] & 0xFFL) << 32) |
298                           ((data[pos++] & 0xFFL) << 24) |
299                           ((data[pos++] & 0xFFL) << 16) |
300                           ((data[pos] & 0xFFL) << 8));
301              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 35) & 0x1FL)]);
302              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 30) & 0x1FL)]);
303              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 25) & 0x1FL)]);
304              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 20) & 0x1FL)]);
305              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 15) & 0x1FL)]);
306              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 10) & 0x1FL)]);
307              buffer.append(BASE32_ALPHABET[(int) ((longValue >> 5) & 0x1FL)]);
308              buffer.append("=");
309              return;
310          }
311        }
312        catch (final IOException ioe)
313        {
314          Debug.debugException(ioe);
315    
316          // This should never happen.
317          throw new RuntimeException(ioe.getMessage(), ioe);
318        }
319      }
320    
321    
322    
323      /**
324       * Decodes the contents of the provided base32-encoded string.
325       *
326       * @param  data  The base32-encoded string to decode.  It must not be
327       *               {@code null}.
328       *
329       * @return  A byte array containing the decoded data.
330       *
331       * @throws  ParseException  If the contents of the provided string cannot be
332       *                          parsed as base32-encoded data.
333       */
334      public static byte[] decode(final String data)
335             throws ParseException
336      {
337        ensureNotNull(data);
338    
339        final int length = data.length();
340        if (length == 0)
341        {
342          return new byte[0];
343        }
344    
345        if ((length % 8) != 0)
346        {
347          throw new ParseException(ERR_BASE32_DECODE_INVALID_LENGTH.get(), length);
348        }
349    
350        final ByteStringBuffer buffer = new ByteStringBuffer(5 * (length / 8));
351    
352        int stringPos = 0;
353        while (stringPos < length)
354        {
355          long longValue = 0x00;
356          for (int i=0; i < 8; i++)
357          {
358            longValue <<= 5;
359            switch (data.charAt(stringPos++))
360            {
361              case 'A':
362              case 'a':
363                longValue |= 0x00L;
364                break;
365              case 'B':
366              case 'b':
367                longValue |= 0x01L;
368                break;
369              case 'C':
370              case 'c':
371                longValue |= 0x02L;
372                break;
373              case 'D':
374              case 'd':
375                longValue |= 0x03L;
376                break;
377              case 'E':
378              case 'e':
379                longValue |= 0x04L;
380                break;
381              case 'F':
382              case 'f':
383                longValue |= 0x05L;
384                break;
385              case 'G':
386              case 'g':
387                longValue |= 0x06L;
388                break;
389              case 'H':
390              case 'h':
391                longValue |= 0x07L;
392                break;
393              case 'I':
394              case 'i':
395                longValue |= 0x08L;
396                break;
397              case 'J':
398              case 'j':
399                longValue |= 0x09L;
400                break;
401              case 'K':
402              case 'k':
403                longValue |= 0x0AL;
404                break;
405              case 'L':
406              case 'l':
407                longValue |= 0x0BL;
408                break;
409              case 'M':
410              case 'm':
411                longValue |= 0x0CL;
412                break;
413              case 'N':
414              case 'n':
415                longValue |= 0x0DL;
416                break;
417              case 'O':
418              case 'o':
419                longValue |= 0x0EL;
420                break;
421              case 'P':
422              case 'p':
423                longValue |= 0x0FL;
424                break;
425              case 'Q':
426              case 'q':
427                longValue |= 0x10L;
428                break;
429              case 'R':
430              case 'r':
431                longValue |= 0x11L;
432                break;
433              case 'S':
434              case 's':
435                longValue |= 0x12L;
436                break;
437              case 'T':
438              case 't':
439                longValue |= 0x13L;
440                break;
441              case 'U':
442              case 'u':
443                longValue |= 0x14L;
444                break;
445              case 'V':
446              case 'v':
447                longValue |= 0x15L;
448                break;
449              case 'W':
450              case 'w':
451                longValue |= 0x16L;
452                break;
453              case 'X':
454              case 'x':
455                longValue |= 0x17L;
456                break;
457              case 'Y':
458              case 'y':
459                longValue |= 0x18L;
460                break;
461              case 'Z':
462              case 'z':
463                longValue |= 0x19L;
464                break;
465              case '2':
466                longValue |= 0x1AL;
467                break;
468              case '3':
469                longValue |= 0x1BL;
470                break;
471              case '4':
472                longValue |= 0x1CL;
473                break;
474              case '5':
475                longValue |= 0x1DL;
476                break;
477              case '6':
478                longValue |= 0x1EL;
479                break;
480              case '7':
481                longValue |= 0x1FL;
482                break;
483    
484              case '=':
485                switch (length - stringPos)
486                {
487                  case 0:
488                    // The string ended with a single equal sign, so there are
489                    // four bytes left.
490                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
491                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
492                    buffer.append((byte) ((longValue >> 16) & 0xFFL));
493                    buffer.append((byte) ((longValue >> 8) & 0xFFL));
494                    return buffer.toByteArray();
495    
496                  case 2:
497                    // The string ended with three equal signs, so there are three
498                    // bytes left.
499                    longValue <<= 10;
500                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
501                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
502                    buffer.append((byte) ((longValue >> 16) & 0xFFL));
503                    return buffer.toByteArray();
504    
505                  case 3:
506                    // The string ended with four equal signs, so there are two
507                    // bytes left.
508                    longValue <<= 15;
509                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
510                    buffer.append((byte) ((longValue >> 24) & 0xFFL));
511                    return buffer.toByteArray();
512    
513                  case 5:
514                    // The string ended with six equal signs, so there is one byte
515                    // left.
516                    longValue <<= 25;
517                    buffer.append((byte) ((longValue >> 32) & 0xFFL));
518                    return buffer.toByteArray();
519    
520                  default:
521                    throw new ParseException(
522                         ERR_BASE32_DECODE_UNEXPECTED_EQUAL.get((stringPos-1)),
523                         (stringPos-1));
524                }
525    
526              default:
527                throw new ParseException(
528                     ERR_BASE32_DECODE_UNEXPECTED_CHAR.get(
529                          data.charAt(stringPos-1)),
530                     (stringPos-1));
531            }
532          }
533    
534          buffer.append((byte) ((longValue >> 32) & 0xFFL));
535          buffer.append((byte) ((longValue >> 24) & 0xFFL));
536          buffer.append((byte) ((longValue >> 16) & 0xFFL));
537          buffer.append((byte) ((longValue >> 8) & 0xFFL));
538          buffer.append((byte) (longValue & 0xFFL));
539        }
540    
541        return buffer.toByteArray();
542      }
543    
544    
545    
546      /**
547       * Decodes the contents of the provided base32-encoded string to a string
548       * containing the raw data using the UTF-8 encoding.
549       *
550       * @param  data  The base32-encoded string to decode.  It must not be
551       *               {@code null}.
552       *
553       * @return  A string containing the decoded data.
554       *
555       * @throws  ParseException  If the contents of the provided string cannot be
556       *                          parsed as base32-encoded data using the UTF-8
557       *                          encoding.
558       */
559      public static String decodeToString(final String data)
560             throws ParseException
561      {
562        ensureNotNull(data);
563    
564        final byte[] decodedBytes = decode(data);
565        return StaticUtils.toUTF8String(decodedBytes);
566      }
567    }