001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied. See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 *
019 */
020
021 package org.apache.directory.shared.ldap.util;
022
023 import org.apache.directory.shared.i18n.I18n;
024
025
/**
 * Minimal Base64 codec: encodes raw bytes to Base64 characters and decodes
 * Base64 characters back to raw bytes.
 *
 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
 * @version $Revision: 919765 $
 */
public class Base64
{
    /** padding character used to fill the last encoded group up to 4 characters */
    private static final char PAD = '=';

    /** code characters for values 0..63 */
    private static final char[] ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        .toCharArray();

    /**
     * lookup table for converting base64 characters to a value in the range
     * 0..63; holds -1 for every character that is not part of the alphabet
     * (including the padding character)
     */
    private static final byte[] CODES = new byte[256];

    static
    {
        for ( int ii = 0; ii < CODES.length; ii++ )
        {
            CODES[ii] = -1;
        }

        // Derive the reverse mapping from the alphabet so that the two tables
        // cannot drift apart.
        for ( int value = 0; value < ALPHABET.length; value++ )
        {
            CODES[ALPHABET[value]] = ( byte ) value;
        }
    }


    /**
     * Encodes the passed data array as Base64 characters. Every group of up
     * to 3 input bytes produces exactly 4 output characters; an incomplete
     * last group is filled up with '=' padding.
     *
     * @param a_data
     *            the array of bytes to encode
     * @return base64-coded character array; its length is always a multiple
     *         of 4 (and 0 for empty input).
     * @throws NullPointerException
     *             if a_data is null
     */
    public static char[] encode( byte[] a_data )
    {
        char[] out = new char[( ( a_data.length + 2 ) / 3 ) * 4];

        for ( int in = 0, pos = 0; in < a_data.length; in += 3, pos += 4 )
        {
            boolean hasSecond = ( in + 1 ) < a_data.length;
            boolean hasThird = ( in + 2 ) < a_data.length;

            // Pack up to three bytes into the low 24 bits; bytes missing from
            // the last group contribute zero bits.
            int bits = ( 0xFF & a_data[in] ) << 16;

            if ( hasSecond )
            {
                bits |= ( 0xFF & a_data[in + 1] ) << 8;
            }

            if ( hasThird )
            {
                bits |= ( 0xFF & a_data[in + 2] );
            }

            // Emit the four 6-bit groups, most significant first. Positions
            // that carry no input bits at all become padding.
            out[pos] = ALPHABET[( bits >> 18 ) & 0x3F];
            out[pos + 1] = ALPHABET[( bits >> 12 ) & 0x3F];
            out[pos + 2] = hasSecond ? ALPHABET[( bits >> 6 ) & 0x3F] : PAD;
            out[pos + 3] = hasThird ? ALPHABET[bits & 0x3F] : PAD;
        }

        return out;
    }


    /**
     * Decodes Base64 characters to recover the original data. Decoding is
     * lenient: every character that is not part of the Base64 alphabet
     * (whitespace, newlines, the '=' padding character, characters above
     * 255, ...) is skipped wherever it occurs. The remaining characters are
     * treated as one continuous stream of 6-bit values; trailing bits that do
     * not fill a whole byte are dropped.
     *
     * @param data
     *            data to decode.
     * @return the decoded binary data (empty if the input contains fewer
     *         than two valid characters).
     * @throws NullPointerException
     *             if data is null
     */
    public static byte[] decode( char[] data )
    {
        // First pass: count only the usable characters so that the output
        // array is sized correctly regardless of interleaved junk.
        int validChars = 0;

        for ( char c : data )
        {
            if ( codeOf( c ) >= 0 )
            {
                validChars++;
            }
        }

        byte[] out = new byte[decodedLength( validChars )];

        int pending = 0; // # of bits in accum not yet written out
        int accum = 0; // bit accumulator, new bits enter at the bottom
        int pos = 0;

        // Second pass over the whole input. It uses the same codeOf() test as
        // the first pass, so exactly validChars characters are consumed and
        // exactly out.length bytes are produced.
        for ( char c : data )
        {
            int value = codeOf( c );

            if ( value < 0 )
            {
                continue; // skip over non-code
            }

            // Older bits eventually overflow out of the top of the int; that
            // is harmless because only the low (8 + pending) bits are read.
            accum = ( accum << 6 ) | value;
            pending += 6;

            if ( pending >= 8 )
            {
                pending -= 8;
                out[pos++] = ( byte ) ( ( accum >> pending ) & 0xFF );
            }
        }

        return out;
    }


    /**
     * Maps a character to its 6-bit Base64 value.
     *
     * @param c
     *            the character to look up
     * @return the value in the range 0..63, or -1 if the character is not
     *         part of the Base64 alphabet
     */
    private static int codeOf( char c )
    {
        return ( c > 255 ) ? -1 : CODES[c];
    }


    /**
     * Computes the number of bytes encoded by a given number of Base64
     * characters: 3 bytes for every 4 characters, plus 2 bytes for 3 extra
     * characters or 1 byte for 2 extra characters. A single extra character
     * carries only 6 bits and therefore yields no additional byte.
     *
     * @param validChars
     *            the number of valid Base64 characters
     * @return the number of decoded bytes
     */
    private static int decodedLength( int validChars )
    {
        int length = ( validChars / 4 ) * 3;

        switch ( validChars % 4 )
        {
            case 3:
                length += 2;
                break;

            case 2:
                length += 1;
                break;

            default:
                break;
        }

        return length;
    }
}