001    /*
002     *  Licensed to the Apache Software Foundation (ASF) under one
003     *  or more contributor license agreements.  See the NOTICE file
004     *  distributed with this work for additional information
005     *  regarding copyright ownership.  The ASF licenses this file
006     *  to you under the Apache License, Version 2.0 (the
007     *  "License"); you may not use this file except in compliance
008     *  with the License.  You may obtain a copy of the License at
009     *  
010     *    http://www.apache.org/licenses/LICENSE-2.0
011     *  
012     *  Unless required by applicable law or agreed to in writing,
013     *  software distributed under the License is distributed on an
014     *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015     *  KIND, either express or implied.  See the License for the
016     *  specific language governing permissions and limitations
017     *  under the License. 
018     *  
019     */
020    
021    package org.apache.directory.shared.ldap.util;
022    
023    import org.apache.directory.shared.i18n.I18n;
024    
025    
/**
 * Minimal Base64 codec: encodes raw bytes to Base64 characters and decodes
 * Base64 characters back to raw bytes. The alphabet is <code>A-Z</code>,
 * <code>a-z</code>, <code>0-9</code>, <code>+</code> and <code>/</code>, with
 * <code>=</code> used as the padding character on output.
 * 
 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
 * @version $Revision: 919765 $
 */
public class Base64
{
    /** Code characters for values 0..63, followed by the padding character. */
    private static final char[] ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
        .toCharArray();

    /** Index of the '=' padding character inside {@link #ALPHABET}. */
    private static final int PAD_INDEX = 64;

    /**
     * Lookup table converting a Base64 character (code point 0..255) to its
     * value in the range 0..63; every other entry, including '=', is -1.
     */
    private static final byte[] CODES = new byte[256];

    static
    {
        for ( int i = 0; i < CODES.length; i++ )
        {
            CODES[i] = -1;
        }

        // Derive the reverse table from the alphabet so the two can never
        // disagree. The padding character is deliberately left at -1.
        for ( int value = 0; value < PAD_INDEX; value++ )
        {
            CODES[ALPHABET[value]] = ( byte ) value;
        }
    }


    /**
     * Encodes the passed data array as Base64 characters. Every group of up
     * to 3 input bytes produces exactly 4 output characters; a final group of
     * 1 or 2 bytes is completed with '=' padding, so the result length is
     * always a multiple of 4.
     * 
     * @param a_data
     *            the array of bytes to encode
     * @return base64-coded character array.
     * @throws NullPointerException
     *             if <code>a_data</code> is null
     */
    public static char[] encode( byte[] a_data )
    {
        char[] out = new char[( ( a_data.length + 2 ) / 3 ) * 4];

        for ( int src = 0, dst = 0; src < a_data.length; src += 3, dst += 4 )
        {
            boolean hasSecond = ( src + 1 ) < a_data.length;
            boolean hasThird = ( src + 2 ) < a_data.length;

            // Pack up to three bytes into the low 24 bits; missing bytes
            // contribute zero bits.
            int group = ( 0xFF & a_data[src] ) << 16;

            if ( hasSecond )
            {
                group |= ( 0xFF & a_data[src + 1] ) << 8;
            }

            if ( hasThird )
            {
                group |= ( 0xFF & a_data[src + 2] );
            }

            // Emit four 6-bit slices, most significant first. The last two
            // positions become padding when their source byte is absent.
            out[dst] = ALPHABET[( group >> 18 ) & 0x3F];
            out[dst + 1] = ALPHABET[( group >> 12 ) & 0x3F];
            out[dst + 2] = ALPHABET[hasSecond ? ( ( group >> 6 ) & 0x3F ) : PAD_INDEX];
            out[dst + 3] = ALPHABET[hasThird ? ( group & 0x3F ) : PAD_INDEX];
        }

        return out;
    }


    /**
     * Decodes Base64 characters to recover the original data. Any character
     * that is not part of the Base64 alphabet (whitespace, newlines, the '='
     * padding character, characters above code point 255, ...) is skipped
     * wherever it occurs, rather than causing an error. The output size is
     * computed from a count of the valid characters only, so such junk does
     * not affect the result. A trailing group holding a single valid
     * character carries fewer than 8 bits and contributes no output byte.
     * 
     * @param data
     *            data to decode.
     * @return the decoded binary data.
     * @throws NullPointerException
     *             if <code>data</code> is null
     */
    public static byte[] decode( char[] data )
    {
        // First pass: count only the characters that carry data, so that
        // the output array is sized correctly despite interleaved junk.
        int validCount = 0;

        for ( char c : data )
        {
            if ( codeOf( c ) >= 0 )
            {
                validCount++;
            }
        }

        // 3 bytes for every 4 valid chars, plus 2 bytes for 3 extra chars
        // or 1 byte for 2 extra chars.
        int length = ( validCount / 4 ) * 3;

        switch ( validCount % 4 )
        {
            case 3:
                length += 2;
                break;

            case 2:
                length += 1;
                break;

            default:
                break;
        }

        byte[] out = new byte[length];

        int pendingBits = 0; // number of not-yet-written bits held in accum
        int accum = 0; // bit accumulator, newest bits at the bottom
        int pos = 0;

        // Second pass over the whole input, skipping the same characters
        // the first pass did not count.
        for ( char c : data )
        {
            int value = codeOf( c );

            if ( value < 0 )
            {
                continue;
            }

            // Shift the new 6 bits in at the bottom. Older bits that
            // overflow the int have already been written out and are
            // never read again.
            accum = ( accum << 6 ) | value;
            pendingBits += 6;

            if ( pendingBits >= 8 )
            {
                pendingBits -= 8;
                out[pos++] = ( byte ) ( ( accum >> pendingBits ) & 0xFF );
            }
        }

        // Internal invariant: k valid characters always yield floor(6k/8)
        // bytes, which is exactly the length computed above, so no input can
        // violate this. A failure would surface as an AssertionError (an
        // Error) when assertions are enabled.
        assert pos == out.length : "decoded " + pos + " bytes but expected " + out.length;

        return out;
    }


    /**
     * Returns the 6-bit value of a Base64 character, or -1 when the
     * character is not part of the Base64 alphabet.
     */
    private static int codeOf( char c )
    {
        return ( c > 255 ) ? -1 : CODES[c];
    }
}