#ifndef OC_CODER_H
#define OC_CODER_H

#include <algorithm>
#include <cstddef>
#include <ostream>
#include <string>
#include <vector>


// NOTE(review): a using-directive at header scope leaks into every includer.
// It is kept only because existing code that includes this header may rely on it.
using namespace std;
/*
  ocCoder
  various encode/decode pairs
  ---------------------
  Start of complete encoding library
  Copyright (2002) D.K.McCombs - w3sys.com

  RFC 2045 Internet Message Bodies November 1996

  6.8. Base64 Content-Transfer-Encoding top end

  The Base64 Content-Transfer-Encoding is designed to represent
  arbitrary sequences of octets in a form that need not be humanly
  readable. The encoding and decoding algorithms are simple, but the
  encoded data are consistently only about 33 percent larger than the
  unencoded data. This encoding is virtually identical to the one used
  in Privacy Enhanced Mail (PEM) applications, as defined in RFC 1421.

  A 65-character subset of US-ASCII is used, enabling 6 bits to be
  represented per printable character. (The extra 65th character, "=",
  is used to signify a special processing function.)

  NOTE: This subset has the important property that it is represented
  identically in all versions of ISO 646, including US-ASCII, and all
  characters in the subset are also represented identically in all
  versions of EBCDIC. Other popular encodings, such as the encoding
  used by the uuencode utility, Macintosh binhex 4.0 [RFC-1741], and
  the base85 encoding specified as part of Level 2 PostScript, do not
  share these properties, and thus do not fulfill the portability
  requirements a binary transport encoding for mail must meet.

  The encoding process represents 24-bit groups of input bits as output
  strings of 4 encoded characters. Proceeding from left to right, a
  24-bit input group is formed by concatenating 3 8bit input groups.
  These 24 bits are then treated as 4 concatenated 6-bit groups, each
  of which is translated into a single digit in the base64 alphabet.
  When encoding a bit stream via the base64 encoding, the bit stream
  must be presumed to be ordered with the most-significant-bit first.
  That is, the first bit in the stream will be the high-order bit in
  the first 8bit byte, and the eighth bit will be the low-order bit in
  the first 8bit byte, and so on.

  Each 6-bit group is used as an index into an array of 64 printable
  characters. The character referenced by the index is placed in the
  output string. These characters, identified in Table 1, below, are
  selected so as to be universally representable, and the set excludes
  characters with particular significance to SMTP (e.g., ".", CR, LF)
  and to the multipart boundary delimiters defined in RFC 2046 (e.g.,
  "-").

                  Table 1: The Base64 Alphabet

  Value Encoding  Value Encoding  Value Encoding  Value Encoding
      0 A            17 R            34 i            51 z
      1 B            18 S            35 j            52 0
      2 C            19 T            36 k            53 1
      3 D            20 U            37 l            54 2
      4 E            21 V            38 m            55 3
      5 F            22 W            39 n            56 4
      6 G            23 X            40 o            57 5
      7 H            24 Y            41 p            58 6
      8 I            25 Z            42 q            59 7
      9 J            26 a            43 r            60 8
     10 K            27 b            44 s            61 9
     11 L            28 c            45 t            62 +
     12 M            29 d            46 u            63 /
     13 N            30 e            47 v
     14 O            31 f            48 w         (pad) =
     15 P            32 g            49 x
     16 Q            33 h            50 y

  The encoded output stream must be represented in lines of no more
  than 76 characters each. All line breaks or other characters not
  found in Table 1 must be ignored by decoding software. In base64
  data, characters other than those in Table 1, line breaks, and other
  white space probably indicate a transmission error, about which a
  warning message or even a message rejection might be appropriate
  under some circumstances.

  Special processing is performed if fewer than 24 bits are available
  at the end of the data being encoded. A full encoding quantum is
  always completed at the end of a body. When fewer than 24 input bits
  are available in an input group, zero bits are added (on the right)
  to form an integral number of 6-bit groups. Padding at the end of
  the data is performed using the "=" character. Since all base64
  input is an integral number of octets, only the following cases can
  arise: (1) the final quantum of encoding input is an integral
  multiple of 24 bits; here, the final unit of encoded output will be
  an integral multiple of 4 characters with no "=" padding, (2) the
  final quantum of encoding input is exactly 8 bits; here, the final
  unit of encoded output will be two characters followed by two "="
  padding characters, or (3) the final quantum of encoding input is
  exactly 16 bits; here, the final unit of encoded output will be three
  characters followed by one "=" padding character.

  Because it is used only for padding at the end of the data, the
  occurrence of any "=" characters may be taken as evidence that the
  end of the data has been reached (without truncation in transit). No
  such assurance is possible, however, when the number of octets
  transmitted was a multiple of three and no "=" characters are
  present.

  Any characters outside of the base64 alphabet are to be ignored in
  base64-encoded data.

  Care must be taken to use the proper octets for line breaks if base64
  encoding is applied directly to text material that has not been
  converted to canonical form. In particular, text line breaks must be
  converted into CRLF sequences prior to base64 encoding. The
  important thing to note is that this may be done directly by the
  encoder rather than in a prior canonicalization step in some
  implementations.

  NOTE: There is no need to worry about quoting potential boundary
  delimiters within base64-encoded bodies within multipart entities
  because no hyphen characters are used in the base64 encoding.

*/


// The maximum line for mail transport is 76 characters, so at most
// 3/4 of 76 = 57 raw bytes are encoded per call.
const size_t ENCODELength = 57;
const size_t DECODELength = 76;
// Size of the reverse (character -> 6-bit value) lookup table.
// It must be larger than the highest alphabet character, 'z' (122).
const size_t REVARRAYSize = 126;

// The b24bits union overlays three bytes with four 6-bit fields. Its layout
// depends on the compiler's bit-field ordering and on IS_BIG_ENDIAN being
// defined correctly. ocCoder no longer uses it (it works with shifts and
// masks instead); the type is retained so that other code naming it still
// compiles.

// pack the structure byte tight
#pragma pack(1)
#ifdef IS_BIG_ENDIAN
union b24bits
{
  struct {
    unsigned int ch4:6;
    unsigned int ch3:6;
    unsigned int ch2:6;
    unsigned int ch1:6;
  } bit6Sequence;
  struct {
    unsigned char ch3;
    unsigned char ch2;
    unsigned char ch1;
  } byteSequence;
};
#else
union b24bits
{
  struct {
    unsigned int ch1:6;
    unsigned int ch2:6;
    unsigned int ch3:6;
    unsigned int ch4:6;
  } bit6Sequence;
  struct {
    unsigned char ch1;
    unsigned char ch2;
    unsigned char ch3;
  } byteSequence;
};
#endif
// return to normal structure (probably machine word size) boundaries
#pragma pack()

typedef vector< unsigned char > bins;

/*
  ocCoder - base64 encoder/decoder for one mail-transport line at a time.

  Both base64encode() and base64decode() return a reference to a buffer
  owned by the ocCoder object; the buffer is overwritten by the next call
  to the same function.
*/
class ocCoder
{
private:
  // Marker stored in ralphabet for characters that are not in the alphabet.
  // Valid entries are 0..63, so 0xFF can never be a real 6-bit value.
  enum { InvalidSextet = 0xFF };

  string  alphabet;     // forward map: 6-bit value -> character
  bins    ralphabet;    // reverse map: character code -> 6-bit value
  bins    bindata;      // result buffer of base64decode()
  string  base64data;   // result buffer of base64encode()
  char    base64pad;    // the padding character, '='

  /*
    Append the bytes held in a (possibly partial) 24-bit group to bindata.
    'sextets' is the number of 6-bit values accumulated in the low bits of
    'group': 4 yields three bytes, 3 yields two, 2 yields one, and fewer
    than 2 cannot form a whole byte and yields nothing.
  */
  void appendGroup( unsigned long group, int sextets )
  {
    if( sextets < 2 )
    {
      return;
    }
    // Left-align a partial group so the byte positions match a full one.
    group <<= 6 * (4 - sextets);
    bindata.push_back( static_cast<unsigned char>((group >> 16) & 0xFF) );
    if( sextets > 2 )
    {
      bindata.push_back( static_cast<unsigned char>((group >> 8) & 0xFF) );
    }
    if( sextets > 3 )
    {
      bindata.push_back( static_cast<unsigned char>(group & 0xFF) );
    }
  }

public:
  ocCoder()
  // The forward map for encoding
  //                  1         2         3         4         5         6
  //        0123456789012345678901234567890123456789012345678901234567890123
  :alphabet("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
  ,ralphabet(REVARRAYSize,static_cast<unsigned char>(InvalidSextet))
  ,base64pad('=')
  {
    // The reverse map for decoding is derived from the forward map, so the
    // two tables cannot drift apart.
    for( string::size_type value = 0; value < alphabet.length(); ++value )
    {
      ralphabet[ static_cast<unsigned char>(alphabet[value]) ] =
        static_cast<unsigned char>(value);
    }
  }

  virtual ~ocCoder()
  {;}

  /*
    Convert a binary segment to base64.

    input  - the raw bytes to encode; may be null.
    length - number of bytes at input; at most ENCODELength (57), which
             encodes to one line of at most DECODELength (76) characters.

    Returns a reference to the internal result string. It holds exactly the
    encoded characters, "=" padded to a multiple of four. It is empty when
    input is null, when length is 0, or when length exceeds ENCODELength.
  */
  string & base64encode( const unsigned char * input, size_t length )
  {
    // initialize the output data
    base64data.clear();

    // The function refuses input that is too big for one line
    if( input == 0 || length == 0 || length > ENCODELength )
    {
      return base64data;
    }
    base64data.reserve( ((length + 2) / 3) * 4 );

    for( size_t idx = 0; idx < length; idx += 3 )
    {
      const size_t remaining = length - idx;

      // Gather up to three raw bytes, most significant first; bytes that
      // are missing in the final group stay zero.
      unsigned long group = static_cast<unsigned long>(input[idx]) << 16;
      if( remaining > 1 ) group |= static_cast<unsigned long>(input[idx+1]) << 8;
      if( remaining > 2 ) group |= static_cast<unsigned long>(input[idx+2]);

      // Translate the four 6-bit values; positions that carry no input
      // bits are emitted as padding.
      base64data += alphabet[(group >> 18) & 0x3F];
      base64data += alphabet[(group >> 12) & 0x3F];
      base64data += (remaining > 1) ? alphabet[(group >> 6) & 0x3F] : base64pad;
      base64data += (remaining > 2) ? alphabet[group & 0x3F] : base64pad;
    }
    return base64data;
  }

  /*
    Convert base64 text back to binary.

    encoded - the base64 text. Characters outside the base64 alphabet (line
              breaks, NUL bytes, bytes with the high bit set, ...) are
              skipped. A "=" ends the group being assembled, and any
              characters after it start a new group. A trailing group that
              has no padding is decoded as far as it forms whole bytes.

    Returns a reference to the internal byte vector holding the result.
  */
  bins & base64decode ( const string & encoded )
  {
    bindata.clear();
    bindata.reserve( (encoded.length() / 4) * 3 + 2 );

    unsigned long group = 0;  // 6-bit values gathered so far, newest lowest
    int sextets = 0;          // how many values 'group' holds (0..3)

    for( string::size_type idx = 0; idx < encoded.length(); ++idx )
    {
      // Index the table with an unsigned value so that bytes >= 0x80 can
      // never turn into a negative subscript.
      const unsigned char ch = static_cast<unsigned char>(encoded[idx]);

      if( ch == static_cast<unsigned char>(base64pad) )
      {
        // Padding closes the current (short) group.
        appendGroup( group, sextets );
        group = 0;
        sextets = 0;
        continue;
      }
      if( ch >= ralphabet.size() || ralphabet[ch] == InvalidSextet )
      {
        continue;   // not part of the alphabet: ignore
      }

      group = (group << 6) | ralphabet[ch];
      if( ++sextets == 4 )
      {
        // four chars at a time make three bytes
        appendGroup( group, sextets );
        group = 0;
        sextets = 0;
      }
    }
    // Input that ends without padding may leave a partial group behind.
    appendGroup( group, sextets );

    // Return the output
    return bindata;
  }

  /*
    Access the result of the most recent base64encode() call.
  */
  string & data( void )
  {
    return base64data;
  }
};

// define a stream operator for bins: writes every byte, unformatted.
// Declared inline because this header may be included by several
// translation units.
inline ostream & operator << ( ostream & outstream, const bins & binary )
{
  for( bins::size_type i = 0; i < binary.size(); ++i )
  {
    outstream.put( static_cast<char>(binary[i]) );
  }
  return outstream;
}

#endif
1.2.18