xref: /trunk/main/tools/inc/tools/inetmime.hxx (revision 67e470da)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 #ifndef TOOLS_INETMIME_HXX
24 #define TOOLS_INETMIME_HXX
25 
26 #include "tools/toolsdllapi.h"
27 #include <rtl/alloc.h>
28 #include <rtl/string.h>
29 #include "rtl/tencinfo.h"
30 #include <tools/debug.hxx>
31 #include <tools/errcode.hxx>
32 #include <tools/list.hxx>
33 #include <tools/string.hxx>
34 
35 class DateTime;
36 class INetContentTypeParameterList;
37 class INetMIMECharsetList_Impl;
38 class INetMIMEOutputSink;
39 
40 //============================================================================
41 class TOOLS_DLLPUBLIC INetMIME
42 {
43 public:
44 	enum { SOFT_LINE_LENGTH_LIMIT = 76,
45 		   HARD_LINE_LENGTH_LIMIT = 998 };
46 
47 	/** The various types of message header field bodies, with respect to
48 		encoding and decoding them.
49 
50 		@descr  At the moment, five different types of header fields suffice
51 		to describe how to encode and decode any known message header field
52 		body, but need for more types may arise in the future as new header
53 		fields are introduced.
54 
55 		@descr  The following is an exhaustive list of all the header fields
56 		currently known to our implementation.  For every header field, it
57 		includes a 'canonic' (with regard to capitalization) name, a grammar
58 		rule for the body (using RFC 822 and RFC 2234 conventions), a list of
59 		relevant sources of information, and the HeaderFieldType value to use
60 		with that header field.  The list is based on RFC 2076 and draft-
61 		palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
62 		ietf/jp-ietf-home.html#anchor1003783>).
63 
64 		Approved: address  ;RFC 1036; HEADER_FIELD_ADDRESS
65 		bcc: #address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
66 		cc: 1#address  ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
67 		Comments: *text  ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
68 		Content-Base: absoluteURI  ;RFC 2110; HEADER_FIELD_TEXT
69 		Content-Description: *text  ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
70 		Content-Disposition: disposition-type *(";" disposition-parm)
71 			;RFC 1806; HEADER_FIELD_STRUCTURED
72 		Content-ID: msg-id  ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
73 		Content-Location: absoluteURI / relativeURI  ;RFC 2110;
74 			HEADER_FIELD_TEXT
75 		Content-Transfer-Encoding: mechanism  ;RFC 2045, RFC 2047;
76 			HEADER_FIELD_STRUCTURED
77 		Content-Type: type "/" subtype *(";" parameter)  ;RFC 2045, RFC 2047;
78 			HEADER_FIELD_STRUCTURED
79 		Control:  *text ;RFC 1036; HEADER_FIELD_TEXT
80 		Date: date-time  ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
81 		Distribution: 1#atom  ;RFC 1036; HEADER_FIELD_STRUCTURED
82 		Encrypted: 1#2word  ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
83 		Expires: date-time  ;RFC 1036; HEADER_FIELD_STRUCTURED
84 		Followup-To: 1#(atom *("." atom))  ;RFC 1036; HEADER_FIELD_STRUCTURED
85 		From: mailbox / 1#mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
86 		In-Reply-To: *(phrase / msg-id)  ;RFC 822, RFC 2047;
87 			HEADER_FIELD_ADDRESS
88 		Keywords: #phrase  ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
89 		MIME-Version: 1*DIGIT "." 1*DIGIT  ;RFC 2045, RFC 2047;
90 			HEADER_FIELD_STRUCTURED
91 		Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
92 		Newsgroups: 1#(atom *("." atom))  ;RFC 1036, RFC 2047;
93 			HEADER_FIELD_STRUCTURED
94 		Organization: *text  ;RFC 1036; HEADER_FIELD_TEXT
95 		Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
96 			["id" msg-id] ["for" addr-spec] ";" date-time  ;RFC 822, RFC 1123,
97 			RFC 2047; HEADER_FIELD_STRUCTURED
98 		References: *(phrase / msg-id)  ;RFC 822, RFC 2047;
99 			HEADER_FIELD_ADDRESS
100 		Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
101 		Resent-Date: date-time  ;RFC 822, RFC 1123, RFC 2047;
102 			HEADER_FIELD_STRUCTURED
103 		Resent-From: mailbox / 1#mailbox  ;RFC 822, RFC 2047;
104 			HEADER_FIELD_ADDRESS
105 		Resent-Message-ID: msg-id  ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
106 		Resent-Reply-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 		Resent-Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 		Resent-To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 		Resent-bcc: #address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
110 		Resent-cc: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
111 		Return-path: route-addr / ("<" ">")  ;RFC 822, RFC 1123, RFC 2047;
112 			HEADER_FIELD_STRUCTURED
113 		Return-Receipt-To: address  ;Not Internet standard;
114 			HEADER_FIELD_ADDRESS
115 		Sender: mailbox  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
116 		Subject: *text  ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
117 		Summary: *text  ;RFC 1036; HEADER_FIELD_TEXT
118 		To: 1#address  ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
119 		X-CHAOS-Marked: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
120 		X-CHAOS-Read: "YES" / "NO"  ;local; HEADER_FIELD_STRUCTURED
121 		X-CHAOS-Recipients: #*("<" atom word ">")  ;local;
122 			HEADER_FIELD_STRUCTURED
123 		X-CHAOS-Size: 1*DIGIT  ;local; HEADER_FIELD_STRUCTURED
124 		X-Mailer: *text  ;Not Internet standard; HEADER_FIELD_TEXT
125 		X-Mozilla-Status: 4HEXDIG  ;Mozilla; HEADER_FIELD_STRUCTURED
126 		X-Newsreader: *text  ;Not Internet standard; HEADER_FIELD_TEXT
127 		X-Priority: "1" / "2" / "3" / "4" / "5"  ;Not Internet standard;
128 			HEADER_FIELD_STRUCTURED
129 		Xref: sub-domain
130 			1*((atom / string) *("." (atom / string)) ":" msg-number)
131 			;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
132 	 */
133 	enum HeaderFieldType
134 	{
135 		HEADER_FIELD_TEXT,
136 		HEADER_FIELD_STRUCTURED,
137 		HEADER_FIELD_PHRASE,
138 		HEADER_FIELD_MESSAGE_ID,
139 		HEADER_FIELD_ADDRESS
140 	};
141 
142 	/** Check for US-ASCII character.
143 
144 		@param nChar  Some UCS-4 character.
145 
146 		@return  True if nChar is a US-ASCII character (0x00--0x7F).
147 	 */
148 	static inline bool isUSASCII(sal_uInt32 nChar);
149 
150 	/** Check for ISO 8859-1 character.
151 
152 		@param nChar  Some UCS-4 character.
153 
154 		@return  True if nChar is an ISO 8859-1 character (0x00--0xFF).
155 	 */
156 	static inline bool isISO88591(sal_uInt32 nChar);
157 
158 	/** Check for US-ASCII control character.
159 
160 		@param nChar  Some UCS-4 character.
161 
162 		@return  True if nChar is a US-ASCII control character (US-ASCII
163 		0x00--0x1F or 0x7F).
164 	 */
165 	static inline bool isControl(sal_uInt32 nChar);
166 
167 	/** Check for US-ASCII white space character.
168 
169 		@param nChar  Some UCS-4 character.
170 
171 		@return  True if nChar is a US-ASCII white space character (US-ASCII
172 		0x09 or 0x20).
173 	 */
174 	static inline bool isWhiteSpace(sal_uInt32 nChar);
175 
176 	/** Check for US-ASCII visible character.
177 
178 		@param nChar  Some UCS-4 character.
179 
180 		@return  True if nChar is a US-ASCII visible character (US-ASCII
181 		0x21--0x7E).
182 	 */
183 	static inline bool isVisible(sal_uInt32 nChar);
184 
185 	/** Check for US-ASCII digit character.
186 
187 		@param nChar  Some UCS-4 character.
188 
189 		@return  True if nChar is a US-ASCII (decimal) digit character (US-
190 		ASCII '0'--'9').
191 	 */
192 	static inline bool isDigit(sal_uInt32 nChar);
193 
194 	/** Check for US-ASCII canonic hexadecimal digit character.
195 
196 		@param nChar  Some UCS-4 character.
197 
198 		@return  True if nChar is a US-ASCII canonic (i.e., upper case)
199 		hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
200 	 */
201 	static inline bool isCanonicHexDigit(sal_uInt32 nChar);
202 
203 	/** Check for US-ASCII hexadecimal digit character.
204 
205 		@param nChar  Some UCS-4 character.
206 
207 		@return  True if nChar is a US-ASCII hexadecimal digit character (US-
208 		ASCII '0'--'9', 'A'--'F', 'a'--'f').
209 	 */
210 	static inline bool isHexDigit(sal_uInt32 nChar);
211 
212 	/** Check for US-ASCII upper case character.
213 
214 		@param nChar  Some UCS-4 character.
215 
216 		@return  True if nChar is a US-ASCII upper case alphabetic character
217 		(US-ASCII 'A'--'Z').
218 	 */
219 	static inline bool isUpperCase(sal_uInt32 nChar);
220 
221 	/** Check for US-ASCII lower case character.
222 
223 		@param nChar  Some UCS-4 character.
224 
225 		@return  True if nChar is a US-ASCII lower case alphabetic character
226 		(US-ASCII 'a'--'z').
227 	 */
228 	static inline bool isLowerCase(sal_uInt32 nChar);
229 
230 	/** Check for US-ASCII alphabetic character.
231 
232 		@param nChar  Some UCS-4 character.
233 
234 		@return  True if nChar is a US-ASCII alphabetic character (US-ASCII
235 		'A'--'Z' or 'a'--'z').
236 	 */
237 	static inline bool isAlpha(sal_uInt32 nChar);
238 
239 	/** Check for US-ASCII alphanumeric character.
240 
241 		@param nChar  Some UCS-4 character.
242 
243 		@return  True if nChar is a US-ASCII alphanumeric character (US-ASCII
244 		'0'--'9', 'A'--'Z' or 'a'--'z').
245 	 */
246 	static inline bool isAlphanumeric(sal_uInt32 nChar);
247 
248 	/** Check for US-ASCII Base 64 digit character.
249 
250 		@param nChar  Some UCS-4 character.
251 
252 		@return  True if nChar is a US-ASCII Base 64 digit character (US-ASCII
253 		'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
254 	 */
255 	static inline bool isBase64Digit(sal_uInt32 nChar);
256 
257 	/** Check whether some character is valid within an RFC 822 <atom>.
258 
259 		@param nChar  Some UCS-4 character.
260 
261 		@return  True if nChar is valid within an RFC 822 <atom> (US-ASCII
262 		'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
263 		'-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
264 	 */
265 	static bool isAtomChar(sal_uInt32 nChar);
266 
267 	/** Check whether some character is valid within an RFC 2045 <token>.
268 
269 		@param nChar  Some UCS-4 character.
270 
271 		@return  True if nChar is valid within an RFC 2045 <token> (US-ASCII
272 		'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
273 		'-', '.', '^', '_', '`', '{', '|', '}', or '~').
274 	 */
275 	static bool isTokenChar(sal_uInt32 nChar);
276 
277 	/** Check whether some character is valid within an RFC 2047 <token>.
278 
279 		@param nChar  Some UCS-4 character.
280 
281 		@return  True if nChar is valid within an RFC 2047 <token> (US-ASCII
282 		'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
283 		'-', '^', '_', '`', '{', '|', '}', or '~').
284 	 */
285 	static bool isEncodedWordTokenChar(sal_uInt32 nChar);
286 
287 	/** Check whether some character is valid within an RFC 2060 <atom>.
288 
289 		@param nChar  Some UCS-4 character.
290 
291 		@return  True if nChar is valid within an RFC 2060 <atom> (US-ASCII
292 		'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
293 		'.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
294 		'|', '}', or '~').
295 	 */
296 	static bool isIMAPAtomChar(sal_uInt32 nChar);
297 
298 	/** Translate a US-ASCII character to upper case.
299 
300 		@param nChar  Some UCS-4 character.
301 
302 		@return  If nChar is a US-ASCII lower case character (US-ASCII
303 		'a'--'z'), return the corresponding US-ASCII upper case character (US-
304 		ASCII 'A'--'Z'); otherwise, return nChar unchanged.
305 	 */
306 	static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
307 
308 	/** Translate a US-ASCII character to lower case.
309 
310 		@param nChar  Some UCS-4 character.
311 
312 		@return  If nChar is a US-ASCII upper case character (US-ASCII
313 		'A'--'Z'), return the corresponding US-ASCII lower case character (US-
314 		ASCII 'a'--'z'); otherwise, return nChar unchanged.
315 	 */
316 	static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
317 
318 	/** Get the digit weight of a US-ASCII character.
319 
320 		@param nChar  Some UCS-4 character.
321 
322 		@return  If nChar is a US-ASCII (decimal) digit character (US-ASCII
323 		'0'--'9'), return the corresponding weight (0--9); otherwise,
324 		return -1.
325 	 */
326 	static inline int getWeight(sal_uInt32 nChar);
327 
328 	/** Get the hexadecimal digit weight of a US-ASCII character.
329 
330 		@param nChar  Some UCS-4 character.
331 
332 		@return  If nChar is a US-ASCII hexadecimal digit character (US-ASCII
333 		'0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
334 		(0--15); otherwise, return -1.
335 	 */
336 	static inline int getHexWeight(sal_uInt32 nChar);
337 
338 	/** Get the Base 64 digit weight of a US-ASCII character.
339 
340 		@param nChar  Some UCS-4 character.
341 
342 		@return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
343 		'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
344 		corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
345 		character (US-ASCII '='), return -1; otherwise, return -2.
346 	 */
347 	static inline int getBase64Weight(sal_uInt32 nChar);
348 
349 	/** Get a decimal digit encoded as US-ASCII.
350 
351 		@param nWeight  Must be in the range 0--9, inclusive.
352 
353 		@return  The decimal digit corresponding to nWeight (US-ASCII
354 		'0'--'9').
355 	 */
356 	static sal_uInt32 getDigit(int nWeight);
357 
358 	/** Get a hexadecimal digit encoded as US-ASCII.
359 
360 		@param nWeight  Must be in the range 0--15, inclusive.
361 
362 		@return  The canonic (i.e., upper case) hexadecimal digit
363 		corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
364 	 */
365 	static sal_uInt32 getHexDigit(int nWeight);
366 
367 	/** Get a Base 64 digit encoded as US-ASCII.
368 
369 		@param nWeight  Must be in the range 0--63, inclusive.
370 
371 		@return  The Base 64 digit corresponding to nWeight (US-ASCII 'A'--
372 		'Z', 'a'--'z', '0'--'9', '+' or '/').
373 	 */
374 	static sal_uInt32 getBase64Digit(int nWeight);
375 
	/** True if nUTF16 is a UTF-16 high surrogate (0xD800--0xDBFF). */
376 	static inline bool isHighSurrogate(sal_uInt32 nUTF16);
377 
	/** True if nUTF16 is a UTF-16 low surrogate (0xDC00--0xDFFF). */
378 	static inline bool isLowSurrogate(sal_uInt32 nUTF16);
379 
380 	static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
381 									 sal_Unicode cLowSurrogate);
382 
383 	/** Check two US-ASCII strings for equality, ignoring case.
384 
385 		@param pBegin1  Points to the start of the first string, must not be
386 		null.
387 
388 		@param pEnd1  Points past the end of the first string, must be >=
389 		pBegin1.
390 
391 		@param pBegin2  Points to the start of the second string, must not be
392 		null.
393 
394 		@param pEnd2  Points past the end of the second string, must be >=
395 		pBegin2.
396 
397 		@return  True if the two strings are equal, ignoring the case of US-
398 		ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
399 	 */
400 	static bool equalIgnoreCase(const sal_Char * pBegin1,
401 								const sal_Char * pEnd1,
402 								const sal_Char * pBegin2,
403 								const sal_Char * pEnd2);
404 
405 	/** Check two US-ASCII strings for equality, ignoring case.
406 
407 		@param pBegin1  Points to the start of the first string, must not be
408 		null.
409 
410 		@param pEnd1  Points past the end of the first string, must be >=
411 		pBegin1.
412 
413 		@param pString2  Points to the start of the null terminated second
414 		string, must not be null.
415 
416 		@return  True if the two strings are equal, ignoring the case of US-
417 		ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
418 	 */
419 	static bool equalIgnoreCase(const sal_Char * pBegin1,
420 								const sal_Char * pEnd1,
421 								const sal_Char * pString2);
422 
423 	/** Check two US-ASCII strings for equality, ignoring case.
424 
425 		@param pBegin1  Points to the start of the first string, must not be
426 		null.
427 
428 		@param pEnd1  Points past the end of the first string, must be >=
429 		pBegin1.
430 
431 		@param pString2  Points to the start of the null terminated second
432 		string, must not be null.
433 
434 		@return  True if the two strings are equal, ignoring the case of US-
435 		ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
436 	 */
437 	static bool equalIgnoreCase(const sal_Unicode * pBegin1,
438 								const sal_Unicode * pEnd1,
439 								const sal_Char * pString2);
440 
441 	/** Check two US-ASCII strings for equality, ignoring case.
442 
443 		@param rString1  The first string.
444 
445 		@param pString2  Points to the start of the null terminated second
446 		string, must not be null.
447 
448 		@return  True if the two strings are equal, ignoring the case of US-
449 		ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
450 	 */
451 	static inline bool equalIgnoreCase(const ByteString & rString1,
452 									   const sal_Char * pString2);
453 
	/** True if [pBegin, pEnd) starts with CR LF (0x0D, 0x0A). */
454 	static inline bool startsWithLineBreak(const sal_Char * pBegin,
455 										   const sal_Char * pEnd);
456 
	/** True if [pBegin, pEnd) starts with CR LF (0x0D, 0x0A). */
457 	static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
458 										   const sal_Unicode * pEnd);
459 
	/** True if [pBegin, pEnd) starts with CR LF followed by a white space
		character (see isWhiteSpace). */
460 	static inline bool startsWithLineFolding(const sal_Char * pBegin,
461 											 const sal_Char * pEnd);
462 
	/** True if [pBegin, pEnd) starts with CR LF followed by a white space
		character (see isWhiteSpace). */
463 	static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
464 											 const sal_Unicode * pEnd);
465 
466 	static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
467 										   const sal_Char * pEnd);
468 
469 	static const sal_Char * skipLinearWhiteSpace(const sal_Char * pBegin,
470 												 const sal_Char * pEnd);
471 
472 	static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
473 													    pBegin,
474 													const sal_Unicode * pEnd);
475 
476 	static const sal_Char * skipComment(const sal_Char * pBegin,
477 										const sal_Char * pEnd);
478 
479 	static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
480 										   const sal_Unicode * pEnd);
481 
482 	static const sal_Char * skipLinearWhiteSpaceComment(const sal_Char *
483 														    pBegin,
484 														const sal_Char *
485 														    pEnd);
486 
487 	static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
488 														       pBegin,
489 														   const sal_Unicode *
490 														       pEnd);
491 
	/** True if nChar is a double quote or a backslash. */
492 	static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
493 
494 	static const sal_Char * skipQuotedString(const sal_Char * pBegin,
495 											 const sal_Char * pEnd);
496 
497 	static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
498 												const sal_Unicode * pEnd);
499 
500 	static const sal_Char * scanAtom(const sal_Char * pBegin,
501 									 const sal_Char * pEnd);
502 
503 	static const sal_Unicode * scanAtom(const sal_Unicode * pBegin,
504 										const sal_Unicode * pEnd);
505 
506 	static bool scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
507 							 bool bLeadingZeroes, sal_uInt32 & rValue);
508 
509 	static bool scanUnsigned(const sal_Unicode *& rBegin,
510 							 const sal_Unicode * pEnd, bool bLeadingZeroes,
511 							 sal_uInt32 & rValue);
512 
513 	static bool scanUnsignedHex(const sal_Char *& rBegin,
514 								const sal_Char * pEnd, bool bLeadingZeroes,
515 								sal_uInt32 & rValue);
516 
517 	static bool scanUnsignedHex(const sal_Unicode *& rBegin,
518 								const sal_Unicode * pEnd, bool bLeadingZeroes,
519 								sal_uInt32 & rValue);
520 
521 	static const sal_Char * scanQuotedBlock(const sal_Char * pBegin,
522 											const sal_Char * pEnd,
523 											sal_uInt32 nOpening,
524 											sal_uInt32 nClosing,
525 											sal_Size & rLength,
526 											bool & rModify);
527 
528 	static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
529 											   const sal_Unicode * pEnd,
530 											   sal_uInt32 nOpening,
531 											   sal_uInt32 nClosing,
532 											   sal_Size & rLength,
533 											   bool & rModify);
534 
535 	static sal_Char const * scanParameters(sal_Char const * pBegin,
536 										   sal_Char const * pEnd,
537 										   INetContentTypeParameterList *
538 										       pParameters);
539 
540 	static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
541 											  sal_Unicode const * pEnd,
542 											  INetContentTypeParameterList *
543 											      pParameters);
544 
545 	static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
546 												       eEncoding);
547 
548 	static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
549 													     eEncoding);
550 
551 	static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
552 
553 	static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
554 											   const sal_Char * pEnd);
555 
556 	static rtl_TextEncoding getCharsetEncoding(const sal_Unicode * pBegin,
557 											   const sal_Unicode * pEnd);
558 
559 	static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
560 
561 	static INetMIMECharsetList_Impl *
562 	createPreferredCharsetList(rtl_TextEncoding eEncoding);
563 
564 	static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
565 										  const sal_Char * pEnd,
566 										  rtl_TextEncoding eEncoding,
567 										  sal_Size & rSize);
568 
569 	static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
570 										 const sal_Unicode * pEnd,
571 										 rtl_TextEncoding eEncoding,
572 										 sal_Size & rSize);
573 
574 	/** Get the number of octets required to encode a UCS-4 character using
575 		UTF-8 encoding.
576 
577 		@param nChar  Some UCS-4 character.
578 
579 		@return  The number of octets required (in the range 1--6, inclusive).
580 	 */
581 	static inline int getUTF8OctetCount(sal_uInt32 nChar);
582 
583 	static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
584 										   sal_uInt32 nChar);
585 
586 	static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
587 
588 	static void writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
589 							  int nMinDigits = 1);
590 
591 	static void writeDateTime(INetMIMEOutputSink & rSink,
592 							  const DateTime & rUTC);
593 
594 	static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
595 									 HeaderFieldType eType,
596 									 const ByteString & rBody,
597 									 rtl_TextEncoding ePreferredEncoding,
598 									 bool bInitialSpace = true);
599 
600 	static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
601 									 HeaderFieldType eType,
602 									 const UniString & rBody,
603 									 rtl_TextEncoding ePreferredEncoding,
604 									 bool bInitialSpace = true);
605 
606 	static bool translateUTF8Char(const sal_Char *& rBegin,
607 								  const sal_Char * pEnd,
608 								  rtl_TextEncoding eEncoding,
609 								  sal_uInt32 & rCharacter);
610 
611 	static ByteString decodeUTF8(const ByteString & rText,
612 								 rtl_TextEncoding eEncoding);
613 
614 	static UniString decodeHeaderFieldBody(HeaderFieldType eType,
615 										   const ByteString & rBody);
616 
617 // #i70651#: Prevent warnings on Mac OS X.
618 #ifdef MACOSX
619 #pragma GCC system_header
620 #endif
621 
622 	/** Get the UTF-32 character at the head of a UTF-16 encoded string.
623 
624 		@param rBegin  Points to the start of the UTF-16 encoded string, must
625 		not be null.  On exit, it points past the first UTF-32 character's
626 		encoding.
627 
628 		@param pEnd  Points past the end of the UTF-16 encoded string, must be
629 		strictly greater than rBegin.
630 
631 		@return  The UCS-4 character at the head of the UTF-16 encoded string.
632 		If the string does not start with the UTF-16 encoding of a UCS-4
633 		character, the first UTF-16 value is returned.
634 	 */
635 	static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
636 											   const sal_Unicode * pEnd);
637 
638 	/** Put the UTF-16 encoding of a UTF-32 character into a buffer.
639 
640 		@param pBuffer  Points to a buffer, must not be null.
641 
642 		@param nUTF32  A UTF-32 character, must be in the range 0..0x10FFFF.
643 
644 		@return  A pointer past the UTF-16 characters put into the buffer
645 		(i.e., pBuffer + 1 or pBuffer + 2).
646 	 */
647 	static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
648 												  sal_uInt32 nUTF32);
649 };
650 
651 // static
isUSASCII(sal_uInt32 nChar)652 inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
653 {
654 	return nChar <= 0x7F;
655 }
656 
657 // static
isISO88591(sal_uInt32 nChar)658 inline bool INetMIME::isISO88591(sal_uInt32 nChar)
659 {
660 	return nChar <= 0xFF;
661 }
662 
663 // static
isControl(sal_uInt32 nChar)664 inline bool INetMIME::isControl(sal_uInt32 nChar)
665 {
666 	return nChar <= 0x1F || nChar == 0x7F;
667 }
668 
669 // static
isWhiteSpace(sal_uInt32 nChar)670 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
671 {
672 	return nChar == '\t' || nChar == ' ';
673 }
674 
675 // static
isVisible(sal_uInt32 nChar)676 inline bool INetMIME::isVisible(sal_uInt32 nChar)
677 {
678 	return nChar >= '!' && nChar <= '~';
679 }
680 
681 // static
isDigit(sal_uInt32 nChar)682 inline bool INetMIME::isDigit(sal_uInt32 nChar)
683 {
684 	return nChar >= '0' && nChar <= '9';
685 }
686 
687 // static
isCanonicHexDigit(sal_uInt32 nChar)688 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
689 {
690 	return isDigit(nChar) || (nChar >= 'A' && nChar <= 'F');
691 }
692 
693 // static
isHexDigit(sal_uInt32 nChar)694 inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
695 {
696 	return isCanonicHexDigit(nChar) || (nChar >= 'a' && nChar <= 'f');
697 }
698 
699 // static
isUpperCase(sal_uInt32 nChar)700 inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
701 {
702 	return nChar >= 'A' && nChar <= 'Z';
703 }
704 
705 // static
isLowerCase(sal_uInt32 nChar)706 inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
707 {
708 	return nChar >= 'a' && nChar <= 'z';
709 }
710 
711 // static
isAlpha(sal_uInt32 nChar)712 inline bool INetMIME::isAlpha(sal_uInt32 nChar)
713 {
714 	return isUpperCase(nChar) || isLowerCase(nChar);
715 }
716 
717 // static
isAlphanumeric(sal_uInt32 nChar)718 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
719 {
720 	return isAlpha(nChar) || isDigit(nChar);
721 }
722 
723 // static
isBase64Digit(sal_uInt32 nChar)724 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
725 {
726 	return isUpperCase(nChar) || isLowerCase(nChar) || isDigit(nChar)
727 	       || nChar == '+' || nChar == '/';
728 }
729 
730 // static
toUpperCase(sal_uInt32 nChar)731 inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
732 {
733 	return isLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
734 }
735 
736 // static
toLowerCase(sal_uInt32 nChar)737 inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
738 {
739 	return isUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
740 }
741 
742 // static
getWeight(sal_uInt32 nChar)743 inline int INetMIME::getWeight(sal_uInt32 nChar)
744 {
745 	return isDigit(nChar) ? int(nChar - '0') : -1;
746 }
747 
748 // static
getHexWeight(sal_uInt32 nChar)749 inline int INetMIME::getHexWeight(sal_uInt32 nChar)
750 {
751 	return isDigit(nChar) ? int(nChar - '0') :
752 	       nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
753 	       nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
754 }
755 
756 // static
getBase64Weight(sal_uInt32 nChar)757 inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
758 {
759 	return isUpperCase(nChar) ? int(nChar - 'A') :
760 	       isLowerCase(nChar) ? int(nChar - 'a' + 26) :
761 	       isDigit(nChar) ? int(nChar - '0' + 52) :
762 	       nChar == '+' ? 62 :
763 	       nChar == '/' ? 63 :
764 	       nChar == '=' ? -1 : -2;
765 }
766 
767 // static
isHighSurrogate(sal_uInt32 nUTF16)768 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
769 {
770 	return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
771 }
772 
773 // static
isLowSurrogate(sal_uInt32 nUTF16)774 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
775 {
776 	return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
777 }
778 
779 // static
toUTF32(sal_Unicode cHighSurrogate,sal_Unicode cLowSurrogate)780 inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
781 									sal_Unicode cLowSurrogate)
782 {
783 	DBG_ASSERT(isHighSurrogate(cHighSurrogate)
784 			   && isLowSurrogate(cLowSurrogate),
785 			   "INetMIME::toUTF32(): Bad chars");
786 	return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
787 		       | (sal_uInt32(cLowSurrogate) & 0x3FF);
788 }
789 
790 // static
equalIgnoreCase(const ByteString & rString1,const sal_Char * pString2)791 inline bool INetMIME::equalIgnoreCase(const ByteString & rString1,
792 									  const sal_Char * pString2)
793 {
794 	return equalIgnoreCase(rString1.GetBuffer(),
795 						   rString1.GetBuffer() + rString1.Len(), pString2);
796 }
797 
798 // static
startsWithLineBreak(const sal_Char * pBegin,const sal_Char * pEnd)799 inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
800 										  const sal_Char * pEnd)
801 {
802 	DBG_ASSERT(pBegin && pBegin <= pEnd,
803 			   "INetMIME::startsWithLineBreak(): Bad sequence");
804 
805 	return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
806 		// CR, LF
807 }
808 
809 // static
startsWithLineBreak(const sal_Unicode * pBegin,const sal_Unicode * pEnd)810 inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
811 											  const sal_Unicode * pEnd)
812 {
813 	DBG_ASSERT(pBegin && pBegin <= pEnd,
814 			   "INetMIME::startsWithLineBreak(): Bad sequence");
815 
816 	return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
817 		// CR, LF
818 }
819 
820 // static
startsWithLineFolding(const sal_Char * pBegin,const sal_Char * pEnd)821 inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
822 											const sal_Char * pEnd)
823 {
824 	DBG_ASSERT(pBegin && pBegin <= pEnd,
825 			   "INetMIME::startsWithLineFolding(): Bad sequence");
826 
827 	return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
828 	       && isWhiteSpace(pBegin[2]); // CR, LF
829 }
830 
831 // static
startsWithLineFolding(const sal_Unicode * pBegin,const sal_Unicode * pEnd)832 inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
833 											const sal_Unicode * pEnd)
834 {
835 	DBG_ASSERT(pBegin && pBegin <= pEnd,
836 			   "INetMIME::startsWithLineFolding(): Bad sequence");
837 
838 	return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
839 	       && isWhiteSpace(pBegin[2]); // CR, LF
840 }
841 
842 // static
startsWithLinearWhiteSpace(const sal_Char * pBegin,const sal_Char * pEnd)843 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
844 												 const sal_Char * pEnd)
845 {
846 	DBG_ASSERT(pBegin && pBegin <= pEnd,
847 			   "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
848 
849 	return pBegin != pEnd
850 	       && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
851 }
852 
853 // static
needsQuotedStringEscape(sal_uInt32 nChar)854 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
855 {
856 	return nChar == '"' || nChar == '\\';
857 }
858 
859 // static
translateToMIME(rtl_TextEncoding eEncoding)860 inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
861 {
862 #if defined WNT
863 	return eEncoding == RTL_TEXTENCODING_MS_1252 ?
864 		       RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
865 #else // WNT
866 	return eEncoding;
867 #endif // WNT
868 }
869 
870 // static
translateFromMIME(rtl_TextEncoding eEncoding)871 inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
872 													    eEncoding)
873 {
874 #if defined WNT
875 	return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
876 		       RTL_TEXTENCODING_MS_1252 : eEncoding;
877 #else // WNT
878 	return eEncoding;
879 #endif // WNT
880 }
881 
882 // static
isMIMECharsetEncoding(rtl_TextEncoding eEncoding)883 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
884 {
885 	return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
886 }
887 
888 // static
getUTF8OctetCount(sal_uInt32 nChar)889 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
890 {
891 	DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
892 
893 	return nChar < 0x80 ? 1 :
894 	       nChar < 0x800 ? 2 :
895 	       nChar <= 0x10000 ? 3 :
896 	       nChar <= 0x200000 ? 4 :
897 	       nChar <= 0x4000000 ? 5 : 6;
898 }
899 
900 // static
getUTF32Character(const sal_Unicode * & rBegin,const sal_Unicode * pEnd)901 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
902 											  const sal_Unicode * pEnd)
903 {
904 	DBG_ASSERT(rBegin && rBegin < pEnd,
905 			   "INetMIME::getUTF32Character(): Bad sequence");
906 	if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
907 		&& rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
908 	{
909 		sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
910 		return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
911 	}
912 	else
913 		return *rBegin++;
914 }
915 
916 // static
putUTF32Character(sal_Unicode * pBuffer,sal_uInt32 nUTF32)917 inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
918 												 sal_uInt32 nUTF32)
919 {
920 	DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
921 	if (nUTF32 < 0x10000)
922 		*pBuffer++ = sal_Unicode(nUTF32);
923 	else
924 	{
925 		nUTF32 -= 0x10000;
926 		*pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
927 		*pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
928 	}
929 	return pBuffer;
930 }
931 
932 //============================================================================
933 class INetMIMEOutputSink
934 {
935 public:
936 	static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
937 
938 private:
939 	sal_uInt32 m_nColumn;
940 	sal_uInt32 m_nLineLengthLimit;
941 
942 protected:
943 	/** Write a sequence of octets.
944 
945 		@param pBegin  Points to the start of the sequence, must not be null.
946 
947 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
948 	 */
949 	virtual void writeSequence(const sal_Char * pBegin,
950 							   const sal_Char * pEnd) = 0;
951 
952 	/** Write a null terminated sequence of octets (without the terminating
953 		null).
954 
955 		@param pOctets  A null terminated sequence of octets, must not be
956 		null.
957 
958 		@return  The length of pOctets (without the terminating null).
959 	 */
960 	virtual sal_Size writeSequence(const sal_Char * pSequence);
961 
962 	/** Write a sequence of octets.
963 
964 		@descr  The supplied sequence of UCS-4 characters is interpreted as a
965 		sequence of octets.  It is an error if any of the elements of the
966 		sequence has a numerical value greater than 255.
967 
968 		@param pBegin  Points to the start of the sequence, must not be null.
969 
970 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
971 	 */
972 	virtual void writeSequence(const sal_uInt32 * pBegin,
973 							   const sal_uInt32 * pEnd);
974 
975 	/** Write a sequence of octets.
976 
977 		@descr  The supplied sequence of Unicode characters is interpreted as
978 		a sequence of octets.  It is an error if any of the elements of the
979 		sequence has a numerical value greater than 255.
980 
981 		@param pBegin  Points to the start of the sequence, must not be null.
982 
983 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
984 	 */
985 	virtual void writeSequence(const sal_Unicode * pBegin,
986 							   const sal_Unicode * pEnd);
987 
988 public:
INetMIMEOutputSink(sal_uInt32 nTheColumn=0,sal_uInt32 nTheLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)989 	INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
990 					   sal_uInt32 nTheLineLengthLimit
991 					       = INetMIME::SOFT_LINE_LENGTH_LIMIT):
992 		m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
993 
~INetMIMEOutputSink()994 	virtual ~INetMIMEOutputSink() {}
995 
996 	/** Get the current column.
997 
998 		@return  The current column (starting from zero).
999 	 */
getColumn() const1000 	sal_uInt32 getColumn() const { return m_nColumn; }
1001 
getLineLengthLimit() const1002 	sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
1003 
setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)1004 	void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
1005 	{ m_nLineLengthLimit = nTheLineLengthLimit; }
1006 
1007 	virtual ErrCode getError() const;
1008 
1009 	/** Write a sequence of octets.
1010 
1011 		@param pBegin  Points to the start of the sequence, must not be null.
1012 
1013 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
1014 	 */
1015 	inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1016 
1017 	/** Write a sequence of octets.
1018 
1019 		@param pBegin  Points to the start of the sequence, must not be null.
1020 
1021 		@param nLength  The length of the sequence.
1022 	 */
write(const sal_Char * pBegin,sal_Size nLength)1023 	void write(const sal_Char * pBegin, sal_Size nLength)
1024 	{ write(pBegin, pBegin + nLength); }
1025 
1026 	/** Write a sequence of octets.
1027 
1028 		@descr  The supplied sequence of UCS-4 characters is interpreted as a
1029 		sequence of octets.  It is an error if any of the elements of the
1030 		sequence has a numerical value greater than 255.
1031 
1032 		@param pBegin  Points to the start of the sequence, must not be null.
1033 
1034 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
1035 	 */
1036 	inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
1037 
1038 	/** Write a sequence of octets.
1039 
1040 		@descr  The supplied sequence of Unicode characters is interpreted as
1041 		a sequence of octets.  It is an error if any of the elements of the
1042 		sequence has a numerical value greater than 255.
1043 
1044 		@param pBegin  Points to the start of the sequence, must not be null.
1045 
1046 		@param pEnd  Points past the end of the sequence, must be >= pBegin.
1047 	 */
1048 	inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1049 
1050 	/** Write a sequence of octets.
1051 
1052 		@param rOctets  A ByteString, interpreted as a sequence of octets.
1053 
1054 		@param nBegin  The offset of the first character to write.
1055 
1056 		@param nEnd  The offset past the last character to write.
1057 	 */
1058 	inline void write(const ByteString & rString, xub_StrLen nBegin,
1059 					  xub_StrLen nEnd);
1060 
1061 	/** Write a single octet.
1062 
1063 		@param nOctet  Some octet.
1064 
1065 		@return  This instance.
1066 	 */
1067 	inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
1068 
1069 	/** Write a null terminated sequence of octets (without the terminating
1070 		null).
1071 
1072 		@param pOctets  A null terminated sequence of octets, must not be
1073 		null.
1074 
1075 		@return  This instance.
1076 	 */
1077 	inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
1078 
1079 	/** Write a sequence of octets.
1080 
1081 		@param rOctets  A ByteString, interpreted as a sequence of octets.
1082 
1083 		@return  This instance.
1084 	 */
1085 	inline INetMIMEOutputSink & operator <<(const ByteString & rOctets);
1086 
1087 	/** Call a manipulator function.
1088 
1089 		@param  pManipulator  A manipulator function.
1090 
1091 		@return  Whatever the manipulator function returns.
1092 	 */
1093 	INetMIMEOutputSink &
operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))1094 	operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
1095 	{ return pManipulator(*this); }
1096 
1097 	/** Write a line end (CR LF).
1098 	 */
1099 	void writeLineEnd();
1100 
1101 	/** A manipulator function that writes a line end (CR LF).
1102 
1103 		@param rSink  Some sink.
1104 
1105 		@return  The sink rSink.
1106 	 */
1107 	static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
1108 };
1109 
write(const sal_Char * pBegin,const sal_Char * pEnd)1110 inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
1111 									  const sal_Char * pEnd)
1112 {
1113 	writeSequence(pBegin, pEnd);
1114 	m_nColumn += pEnd - pBegin;
1115 }
1116 
write(const sal_uInt32 * pBegin,const sal_uInt32 * pEnd)1117 inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1118 									  const sal_uInt32 * pEnd)
1119 {
1120 	writeSequence(pBegin, pEnd);
1121 	m_nColumn += pEnd - pBegin;
1122 }
1123 
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1124 inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1125 									  const sal_Unicode * pEnd)
1126 {
1127 	writeSequence(pBegin, pEnd);
1128 	m_nColumn += pEnd - pBegin;
1129 }
1130 
write(const ByteString & rOctets,xub_StrLen nBegin,xub_StrLen nEnd)1131 inline void INetMIMEOutputSink::write(const ByteString & rOctets,
1132 									  xub_StrLen nBegin, xub_StrLen nEnd)
1133 {
1134 	writeSequence(rOctets.GetBuffer() + nBegin, rOctets.GetBuffer() + nEnd);
1135 	m_nColumn += nEnd - nBegin;
1136 }
1137 
operator <<(sal_Char nOctet)1138 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1139 {
1140 	writeSequence(&nOctet, &nOctet + 1);
1141 	++m_nColumn;
1142 	return *this;
1143 }
1144 
operator <<(const sal_Char * pOctets)1145 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1146 															    pOctets)
1147 {
1148 	m_nColumn += writeSequence(pOctets);
1149 	return *this;
1150 }
1151 
operator <<(const ByteString & rOctets)1152 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const ByteString &
1153 															    rOctets)
1154 {
1155 	writeSequence(rOctets.GetBuffer(), rOctets.GetBuffer() + rOctets.Len());
1156 	m_nColumn += rOctets.Len();
1157 	return *this;
1158 }
1159 
1160 // static
endl(INetMIMEOutputSink & rSink)1161 inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1162 													     rSink)
1163 {
1164 	rSink.writeLineEnd();
1165 	return rSink;
1166 }
1167 
1168 // static
writeEscapeSequence(INetMIMEOutputSink & rSink,sal_uInt32 nChar)1169 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1170 										  sal_uInt32 nChar)
1171 {
1172 	DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1173 	rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1174 		  << sal_uInt8(getHexDigit(nChar & 15));
1175 }
1176 
1177 //============================================================================
1178 class INetMIMEStringOutputSink: public INetMIMEOutputSink
1179 {
1180 	ByteString m_aBuffer;
1181 	bool m_bOverflow;
1182 
1183     using INetMIMEOutputSink::writeSequence;
1184 
1185 	virtual void writeSequence(const sal_Char * pBegin,
1186 							   const sal_Char * pEnd);
1187 
1188 public:
INetMIMEStringOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1189 	inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1190 									sal_uInt32 nLineLengthLimit
1191 									    = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1192 		INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1193 
1194 	virtual ErrCode getError() const;
1195 
1196 	inline ByteString takeBuffer();
1197 };
1198 
takeBuffer()1199 inline ByteString INetMIMEStringOutputSink::takeBuffer()
1200 {
1201 	ByteString aTheBuffer = m_aBuffer;
1202 	m_aBuffer.Erase();
1203 	m_bOverflow = false;
1204 	return aTheBuffer;
1205 }
1206 
1207 //============================================================================
1208 class INetMIMEUnicodeOutputSink: public INetMIMEOutputSink
1209 {
1210 	UniString m_aBuffer;
1211 	bool m_bOverflow;
1212 
1213     using INetMIMEOutputSink::writeSequence;
1214 
1215 	virtual void writeSequence(const sal_Char * pBegin,
1216 							   const sal_Char * pEnd);
1217 
1218 	virtual void writeSequence(const sal_uInt32 * pBegin,
1219 							   const sal_uInt32 * pEnd);
1220 
1221 	virtual void writeSequence(const sal_Unicode * pBegin,
1222 							   const sal_Unicode * pEnd);
1223 
1224 public:
INetMIMEUnicodeOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1225 	inline INetMIMEUnicodeOutputSink(sal_uInt32 nColumn = 0,
1226 									 sal_uInt32 nLineLengthLimit
1227 									     = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1228 		INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1229 
1230 	virtual ErrCode getError() const;
1231 
1232 	inline UniString takeBuffer();
1233 };
1234 
takeBuffer()1235 inline UniString INetMIMEUnicodeOutputSink::takeBuffer()
1236 {
1237 	UniString aTheBuffer = m_aBuffer;
1238 	m_aBuffer.Erase();
1239 	m_bOverflow = false;
1240 	return aTheBuffer;
1241 }
1242 
1243 //============================================================================
1244 class INetMIMEEncodedWordOutputSink
1245 {
1246 public:
1247 	enum Context { CONTEXT_TEXT = 1,
1248 				   CONTEXT_COMMENT = 2,
1249 				   CONTEXT_PHRASE = 4 };
1250 
1251 	enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1252 
1253 private:
1254 	enum { BUFFER_SIZE = 256 };
1255 
1256 	enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1257 				  CODING_ENCODED_TERMINATED };
1258 
1259 	enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1260 							STATE_FIRST_QUESTION, STATE_CHARSET,
1261 							STATE_SECOND_QUESTION, STATE_ENCODING,
1262 							STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1263 							STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1264 							STATE_BAD };
1265 
1266 	INetMIMEOutputSink & m_rSink;
1267 	Context m_eContext;
1268 	Space m_eInitialSpace;
1269 	sal_uInt32 m_nExtraSpaces;
1270 	INetMIMECharsetList_Impl * m_pEncodingList;
1271 	sal_Unicode * m_pBuffer;
1272 	sal_uInt32 m_nBufferSize;
1273 	sal_Unicode * m_pBufferEnd;
1274 	Coding m_ePrevCoding;
1275 	rtl_TextEncoding m_ePrevMIMEEncoding;
1276 	Coding m_eCoding;
1277 	sal_uInt32 m_nQuotedEscaped;
1278 	EncodedWordState m_eEncodedWordState;
1279 
1280 	inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1281 
1282 	void finish(bool bWriteTrailer);
1283 
1284 public:
1285 	inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1286 										 Context eTheContext,
1287 										 Space eTheInitialSpace,
1288 										 rtl_TextEncoding ePreferredEncoding);
1289 
1290 	~INetMIMEEncodedWordOutputSink();
1291 
1292 	INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1293 
1294 	inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1295 
1296 	inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1297 
1298 	inline bool flush();
1299 };
1300 
INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,Context eTheContext,Space eTheInitialSpace,rtl_TextEncoding ePreferredEncoding)1301 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1302 	       INetMIMEOutputSink & rTheSink, Context eTheContext,
1303 		   Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1304 	m_rSink(rTheSink),
1305 	m_eContext(eTheContext),
1306 	m_eInitialSpace(eTheInitialSpace),
1307 	m_nExtraSpaces(0),
1308 	m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1309 	m_ePrevCoding(CODING_NONE),
1310 	m_eCoding(CODING_NONE),
1311 	m_nQuotedEscaped(0),
1312 	m_eEncodedWordState(STATE_INITIAL)
1313 {
1314 	m_nBufferSize = BUFFER_SIZE;
1315 	m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1316 		                                         m_nBufferSize
1317 												     * sizeof (sal_Unicode)));
1318 	m_pBufferEnd = m_pBuffer;
1319 }
1320 
write(const sal_Char * pBegin,const sal_Char * pEnd)1321 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1322 												 const sal_Char * pEnd)
1323 {
1324 	DBG_ASSERT(pBegin && pBegin <= pEnd,
1325 			   "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1326 
1327 	while (pBegin != pEnd)
1328 		operator <<(*pBegin++);
1329 }
1330 
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1331 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1332 												 const sal_Unicode * pEnd)
1333 {
1334 	DBG_ASSERT(pBegin && pBegin <= pEnd,
1335 			   "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1336 
1337 	while (pBegin != pEnd)
1338 		operator <<(*pBegin++);
1339 }
1340 
flush()1341 inline bool INetMIMEEncodedWordOutputSink::flush()
1342 {
1343 	finish(true);
1344 	return m_ePrevCoding != CODING_NONE;
1345 }
1346 
1347 //============================================================================
1348 struct INetContentTypeParameter
1349 {
1350 	/** The name of the attribute, in US-ASCII encoding and converted to lower
1351 		case.  If a parameter value is split as described in RFC 2231, there
1352 		will only be one item for the complete parameter, with the attribute
1353 		name lacking any section suffix.
1354 	 */
1355 	const ByteString m_sAttribute;
1356 
1357 	/** The optional character set specification (see RFC 2231), in US-ASCII
1358 		encoding and converted to lower case.
1359 	 */
1360 	const ByteString m_sCharset;
1361 
1362 	/** The optional language specification (see RFC 2231), in US-ASCII
1363 		encoding and converted to lower case.
1364 	 */
1365 	const ByteString m_sLanguage;
1366 
1367 	/** The attribute value.  If the value is a quoted-string, it is
1368 		'unpacked.'  If a character set is specified, and the value can be
1369 		converted to Unicode, this is done.  Also, if no character set is
1370 		specified, it is first tried to convert the value from UTF-8 encoding
1371 		to Unicode, and if that doesn't work (because the value is not in
1372 		UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1373 		(which will always work).  But if a character set is specified and the
1374 		value cannot be converted from that character set to Unicode, special
1375 		action is taken to produce a value that can possibly be transformed
1376 		back into its original form:  Any 8-bit character from a non-encoded
1377 		part of the original value is directly converted to Unicode
1378 		(effectively handling it as if it was ISO-8859-1 encoded), and any
1379 		8-bit character from an encoded part of the original value is mapped
1380 		to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1381 		within Unicode's Private Use Area (effectively adding 0xF800 to the
1382 		character's numeric value).
1383 	 */
1384 	const UniString m_sValue;
1385 
1386 	/** This is true if the value is successfuly converted to Unicode, and
1387 		false if the value is a special mixture of ISO-LATIN-1 characters and
1388 		characters from Unicode's Private Use Area.
1389 	 */
1390 	const bool m_bConverted;
1391 
1392 	inline INetContentTypeParameter(const ByteString & rTheAttribute,
1393 									const ByteString & rTheCharset,
1394 									const ByteString & rTheLanguage,
1395 									const UniString & rTheValue,
1396 									bool bTheConverted);
1397 };
1398 
INetContentTypeParameter(const ByteString & rTheAttribute,const ByteString & rTheCharset,const ByteString & rTheLanguage,const UniString & rTheValue,bool bTheConverted)1399 inline INetContentTypeParameter::INetContentTypeParameter(const ByteString &
1400 														      rTheAttribute,
1401 														  const ByteString &
1402 														      rTheCharset,
1403 														  const ByteString &
1404 														      rTheLanguage,
1405 														  const UniString &
1406 														      rTheValue,
1407 														  bool bTheConverted):
1408 	m_sAttribute(rTheAttribute),
1409 	m_sCharset(rTheCharset),
1410 	m_sLanguage(rTheLanguage),
1411 	m_sValue(rTheValue),
1412 	m_bConverted(bTheConverted)
1413 {}
1414 
1415 //============================================================================
1416 class TOOLS_DLLPUBLIC INetContentTypeParameterList: private List
1417 {
1418 public:
~INetContentTypeParameterList()1419 	~INetContentTypeParameterList() { Clear(); }
1420 
1421 	using List::Count;
1422 
1423 	void Clear();
1424 
Insert(INetContentTypeParameter * pParameter,sal_uIntPtr nIndex)1425 	void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1426 	{ List::Insert(pParameter, nIndex); }
1427 
1428 	inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const;
1429 
1430 	const INetContentTypeParameter * find(const ByteString & rAttribute)
1431 		const;
1432 };
1433 
1434 inline const INetContentTypeParameter *
GetObject(sal_uIntPtr nIndex) const1435 INetContentTypeParameterList::GetObject(sal_uIntPtr nIndex) const
1436 {
1437 	return static_cast< INetContentTypeParameter * >(List::GetObject(nIndex));
1438 }
1439 
1440 #endif // TOOLS_INETMIME_HXX
1441 
1442