1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23 #ifndef TOOLS_INETMIME_HXX
24 #define TOOLS_INETMIME_HXX
25
26 #include "tools/toolsdllapi.h"
27 #include <rtl/alloc.h>
28 #include <rtl/string.h>
29 #include "rtl/tencinfo.h"
30 #include <tools/debug.hxx>
31 #include <tools/errcode.hxx>
32 #include <tools/list.hxx>
33 #include <tools/string.hxx>
34
// Forward declarations: within class INetMIME below these types are only
// named as pointer or reference parameters / return values, so a declaration
// is sufficient at this point.
35 class DateTime;
36 class INetContentTypeParameterList;
37 class INetMIMECharsetList_Impl;
38 class INetMIMEOutputSink;
39
40 //============================================================================
/** Collection of static helpers for handling RFC 822 / MIME style messages:
    character classification, scanning of header syntax, charset lookup and
    conversion, and writing or decoding of header field bodies.  Every member
    function declared here is static.
 */
41 class TOOLS_DLLPUBLIC INetMIME
42 {
43 public:
44 enum { SOFT_LINE_LENGTH_LIMIT = 76,
45 HARD_LINE_LENGTH_LIMIT = 998 };
46
47 /** The various types of message header field bodies, with respect to
48 encoding and decoding them.
49
50 @descr At the moment, five different types of header fields suffice
51 to describe how to encode and decode any known message header field
52 body, but need for more types may arise in the future as new header
53 fields are introduced.
54
55 @descr The following is an exhaustive list of all the header fields
56 currently known to our implementation. For every header field, it
57 includes a 'canonic' (with regard to capitalization) name, a grammar
58 rule for the body (using RFC 822 and RFC 2234 conventions), a list of
59 relevant sources of information, and the HeaderFieldType value to use
60 with that header field. The list is based on RFC 2076 and draft-
61 palme-mailext-headers-02.txt (see also <http://www.dsv.su.se/~jpalme/
62 ietf/jp-ietf-home.html#anchor1003783>).
63
64 Approved: address ;RFC 1036; HEADER_FIELD_ADDRESS
65 bcc: #address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
66 cc: 1#address ;RFCs 822, 2047; HEADER_FIELD_ADDRESS
67 Comments: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
68 Content-Base: absoluteURI ;RFC 2110; HEADER_FIELD_TEXT
69 Content-Description: *text ;RFC 2045, RFC 2047; HEADER_FIELD_TEXT
70 Content-Disposition: disposition-type *(";" disposition-parm)
71 ;RFC 1806; HEADER_FIELD_STRUCTURED
72 Content-ID: msg-id ;RFC 2045, RFC 2047; HEADER_FIELD_MESSAGE_ID
73 Content-Location: absoluteURI / relativeURI ;RFC 2110;
74 HEADER_FIELD_TEXT
75 Content-Transfer-Encoding: mechanism ;RFC 2045, RFC 2047;
76 HEADER_FIELD_STRUCTURED
77 Content-Type: type "/" subtype *(";" parameter) ;RFC 2045, RFC 2047;
78 HEADER_FIELD_STRUCTURED
79 Control: *text ;RFC 1036; HEADER_FIELD_TEXT
80 Date: date-time ;RFC 822, RFC 1123, RFC 2047; HEADER_FIELD_STRUCTURED
81 Distribution: 1#atom ;RFC 1036; HEADER_FIELD_STRUCTURED
82 Encrypted: 1#2word ;RFC 822, RFC 2047; HEADER_FIELD_STRUCTURED
83 Expires: date-time ;RFC 1036; HEADER_FIELD_STRUCTURED
84 Followup-To: 1#(atom *("." atom)) ;RFC 1036; HEADER_FIELD_STRUCTURED
85 From: mailbox / 1#mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
86 In-Reply-To: *(phrase / msg-id) ;RFC 822, RFC 2047;
87 HEADER_FIELD_ADDRESS
88 Keywords: #phrase ;RFC 822, RFC 2047; HEADER_FIELD_PHRASE
89 MIME-Version: 1*DIGIT "." 1*DIGIT ;RFC 2045, RFC 2047;
90 HEADER_FIELD_STRUCTURED
91 Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
92 Newsgroups: 1#(atom *("." atom)) ;RFC 1036, RFC 2047;
93 HEADER_FIELD_STRUCTURED
94 Organization: *text ;RFC 1036; HEADER_FIELD_TEXT
95 Received: ["from" domain] ["by" domain] ["via" atom] *("with" atom)
96 ["id" msg-id] ["for" addr-spec] ";" date-time ;RFC 822, RFC 1123,
97 RFC 2047; HEADER_FIELD_STRUCTURED
98 References: *(phrase / msg-id) ;RFC 822, RFC 2047;
99 HEADER_FIELD_ADDRESS
100 Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
101 Resent-Date: date-time ;RFC 822, RFC 1123, RFC 2047;
102 HEADER_FIELD_STRUCTURED
103 Resent-From: mailbox / 1#mailbox ;RFC 822, RFC 2047;
104 HEADER_FIELD_ADDRESS
105 Resent-Message-ID: msg-id ;RFC 822, RFC 2047; HEADER_FIELD_MESSAGE_ID
106 Resent-Reply-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
107 Resent-Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
108 Resent-To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
109 Resent-bcc: #address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
110 Resent-cc: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
111 Return-path: route-addr / ("<" ">") ;RFC 822, RFC 1123, RFC 2047;
112 HEADER_FIELD_STRUCTURED
113 Return-Receipt-To: address ;Not Internet standard;
114 HEADER_FIELD_ADDRESS
115 Sender: mailbox ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
116 Subject: *text ;RFC 822, RFC 2047; HEADER_FIELD_TEXT
117 Summary: *text ;RFC 1036; HEADER_FIELD_TEXT
118 To: 1#address ;RFC 822, RFC 2047; HEADER_FIELD_ADDRESS
119 X-CHAOS-Marked: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
120 X-CHAOS-Read: "YES" / "NO" ;local; HEADER_FIELD_STRUCTURED
121 X-CHAOS-Recipients: #*("<" atom word ">") ;local;
122 HEADER_FIELD_STRUCTURED
123 X-CHAOS-Size: 1*DIGIT ;local; HEADER_FIELD_STRUCTURED
124 X-Mailer: *text ;Not Internet standard; HEADER_FIELD_TEXT
125 X-Mozilla-Status: 4HEXDIG ;Mozilla; HEADER_FIELD_STRUCTURED
126 X-Newsreader: *text ;Not Internet standard; HEADER_FIELD_TEXT
127 X-Priority: "1" / "2" / "3" / "4" / "5" ;Not Internet standard;
128 HEADER_FIELD_STRUCTURED
129 Xref: sub-domain
130 1*((atom / string) *("." (atom / string)) ":" msg-number)
131 ;RFCs 1036, 2047, local; HEADER_FIELD_STRUCTURED
132 */
133 enum HeaderFieldType
134 {
135 HEADER_FIELD_TEXT,
136 HEADER_FIELD_STRUCTURED,
137 HEADER_FIELD_PHRASE,
138 HEADER_FIELD_MESSAGE_ID,
139 HEADER_FIELD_ADDRESS
140 };
141
142 /** Check for US-ASCII character.
143
144 @param nChar Some UCS-4 character.
145
146 @return True if nChar is a US-ASCII character (0x00--0x7F).
147 */
148 static inline bool isUSASCII(sal_uInt32 nChar);
149
150 /** Check for ISO 8859-1 character.
151
152 @param nChar Some UCS-4 character.
153
154 @return True if nChar is an ISO 8859-1 character (0x00--0xFF).
155 */
156 static inline bool isISO88591(sal_uInt32 nChar);
157
158 /** Check for US-ASCII control character.
159
160 @param nChar Some UCS-4 character.
161
162 @return True if nChar is a US-ASCII control character (US-ASCII
163 0x00--0x1F or 0x7F).
164 */
165 static inline bool isControl(sal_uInt32 nChar);
166
167 /** Check for US-ASCII white space character.
168
169 @param nChar Some UCS-4 character.
170
171 @return True if nChar is a US-ASCII white space character (US-ASCII
172 0x09 or 0x20).
173 */
174 static inline bool isWhiteSpace(sal_uInt32 nChar);
175
176 /** Check for US-ASCII visible character.
177
178 @param nChar Some UCS-4 character.
179
180 @return True if nChar is a US-ASCII visible character (US-ASCII
181 0x21--0x7E).
182 */
183 static inline bool isVisible(sal_uInt32 nChar);
184
185 /** Check for US-ASCII digit character.
186
187 @param nChar Some UCS-4 character.
188
189 @return True if nChar is a US-ASCII (decimal) digit character (US-
190 ASCII '0'--'9').
191 */
192 static inline bool isDigit(sal_uInt32 nChar);
193
194 /** Check for US-ASCII canonic hexadecimal digit character.
195
196 @param nChar Some UCS-4 character.
197
198 @return True if nChar is a US-ASCII canonic (i.e., upper case)
199 hexadecimal digit character (US-ASCII '0'--'9' or 'A'--'F').
200 */
201 static inline bool isCanonicHexDigit(sal_uInt32 nChar);
202
203 /** Check for US-ASCII hexadecimal digit character.
204
205 @param nChar Some UCS-4 character.
206
207 @return True if nChar is a US-ASCII hexadecimal digit character (US-
208 ASCII '0'--'9', 'A'--'F', 'a'--'f').
209 */
210 static inline bool isHexDigit(sal_uInt32 nChar);
211
212 /** Check for US-ASCII upper case character.
213
214 @param nChar Some UCS-4 character.
215
216 @return True if nChar is a US-ASCII upper case alphabetic character
217 (US-ASCII 'A'--'Z').
218 */
219 static inline bool isUpperCase(sal_uInt32 nChar);
220
221 /** Check for US-ASCII lower case character.
222
223 @param nChar Some UCS-4 character.
224
225 @return True if nChar is a US-ASCII lower case alphabetic character
226 (US-ASCII 'a'--'z').
227 */
228 static inline bool isLowerCase(sal_uInt32 nChar);
229
230 /** Check for US-ASCII alphabetic character.
231
232 @param nChar Some UCS-4 character.
233
234 @return True if nChar is a US-ASCII alphabetic character (US-ASCII
235 'A'--'Z' or 'a'--'z').
236 */
237 static inline bool isAlpha(sal_uInt32 nChar);
238
239 /** Check for US-ASCII alphanumeric character.
240
241 @param nChar Some UCS-4 character.
242
243 @return True if nChar is a US-ASCII alphanumeric character (US-ASCII
244 '0'--'9', 'A'--'Z' or 'a'--'z').
245 */
246 static inline bool isAlphanumeric(sal_uInt32 nChar);
247
248 /** Check for US-ASCII Base 64 digit character.
249
250 @param nChar Some UCS-4 character.
251
252 @return True if nChar is a US-ASCII Base 64 digit character (US-ASCII
253 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/').
254 */
255 static inline bool isBase64Digit(sal_uInt32 nChar);
256
257 /** Check whether some character is valid within an RFC 822 <atom>.
258
259 @param nChar Some UCS-4 character.
260
261 @return True if nChar is valid within an RFC 822 <atom> (US-ASCII
262 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
263 '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', or '~').
264 */
265 static bool isAtomChar(sal_uInt32 nChar);
266
267 /** Check whether some character is valid within an RFC 2045 <token>.
268
269 @param nChar Some UCS-4 character.
270
271 @return True if nChar is valid within an RFC 2045 <token> (US-ASCII
272 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
273 '-', '.', '^', '_', '`', '{', '|', '}', or '~').
274 */
275 static bool isTokenChar(sal_uInt32 nChar);
276
277 /** Check whether some character is valid within an RFC 2047 <token>.
278
279 @param nChar Some UCS-4 character.
280
281 @return True if nChar is valid within an RFC 2047 <token> (US-ASCII
282 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
283 '-', '^', '_', '`', '{', '|', '}', or '~').
284 */
285 static bool isEncodedWordTokenChar(sal_uInt32 nChar);
286
287 /** Check whether some character is valid within an RFC 2060 <atom>.
288
289 @param nChar Some UCS-4 character.
290
291 @return True if nChar is valid within an RFC 2060 <atom> (US-ASCII
292 'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '&', ''', '+', ',', '-',
293 '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', ']', '^', '_', '`',
294 '|', '}', or '~').
295 */
296 static bool isIMAPAtomChar(sal_uInt32 nChar);
297
298 /** Translate a US-ASCII character to upper case.
299
300 @param nChar Some UCS-4 character.
301
302 @return If nChar is a US-ASCII lower case character (US-ASCII
303 'a'--'z'), return the corresponding US-ASCII upper case character (US-
304 ASCII 'A'--'Z'); otherwise, return nChar unchanged.
305 */
306 static inline sal_uInt32 toUpperCase(sal_uInt32 nChar);
307
308 /** Translate a US-ASCII character to lower case.
309
310 @param nChar Some UCS-4 character.
311
312 @return If nChar is a US-ASCII upper case character (US-ASCII
313 'A'--'Z'), return the corresponding US-ASCII lower case character (US-
314 ASCII 'a'--'z'); otherwise, return nChar unchanged.
315 */
316 static inline sal_uInt32 toLowerCase(sal_uInt32 nChar);
317
318 /** Get the digit weight of a US-ASCII character.
319
320 @param nChar Some UCS-4 character.
321
322 @return If nChar is a US-ASCII (decimal) digit character (US-ASCII
323 '0'--'9'), return the corresponding weight (0--9); otherwise,
324 return -1.
325 */
326 static inline int getWeight(sal_uInt32 nChar);
327
328 /** Get the hexadecimal digit weight of a US-ASCII character.
329
330 @param nChar Some UCS-4 character.
331
332 @return If nChar is a US-ASCII hexadecimal digit character (US-ASCII
333 '0'--'9', 'A'--'F', or 'a'--'f'), return the corresponding weight
334 (0--15); otherwise, return -1.
335 */
336 static inline int getHexWeight(sal_uInt32 nChar);
337
338 /** Get the Base 64 digit weight of a US-ASCII character.
339
340 @param nChar Some UCS-4 character.
341
342 @return If nChar is a US-ASCII Base 64 digit character (US-ASCII
343 'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
344 corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
345 character (US-ASCII '='), return -1; otherwise, return -2.
346 */
347 static inline int getBase64Weight(sal_uInt32 nChar);
348
349 /** Get a decimal digit encoded as US-ASCII.
350
351 @param nWeight Must be in the range 0--9, inclusive.
352
353 @return The decimal digit corresponding to nWeight (US-ASCII
354 '0'--'9').
355 */
356 static sal_uInt32 getDigit(int nWeight);
357
358 /** Get a hexadecimal digit encoded as US-ASCII.
359
360 @param nWeight Must be in the range 0--15, inclusive.
361
362 @return The canonic (i.e., upper case) hexadecimal digit
363 corresponding to nWeight (US-ASCII '0'--'9' or 'A'--'F').
364 */
365 static sal_uInt32 getHexDigit(int nWeight);
366
367 /** Get a Base 64 digit encoded as US-ASCII.
368
369 @param nWeight Must be in the range 0--63, inclusive.
370
371 @return The Base 64 digit corresponding to nWeight (US-ASCII 'A'--
372 'Z', 'a'--'z', '0'--'9', '+' or '/').
373 */
374 static sal_uInt32 getBase64Digit(int nWeight);
375
/** Check for a UTF-16 high surrogate (0xD800--0xDBFF). */
376 static inline bool isHighSurrogate(sal_uInt32 nUTF16);
377
/** Check for a UTF-16 low surrogate (0xDC00--0xDFFF). */
378 static inline bool isLowSurrogate(sal_uInt32 nUTF16);
379
/** Combine a UTF-16 high surrogate and low surrogate into a single value.
    The inline definition checks with DBG_ASSERT that both arguments are
    surrogates of the respective kind. */
380 static inline sal_uInt32 toUTF32(sal_Unicode cHighSurrogate,
381 sal_Unicode cLowSurrogate);
382
383 /** Check two US-ASCII strings for equality, ignoring case.
384
385 @param pBegin1 Points to the start of the first string, must not be
386 null.
387
388 @param pEnd1 Points past the end of the first string, must be >=
389 pBegin1.
390
391 @param pBegin2 Points to the start of the second string, must not be
392 null.
393
394 @param pEnd2 Points past the end of the second string, must be >=
395 pBegin2.
396
397 @return True if the two strings are equal, ignoring the case of US-
398 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
399 */
400 static bool equalIgnoreCase(const sal_Char * pBegin1,
401 const sal_Char * pEnd1,
402 const sal_Char * pBegin2,
403 const sal_Char * pEnd2);
404
405 /** Check two US-ASCII strings for equality, ignoring case.
406
407 @param pBegin1 Points to the start of the first string, must not be
408 null.
409
410 @param pEnd1 Points past the end of the first string, must be >=
411 pBegin1.
412
413 @param pString2 Points to the start of the null terminated second
414 string, must not be null.
415
416 @return True if the two strings are equal, ignoring the case of US-
417 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
418 */
419 static bool equalIgnoreCase(const sal_Char * pBegin1,
420 const sal_Char * pEnd1,
421 const sal_Char * pString2);
422
423 /** Check two US-ASCII strings for equality, ignoring case.
424
425 @param pBegin1 Points to the start of the first string, must not be
426 null.
427
428 @param pEnd1 Points past the end of the first string, must be >=
429 pBegin1.
430
431 @param pString2 Points to the start of the null terminated second
432 string, must not be null.
433
434 @return True if the two strings are equal, ignoring the case of US-
435 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
436 */
437 static bool equalIgnoreCase(const sal_Unicode * pBegin1,
438 const sal_Unicode * pEnd1,
439 const sal_Char * pString2);
440
441 /** Check two US-ASCII strings for equality, ignoring case.
442
443 @param rString1 The first string.
444
445 @param pString2 Points to the start of the null terminated second
446 string, must not be null.
447
448 @return True if the two strings are equal, ignoring the case of US-
449 ASCII alphabetic characters (US-ASCII 'A'--'Z' and 'a'--'z').
450 */
451 static inline bool equalIgnoreCase(const ByteString & rString1,
452 const sal_Char * pString2);
453
/** Check whether the sequence [pBegin, pEnd) starts with CR LF
    (0x0D 0x0A). */
454 static inline bool startsWithLineBreak(const sal_Char * pBegin,
455 const sal_Char * pEnd);
456
/** Same check as the sal_Char overload, for a sal_Unicode sequence. */
457 static inline bool startsWithLineBreak(const sal_Unicode * pBegin,
458 const sal_Unicode * pEnd);
459
/** Check whether the sequence [pBegin, pEnd) starts with CR LF followed by
    a white space character (as defined by isWhiteSpace). */
460 static inline bool startsWithLineFolding(const sal_Char * pBegin,
461 const sal_Char * pEnd);
462
/** Same check as the sal_Char overload, for a sal_Unicode sequence. */
463 static inline bool startsWithLineFolding(const sal_Unicode * pBegin,
464 const sal_Unicode * pEnd);
465
/** Check whether the non-empty sequence [pBegin, pEnd) starts with a white
    space character or with a line folding (see startsWithLineFolding). */
466 static bool startsWithLinearWhiteSpace(const sal_Char * pBegin,
467 const sal_Char * pEnd);
468
469 static const sal_Char * skipLinearWhiteSpace(const sal_Char * pBegin,
470 const sal_Char * pEnd);
471
472 static const sal_Unicode * skipLinearWhiteSpace(const sal_Unicode *
473 pBegin,
474 const sal_Unicode * pEnd);
475
476 static const sal_Char * skipComment(const sal_Char * pBegin,
477 const sal_Char * pEnd);
478
479 static const sal_Unicode * skipComment(const sal_Unicode * pBegin,
480 const sal_Unicode * pEnd);
481
482 static const sal_Char * skipLinearWhiteSpaceComment(const sal_Char *
483 pBegin,
484 const sal_Char *
485 pEnd);
486
487 static const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
488 pBegin,
489 const sal_Unicode *
490 pEnd);
491
/** Check whether nChar is a double quote or a backslash, the two
    characters the inline definition reports as needing an escape. */
492 static inline bool needsQuotedStringEscape(sal_uInt32 nChar);
493
494 static const sal_Char * skipQuotedString(const sal_Char * pBegin,
495 const sal_Char * pEnd);
496
497 static const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
498 const sal_Unicode * pEnd);
499
500 static const sal_Char * scanAtom(const sal_Char * pBegin,
501 const sal_Char * pEnd);
502
503 static const sal_Unicode * scanAtom(const sal_Unicode * pBegin,
504 const sal_Unicode * pEnd);
505
506 static bool scanUnsigned(const sal_Char *& rBegin, const sal_Char * pEnd,
507 bool bLeadingZeroes, sal_uInt32 & rValue);
508
509 static bool scanUnsigned(const sal_Unicode *& rBegin,
510 const sal_Unicode * pEnd, bool bLeadingZeroes,
511 sal_uInt32 & rValue);
512
513 static bool scanUnsignedHex(const sal_Char *& rBegin,
514 const sal_Char * pEnd, bool bLeadingZeroes,
515 sal_uInt32 & rValue);
516
517 static bool scanUnsignedHex(const sal_Unicode *& rBegin,
518 const sal_Unicode * pEnd, bool bLeadingZeroes,
519 sal_uInt32 & rValue);
520
521 static const sal_Char * scanQuotedBlock(const sal_Char * pBegin,
522 const sal_Char * pEnd,
523 sal_uInt32 nOpening,
524 sal_uInt32 nClosing,
525 sal_Size & rLength,
526 bool & rModify);
527
528 static const sal_Unicode * scanQuotedBlock(const sal_Unicode * pBegin,
529 const sal_Unicode * pEnd,
530 sal_uInt32 nOpening,
531 sal_uInt32 nClosing,
532 sal_Size & rLength,
533 bool & rModify);
534
535 static sal_Char const * scanParameters(sal_Char const * pBegin,
536 sal_Char const * pEnd,
537 INetContentTypeParameterList *
538 pParameters);
539
540 static sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
541 sal_Unicode const * pEnd,
542 INetContentTypeParameterList *
543 pParameters);
544
/** When WNT is defined, map RTL_TEXTENCODING_MS_1252 to
    RTL_TEXTENCODING_ISO_8859_1; every other encoding (and every encoding
    when WNT is not defined) is returned unchanged. */
545 static inline rtl_TextEncoding translateToMIME(rtl_TextEncoding
546 eEncoding);
547
/** When WNT is defined, map RTL_TEXTENCODING_ISO_8859_1 to
    RTL_TEXTENCODING_MS_1252; every other encoding (and every encoding
    when WNT is not defined) is returned unchanged. */
548 static inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
549 eEncoding);
550
551 static const sal_Char * getCharsetName(rtl_TextEncoding eEncoding);
552
553 static rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
554 const sal_Char * pEnd);
555
556 static rtl_TextEncoding getCharsetEncoding(const sal_Unicode * pBegin,
557 const sal_Unicode * pEnd);
558
/** True exactly when rtl_isOctetTextEncoding(eEncoding) yields sal_True. */
559 static inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
560
561 static INetMIMECharsetList_Impl *
562 createPreferredCharsetList(rtl_TextEncoding eEncoding);
563
564 static sal_Unicode * convertToUnicode(const sal_Char * pBegin,
565 const sal_Char * pEnd,
566 rtl_TextEncoding eEncoding,
567 sal_Size & rSize);
568
569 static sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
570 const sal_Unicode * pEnd,
571 rtl_TextEncoding eEncoding,
572 sal_Size & rSize);
573
574 /** Get the number of octets required to encode a UCS-4 character using
575 UTF-8 encoding.
576
577 @param nChar Some UCS-4 character.
578
579 @return The number of octets required (in the range 1--6, inclusive).
580 */
581 static inline int getUTF8OctetCount(sal_uInt32 nChar);
582
583 static inline void writeEscapeSequence(INetMIMEOutputSink & rSink,
584 sal_uInt32 nChar);
585
586 static void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
587
588 static void writeUnsigned(INetMIMEOutputSink & rSink, sal_uInt32 nValue,
589 int nMinDigits = 1);
590
591 static void writeDateTime(INetMIMEOutputSink & rSink,
592 const DateTime & rUTC);
593
594 static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
595 HeaderFieldType eType,
596 const ByteString & rBody,
597 rtl_TextEncoding ePreferredEncoding,
598 bool bInitialSpace = true);
599
600 static void writeHeaderFieldBody(INetMIMEOutputSink & rSink,
601 HeaderFieldType eType,
602 const UniString & rBody,
603 rtl_TextEncoding ePreferredEncoding,
604 bool bInitialSpace = true);
605
606 static bool translateUTF8Char(const sal_Char *& rBegin,
607 const sal_Char * pEnd,
608 rtl_TextEncoding eEncoding,
609 sal_uInt32 & rCharacter);
610
611 static ByteString decodeUTF8(const ByteString & rText,
612 rtl_TextEncoding eEncoding);
613
614 static UniString decodeHeaderFieldBody(HeaderFieldType eType,
615 const ByteString & rBody);
616
617 // #i70651#: Prevent warnings on Mac OS X.
618 #ifdef MACOSX
619 #pragma GCC system_header
620 #endif
621
622 /** Get the UTF-32 character at the head of a UTF-16 encoded string.
623
624 @param rBegin Points to the start of the UTF-16 encoded string, must
625 not be null. On exit, it points past the first UTF-32 character's
626 encoding.
627
628 @param pEnd Points past the end of the UTF-16 encoded string, must be
629 strictly greater than rBegin.
630
631 @return The UCS-4 character at the head of the UTF-16 encoded string.
632 If the string does not start with the UTF-16 encoding of a UTF-32
633 character, the first UTF-16 value is returned.
634 */
635 static inline sal_uInt32 getUTF32Character(const sal_Unicode *& rBegin,
636 const sal_Unicode * pEnd);
637
638 /** Put the UTF-16 encoding of a UTF-32 character into a buffer.
639
640 @param pBuffer Points to a buffer, must not be null.
641
642 @param nUTF32 A UTF-32 character, must be in the range 0..0x10FFFF.
643
644 @return A pointer past the UTF-16 characters put into the buffer
645 (i.e., pBuffer + 1 or pBuffer + 2).
646 */
647 static inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
648 sal_uInt32 nUTF32);
649 };
650
651 // static
isUSASCII(sal_uInt32 nChar)652 inline bool INetMIME::isUSASCII(sal_uInt32 nChar)
653 {
654 return nChar <= 0x7F;
655 }
656
657 // static
isISO88591(sal_uInt32 nChar)658 inline bool INetMIME::isISO88591(sal_uInt32 nChar)
659 {
660 return nChar <= 0xFF;
661 }
662
663 // static
isControl(sal_uInt32 nChar)664 inline bool INetMIME::isControl(sal_uInt32 nChar)
665 {
666 return nChar <= 0x1F || nChar == 0x7F;
667 }
668
669 // static
isWhiteSpace(sal_uInt32 nChar)670 inline bool INetMIME::isWhiteSpace(sal_uInt32 nChar)
671 {
672 return nChar == '\t' || nChar == ' ';
673 }
674
675 // static
isVisible(sal_uInt32 nChar)676 inline bool INetMIME::isVisible(sal_uInt32 nChar)
677 {
678 return nChar >= '!' && nChar <= '~';
679 }
680
681 // static
isDigit(sal_uInt32 nChar)682 inline bool INetMIME::isDigit(sal_uInt32 nChar)
683 {
684 return nChar >= '0' && nChar <= '9';
685 }
686
687 // static
isCanonicHexDigit(sal_uInt32 nChar)688 inline bool INetMIME::isCanonicHexDigit(sal_uInt32 nChar)
689 {
690 return isDigit(nChar) || (nChar >= 'A' && nChar <= 'F');
691 }
692
693 // static
isHexDigit(sal_uInt32 nChar)694 inline bool INetMIME::isHexDigit(sal_uInt32 nChar)
695 {
696 return isCanonicHexDigit(nChar) || (nChar >= 'a' && nChar <= 'f');
697 }
698
699 // static
isUpperCase(sal_uInt32 nChar)700 inline bool INetMIME::isUpperCase(sal_uInt32 nChar)
701 {
702 return nChar >= 'A' && nChar <= 'Z';
703 }
704
705 // static
isLowerCase(sal_uInt32 nChar)706 inline bool INetMIME::isLowerCase(sal_uInt32 nChar)
707 {
708 return nChar >= 'a' && nChar <= 'z';
709 }
710
711 // static
isAlpha(sal_uInt32 nChar)712 inline bool INetMIME::isAlpha(sal_uInt32 nChar)
713 {
714 return isUpperCase(nChar) || isLowerCase(nChar);
715 }
716
717 // static
isAlphanumeric(sal_uInt32 nChar)718 inline bool INetMIME::isAlphanumeric(sal_uInt32 nChar)
719 {
720 return isAlpha(nChar) || isDigit(nChar);
721 }
722
723 // static
isBase64Digit(sal_uInt32 nChar)724 inline bool INetMIME::isBase64Digit(sal_uInt32 nChar)
725 {
726 return isUpperCase(nChar) || isLowerCase(nChar) || isDigit(nChar)
727 || nChar == '+' || nChar == '/';
728 }
729
730 // static
toUpperCase(sal_uInt32 nChar)731 inline sal_uInt32 INetMIME::toUpperCase(sal_uInt32 nChar)
732 {
733 return isLowerCase(nChar) ? nChar - ('a' - 'A') : nChar;
734 }
735
736 // static
toLowerCase(sal_uInt32 nChar)737 inline sal_uInt32 INetMIME::toLowerCase(sal_uInt32 nChar)
738 {
739 return isUpperCase(nChar) ? nChar + ('a' - 'A') : nChar;
740 }
741
742 // static
getWeight(sal_uInt32 nChar)743 inline int INetMIME::getWeight(sal_uInt32 nChar)
744 {
745 return isDigit(nChar) ? int(nChar - '0') : -1;
746 }
747
748 // static
getHexWeight(sal_uInt32 nChar)749 inline int INetMIME::getHexWeight(sal_uInt32 nChar)
750 {
751 return isDigit(nChar) ? int(nChar - '0') :
752 nChar >= 'A' && nChar <= 'F' ? int(nChar - 'A' + 10) :
753 nChar >= 'a' && nChar <= 'f' ? int(nChar - 'a' + 10) : -1;
754 }
755
756 // static
getBase64Weight(sal_uInt32 nChar)757 inline int INetMIME::getBase64Weight(sal_uInt32 nChar)
758 {
759 return isUpperCase(nChar) ? int(nChar - 'A') :
760 isLowerCase(nChar) ? int(nChar - 'a' + 26) :
761 isDigit(nChar) ? int(nChar - '0' + 52) :
762 nChar == '+' ? 62 :
763 nChar == '/' ? 63 :
764 nChar == '=' ? -1 : -2;
765 }
766
767 // static
isHighSurrogate(sal_uInt32 nUTF16)768 inline bool INetMIME::isHighSurrogate(sal_uInt32 nUTF16)
769 {
770 return nUTF16 >= 0xD800 && nUTF16 <= 0xDBFF;
771 }
772
773 // static
isLowSurrogate(sal_uInt32 nUTF16)774 inline bool INetMIME::isLowSurrogate(sal_uInt32 nUTF16)
775 {
776 return nUTF16 >= 0xDC00 && nUTF16 <= 0xDFFF;
777 }
778
779 // static
toUTF32(sal_Unicode cHighSurrogate,sal_Unicode cLowSurrogate)780 inline sal_uInt32 INetMIME::toUTF32(sal_Unicode cHighSurrogate,
781 sal_Unicode cLowSurrogate)
782 {
783 DBG_ASSERT(isHighSurrogate(cHighSurrogate)
784 && isLowSurrogate(cLowSurrogate),
785 "INetMIME::toUTF32(): Bad chars");
786 return ((sal_uInt32(cHighSurrogate) & 0x3FF) << 10)
787 | (sal_uInt32(cLowSurrogate) & 0x3FF);
788 }
789
790 // static
equalIgnoreCase(const ByteString & rString1,const sal_Char * pString2)791 inline bool INetMIME::equalIgnoreCase(const ByteString & rString1,
792 const sal_Char * pString2)
793 {
794 return equalIgnoreCase(rString1.GetBuffer(),
795 rString1.GetBuffer() + rString1.Len(), pString2);
796 }
797
798 // static
startsWithLineBreak(const sal_Char * pBegin,const sal_Char * pEnd)799 inline bool INetMIME::startsWithLineBreak(const sal_Char * pBegin,
800 const sal_Char * pEnd)
801 {
802 DBG_ASSERT(pBegin && pBegin <= pEnd,
803 "INetMIME::startsWithLineBreak(): Bad sequence");
804
805 return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
806 // CR, LF
807 }
808
809 // static
startsWithLineBreak(const sal_Unicode * pBegin,const sal_Unicode * pEnd)810 inline bool INetMIME::startsWithLineBreak(const sal_Unicode * pBegin,
811 const sal_Unicode * pEnd)
812 {
813 DBG_ASSERT(pBegin && pBegin <= pEnd,
814 "INetMIME::startsWithLineBreak(): Bad sequence");
815
816 return pEnd - pBegin >= 2 && pBegin[0] == 0x0D && pBegin[1] == 0x0A;
817 // CR, LF
818 }
819
820 // static
startsWithLineFolding(const sal_Char * pBegin,const sal_Char * pEnd)821 inline bool INetMIME::startsWithLineFolding(const sal_Char * pBegin,
822 const sal_Char * pEnd)
823 {
824 DBG_ASSERT(pBegin && pBegin <= pEnd,
825 "INetMIME::startsWithLineFolding(): Bad sequence");
826
827 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
828 && isWhiteSpace(pBegin[2]); // CR, LF
829 }
830
831 // static
startsWithLineFolding(const sal_Unicode * pBegin,const sal_Unicode * pEnd)832 inline bool INetMIME::startsWithLineFolding(const sal_Unicode * pBegin,
833 const sal_Unicode * pEnd)
834 {
835 DBG_ASSERT(pBegin && pBegin <= pEnd,
836 "INetMIME::startsWithLineFolding(): Bad sequence");
837
838 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
839 && isWhiteSpace(pBegin[2]); // CR, LF
840 }
841
842 // static
startsWithLinearWhiteSpace(const sal_Char * pBegin,const sal_Char * pEnd)843 inline bool INetMIME::startsWithLinearWhiteSpace(const sal_Char * pBegin,
844 const sal_Char * pEnd)
845 {
846 DBG_ASSERT(pBegin && pBegin <= pEnd,
847 "INetMIME::startsWithLinearWhiteSpace(): Bad sequence");
848
849 return pBegin != pEnd
850 && (isWhiteSpace(*pBegin) || startsWithLineFolding(pBegin, pEnd));
851 }
852
853 // static
needsQuotedStringEscape(sal_uInt32 nChar)854 inline bool INetMIME::needsQuotedStringEscape(sal_uInt32 nChar)
855 {
856 return nChar == '"' || nChar == '\\';
857 }
858
859 // static
translateToMIME(rtl_TextEncoding eEncoding)860 inline rtl_TextEncoding INetMIME::translateToMIME(rtl_TextEncoding eEncoding)
861 {
862 #if defined WNT
863 return eEncoding == RTL_TEXTENCODING_MS_1252 ?
864 RTL_TEXTENCODING_ISO_8859_1 : eEncoding;
865 #else // WNT
866 return eEncoding;
867 #endif // WNT
868 }
869
870 // static
translateFromMIME(rtl_TextEncoding eEncoding)871 inline rtl_TextEncoding INetMIME::translateFromMIME(rtl_TextEncoding
872 eEncoding)
873 {
874 #if defined WNT
875 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
876 RTL_TEXTENCODING_MS_1252 : eEncoding;
877 #else // WNT
878 return eEncoding;
879 #endif // WNT
880 }
881
882 // static
isMIMECharsetEncoding(rtl_TextEncoding eEncoding)883 inline bool INetMIME::isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
884 {
885 return ( rtl_isOctetTextEncoding(eEncoding) == sal_True );
886 }
887
888 // static
getUTF8OctetCount(sal_uInt32 nChar)889 inline int INetMIME::getUTF8OctetCount(sal_uInt32 nChar)
890 {
891 DBG_ASSERT(nChar < 0x80000000, "INetMIME::getUTF8OctetCount(): Bad char");
892
893 return nChar < 0x80 ? 1 :
894 nChar < 0x800 ? 2 :
895 nChar <= 0x10000 ? 3 :
896 nChar <= 0x200000 ? 4 :
897 nChar <= 0x4000000 ? 5 : 6;
898 }
899
900 // static
getUTF32Character(const sal_Unicode * & rBegin,const sal_Unicode * pEnd)901 inline sal_uInt32 INetMIME::getUTF32Character(const sal_Unicode *& rBegin,
902 const sal_Unicode * pEnd)
903 {
904 DBG_ASSERT(rBegin && rBegin < pEnd,
905 "INetMIME::getUTF32Character(): Bad sequence");
906 if (rBegin + 1 < pEnd && rBegin[0] >= 0xD800 && rBegin[0] <= 0xDBFF
907 && rBegin[1] >= 0xDC00 && rBegin[1] <= 0xDFFF)
908 {
909 sal_uInt32 nUTF32 = sal_uInt32(*rBegin++ & 0x3FF) << 10;
910 return (nUTF32 | (*rBegin++ & 0x3FF)) + 0x10000;
911 }
912 else
913 return *rBegin++;
914 }
915
916 // static
putUTF32Character(sal_Unicode * pBuffer,sal_uInt32 nUTF32)917 inline sal_Unicode * INetMIME::putUTF32Character(sal_Unicode * pBuffer,
918 sal_uInt32 nUTF32)
919 {
920 DBG_ASSERT(nUTF32 <= 0x10FFFF, "INetMIME::putUTF32Character(): Bad char");
921 if (nUTF32 < 0x10000)
922 *pBuffer++ = sal_Unicode(nUTF32);
923 else
924 {
925 nUTF32 -= 0x10000;
926 *pBuffer++ = sal_Unicode(0xD800 | (nUTF32 >> 10));
927 *pBuffer++ = sal_Unicode(0xDC00 | (nUTF32 & 0x3FF));
928 }
929 return pBuffer;
930 }
931
932 //============================================================================
933 class INetMIMEOutputSink
934 {
935 public:
936 static sal_uInt32 const NO_LINE_LENGTH_LIMIT = SAL_MAX_UINT32;
937
938 private:
939 sal_uInt32 m_nColumn;
940 sal_uInt32 m_nLineLengthLimit;
941
942 protected:
943 /** Write a sequence of octets.
944
945 @param pBegin Points to the start of the sequence, must not be null.
946
947 @param pEnd Points past the end of the sequence, must be >= pBegin.
948 */
949 virtual void writeSequence(const sal_Char * pBegin,
950 const sal_Char * pEnd) = 0;
951
952 /** Write a null terminated sequence of octets (without the terminating
953 null).
954
955 @param pOctets A null terminated sequence of octets, must not be
956 null.
957
958 @return The length of pOctets (without the terminating null).
959 */
960 virtual sal_Size writeSequence(const sal_Char * pSequence);
961
962 /** Write a sequence of octets.
963
964 @descr The supplied sequence of UCS-4 characters is interpreted as a
965 sequence of octets. It is an error if any of the elements of the
966 sequence has a numerical value greater than 255.
967
968 @param pBegin Points to the start of the sequence, must not be null.
969
970 @param pEnd Points past the end of the sequence, must be >= pBegin.
971 */
972 virtual void writeSequence(const sal_uInt32 * pBegin,
973 const sal_uInt32 * pEnd);
974
975 /** Write a sequence of octets.
976
977 @descr The supplied sequence of Unicode characters is interpreted as
978 a sequence of octets. It is an error if any of the elements of the
979 sequence has a numerical value greater than 255.
980
981 @param pBegin Points to the start of the sequence, must not be null.
982
983 @param pEnd Points past the end of the sequence, must be >= pBegin.
984 */
985 virtual void writeSequence(const sal_Unicode * pBegin,
986 const sal_Unicode * pEnd);
987
988 public:
INetMIMEOutputSink(sal_uInt32 nTheColumn=0,sal_uInt32 nTheLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)989 INetMIMEOutputSink(sal_uInt32 nTheColumn = 0,
990 sal_uInt32 nTheLineLengthLimit
991 = INetMIME::SOFT_LINE_LENGTH_LIMIT):
992 m_nColumn(nTheColumn), m_nLineLengthLimit(nTheLineLengthLimit) {}
993
~INetMIMEOutputSink()994 virtual ~INetMIMEOutputSink() {}
995
996 /** Get the current column.
997
998 @return The current column (starting from zero).
999 */
getColumn() const1000 sal_uInt32 getColumn() const { return m_nColumn; }
1001
getLineLengthLimit() const1002 sal_uInt32 getLineLengthLimit() const { return m_nLineLengthLimit; }
1003
setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)1004 void setLineLengthLimit(sal_uInt32 nTheLineLengthLimit)
1005 { m_nLineLengthLimit = nTheLineLengthLimit; }
1006
1007 virtual ErrCode getError() const;
1008
1009 /** Write a sequence of octets.
1010
1011 @param pBegin Points to the start of the sequence, must not be null.
1012
1013 @param pEnd Points past the end of the sequence, must be >= pBegin.
1014 */
1015 inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1016
1017 /** Write a sequence of octets.
1018
1019 @param pBegin Points to the start of the sequence, must not be null.
1020
1021 @param nLength The length of the sequence.
1022 */
write(const sal_Char * pBegin,sal_Size nLength)1023 void write(const sal_Char * pBegin, sal_Size nLength)
1024 { write(pBegin, pBegin + nLength); }
1025
1026 /** Write a sequence of octets.
1027
1028 @descr The supplied sequence of UCS-4 characters is interpreted as a
1029 sequence of octets. It is an error if any of the elements of the
1030 sequence has a numerical value greater than 255.
1031
1032 @param pBegin Points to the start of the sequence, must not be null.
1033
1034 @param pEnd Points past the end of the sequence, must be >= pBegin.
1035 */
1036 inline void write(const sal_uInt32 * pBegin, const sal_uInt32 * pEnd);
1037
1038 /** Write a sequence of octets.
1039
1040 @descr The supplied sequence of Unicode characters is interpreted as
1041 a sequence of octets. It is an error if any of the elements of the
1042 sequence has a numerical value greater than 255.
1043
1044 @param pBegin Points to the start of the sequence, must not be null.
1045
1046 @param pEnd Points past the end of the sequence, must be >= pBegin.
1047 */
1048 inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1049
1050 /** Write a sequence of octets.
1051
1052 @param rOctets A ByteString, interpreted as a sequence of octets.
1053
1054 @param nBegin The offset of the first character to write.
1055
1056 @param nEnd The offset past the last character to write.
1057 */
1058 inline void write(const ByteString & rString, xub_StrLen nBegin,
1059 xub_StrLen nEnd);
1060
1061 /** Write a single octet.
1062
1063 @param nOctet Some octet.
1064
1065 @return This instance.
1066 */
1067 inline INetMIMEOutputSink & operator <<(sal_Char nOctet);
1068
1069 /** Write a null terminated sequence of octets (without the terminating
1070 null).
1071
1072 @param pOctets A null terminated sequence of octets, must not be
1073 null.
1074
1075 @return This instance.
1076 */
1077 inline INetMIMEOutputSink & operator <<(const sal_Char * pOctets);
1078
1079 /** Write a sequence of octets.
1080
1081 @param rOctets A ByteString, interpreted as a sequence of octets.
1082
1083 @return This instance.
1084 */
1085 inline INetMIMEOutputSink & operator <<(const ByteString & rOctets);
1086
1087 /** Call a manipulator function.
1088
1089 @param pManipulator A manipulator function.
1090
1091 @return Whatever the manipulator function returns.
1092 */
1093 INetMIMEOutputSink &
operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))1094 operator <<(INetMIMEOutputSink & (* pManipulator)(INetMIMEOutputSink &))
1095 { return pManipulator(*this); }
1096
1097 /** Write a line end (CR LF).
1098 */
1099 void writeLineEnd();
1100
1101 /** A manipulator function that writes a line end (CR LF).
1102
1103 @param rSink Some sink.
1104
1105 @return The sink rSink.
1106 */
1107 static inline INetMIMEOutputSink & endl(INetMIMEOutputSink & rSink);
1108 };
1109
write(const sal_Char * pBegin,const sal_Char * pEnd)1110 inline void INetMIMEOutputSink::write(const sal_Char * pBegin,
1111 const sal_Char * pEnd)
1112 {
1113 writeSequence(pBegin, pEnd);
1114 m_nColumn += pEnd - pBegin;
1115 }
1116
write(const sal_uInt32 * pBegin,const sal_uInt32 * pEnd)1117 inline void INetMIMEOutputSink::write(const sal_uInt32 * pBegin,
1118 const sal_uInt32 * pEnd)
1119 {
1120 writeSequence(pBegin, pEnd);
1121 m_nColumn += pEnd - pBegin;
1122 }
1123
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1124 inline void INetMIMEOutputSink::write(const sal_Unicode * pBegin,
1125 const sal_Unicode * pEnd)
1126 {
1127 writeSequence(pBegin, pEnd);
1128 m_nColumn += pEnd - pBegin;
1129 }
1130
write(const ByteString & rOctets,xub_StrLen nBegin,xub_StrLen nEnd)1131 inline void INetMIMEOutputSink::write(const ByteString & rOctets,
1132 xub_StrLen nBegin, xub_StrLen nEnd)
1133 {
1134 writeSequence(rOctets.GetBuffer() + nBegin, rOctets.GetBuffer() + nEnd);
1135 m_nColumn += nEnd - nBegin;
1136 }
1137
operator <<(sal_Char nOctet)1138 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(sal_Char nOctet)
1139 {
1140 writeSequence(&nOctet, &nOctet + 1);
1141 ++m_nColumn;
1142 return *this;
1143 }
1144
operator <<(const sal_Char * pOctets)1145 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const sal_Char *
1146 pOctets)
1147 {
1148 m_nColumn += writeSequence(pOctets);
1149 return *this;
1150 }
1151
operator <<(const ByteString & rOctets)1152 inline INetMIMEOutputSink & INetMIMEOutputSink::operator <<(const ByteString &
1153 rOctets)
1154 {
1155 writeSequence(rOctets.GetBuffer(), rOctets.GetBuffer() + rOctets.Len());
1156 m_nColumn += rOctets.Len();
1157 return *this;
1158 }
1159
1160 // static
endl(INetMIMEOutputSink & rSink)1161 inline INetMIMEOutputSink & INetMIMEOutputSink::endl(INetMIMEOutputSink &
1162 rSink)
1163 {
1164 rSink.writeLineEnd();
1165 return rSink;
1166 }
1167
1168 // static
writeEscapeSequence(INetMIMEOutputSink & rSink,sal_uInt32 nChar)1169 inline void INetMIME::writeEscapeSequence(INetMIMEOutputSink & rSink,
1170 sal_uInt32 nChar)
1171 {
1172 DBG_ASSERT(nChar <= 0xFF, "INetMIME::writeEscapeSequence(): Bad char");
1173 rSink << '=' << sal_uInt8(getHexDigit(nChar >> 4))
1174 << sal_uInt8(getHexDigit(nChar & 15));
1175 }
1176
1177 //============================================================================
1178 class INetMIMEStringOutputSink: public INetMIMEOutputSink
1179 {
1180 ByteString m_aBuffer;
1181 bool m_bOverflow;
1182
1183 using INetMIMEOutputSink::writeSequence;
1184
1185 virtual void writeSequence(const sal_Char * pBegin,
1186 const sal_Char * pEnd);
1187
1188 public:
INetMIMEStringOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1189 inline INetMIMEStringOutputSink(sal_uInt32 nColumn = 0,
1190 sal_uInt32 nLineLengthLimit
1191 = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1192 INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1193
1194 virtual ErrCode getError() const;
1195
1196 inline ByteString takeBuffer();
1197 };
1198
takeBuffer()1199 inline ByteString INetMIMEStringOutputSink::takeBuffer()
1200 {
1201 ByteString aTheBuffer = m_aBuffer;
1202 m_aBuffer.Erase();
1203 m_bOverflow = false;
1204 return aTheBuffer;
1205 }
1206
1207 //============================================================================
1208 class INetMIMEUnicodeOutputSink: public INetMIMEOutputSink
1209 {
1210 UniString m_aBuffer;
1211 bool m_bOverflow;
1212
1213 using INetMIMEOutputSink::writeSequence;
1214
1215 virtual void writeSequence(const sal_Char * pBegin,
1216 const sal_Char * pEnd);
1217
1218 virtual void writeSequence(const sal_uInt32 * pBegin,
1219 const sal_uInt32 * pEnd);
1220
1221 virtual void writeSequence(const sal_Unicode * pBegin,
1222 const sal_Unicode * pEnd);
1223
1224 public:
INetMIMEUnicodeOutputSink(sal_uInt32 nColumn=0,sal_uInt32 nLineLengthLimit=INetMIME::SOFT_LINE_LENGTH_LIMIT)1225 inline INetMIMEUnicodeOutputSink(sal_uInt32 nColumn = 0,
1226 sal_uInt32 nLineLengthLimit
1227 = INetMIME::SOFT_LINE_LENGTH_LIMIT):
1228 INetMIMEOutputSink(nColumn, nLineLengthLimit), m_bOverflow(false) {}
1229
1230 virtual ErrCode getError() const;
1231
1232 inline UniString takeBuffer();
1233 };
1234
takeBuffer()1235 inline UniString INetMIMEUnicodeOutputSink::takeBuffer()
1236 {
1237 UniString aTheBuffer = m_aBuffer;
1238 m_aBuffer.Erase();
1239 m_bOverflow = false;
1240 return aTheBuffer;
1241 }
1242
1243 //============================================================================
1244 class INetMIMEEncodedWordOutputSink
1245 {
1246 public:
1247 enum Context { CONTEXT_TEXT = 1,
1248 CONTEXT_COMMENT = 2,
1249 CONTEXT_PHRASE = 4 };
1250
1251 enum Space { SPACE_NO, SPACE_ENCODED, SPACE_ALWAYS };
1252
1253 private:
1254 enum { BUFFER_SIZE = 256 };
1255
1256 enum Coding { CODING_NONE, CODING_QUOTED, CODING_ENCODED,
1257 CODING_ENCODED_TERMINATED };
1258
1259 enum EncodedWordState { STATE_INITIAL, STATE_FIRST_EQUALS,
1260 STATE_FIRST_QUESTION, STATE_CHARSET,
1261 STATE_SECOND_QUESTION, STATE_ENCODING,
1262 STATE_THIRD_QUESTION, STATE_ENCODED_TEXT,
1263 STATE_FOURTH_QUESTION, STATE_SECOND_EQUALS,
1264 STATE_BAD };
1265
1266 INetMIMEOutputSink & m_rSink;
1267 Context m_eContext;
1268 Space m_eInitialSpace;
1269 sal_uInt32 m_nExtraSpaces;
1270 INetMIMECharsetList_Impl * m_pEncodingList;
1271 sal_Unicode * m_pBuffer;
1272 sal_uInt32 m_nBufferSize;
1273 sal_Unicode * m_pBufferEnd;
1274 Coding m_ePrevCoding;
1275 rtl_TextEncoding m_ePrevMIMEEncoding;
1276 Coding m_eCoding;
1277 sal_uInt32 m_nQuotedEscaped;
1278 EncodedWordState m_eEncodedWordState;
1279
1280 inline bool needsEncodedWordEscape(sal_uInt32 nChar) const;
1281
1282 void finish(bool bWriteTrailer);
1283
1284 public:
1285 inline INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,
1286 Context eTheContext,
1287 Space eTheInitialSpace,
1288 rtl_TextEncoding ePreferredEncoding);
1289
1290 ~INetMIMEEncodedWordOutputSink();
1291
1292 INetMIMEEncodedWordOutputSink & operator <<(sal_uInt32 nChar);
1293
1294 inline void write(const sal_Char * pBegin, const sal_Char * pEnd);
1295
1296 inline void write(const sal_Unicode * pBegin, const sal_Unicode * pEnd);
1297
1298 inline bool flush();
1299 };
1300
INetMIMEEncodedWordOutputSink(INetMIMEOutputSink & rTheSink,Context eTheContext,Space eTheInitialSpace,rtl_TextEncoding ePreferredEncoding)1301 inline INetMIMEEncodedWordOutputSink::INetMIMEEncodedWordOutputSink(
1302 INetMIMEOutputSink & rTheSink, Context eTheContext,
1303 Space eTheInitialSpace, rtl_TextEncoding ePreferredEncoding):
1304 m_rSink(rTheSink),
1305 m_eContext(eTheContext),
1306 m_eInitialSpace(eTheInitialSpace),
1307 m_nExtraSpaces(0),
1308 m_pEncodingList(INetMIME::createPreferredCharsetList(ePreferredEncoding)),
1309 m_ePrevCoding(CODING_NONE),
1310 m_eCoding(CODING_NONE),
1311 m_nQuotedEscaped(0),
1312 m_eEncodedWordState(STATE_INITIAL)
1313 {
1314 m_nBufferSize = BUFFER_SIZE;
1315 m_pBuffer = static_cast< sal_Unicode * >(rtl_allocateMemory(
1316 m_nBufferSize
1317 * sizeof (sal_Unicode)));
1318 m_pBufferEnd = m_pBuffer;
1319 }
1320
write(const sal_Char * pBegin,const sal_Char * pEnd)1321 inline void INetMIMEEncodedWordOutputSink::write(const sal_Char * pBegin,
1322 const sal_Char * pEnd)
1323 {
1324 DBG_ASSERT(pBegin && pBegin <= pEnd,
1325 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1326
1327 while (pBegin != pEnd)
1328 operator <<(*pBegin++);
1329 }
1330
write(const sal_Unicode * pBegin,const sal_Unicode * pEnd)1331 inline void INetMIMEEncodedWordOutputSink::write(const sal_Unicode * pBegin,
1332 const sal_Unicode * pEnd)
1333 {
1334 DBG_ASSERT(pBegin && pBegin <= pEnd,
1335 "INetMIMEEncodedWordOutputSink::write(): Bad sequence");
1336
1337 while (pBegin != pEnd)
1338 operator <<(*pBegin++);
1339 }
1340
flush()1341 inline bool INetMIMEEncodedWordOutputSink::flush()
1342 {
1343 finish(true);
1344 return m_ePrevCoding != CODING_NONE;
1345 }
1346
1347 //============================================================================
1348 struct INetContentTypeParameter
1349 {
1350 /** The name of the attribute, in US-ASCII encoding and converted to lower
1351 case. If a parameter value is split as described in RFC 2231, there
1352 will only be one item for the complete parameter, with the attribute
1353 name lacking any section suffix.
1354 */
1355 const ByteString m_sAttribute;
1356
1357 /** The optional character set specification (see RFC 2231), in US-ASCII
1358 encoding and converted to lower case.
1359 */
1360 const ByteString m_sCharset;
1361
1362 /** The optional language specification (see RFC 2231), in US-ASCII
1363 encoding and converted to lower case.
1364 */
1365 const ByteString m_sLanguage;
1366
1367 /** The attribute value. If the value is a quoted-string, it is
1368 'unpacked.' If a character set is specified, and the value can be
1369 converted to Unicode, this is done. Also, if no character set is
1370 specified, it is first tried to convert the value from UTF-8 encoding
1371 to Unicode, and if that doesn't work (because the value is not in
1372 UTF-8 encoding), it is converted from ISO-8859-1 encoding to Unicode
1373 (which will always work). But if a character set is specified and the
1374 value cannot be converted from that character set to Unicode, special
1375 action is taken to produce a value that can possibly be transformed
1376 back into its original form: Any 8-bit character from a non-encoded
1377 part of the original value is directly converted to Unicode
1378 (effectively handling it as if it was ISO-8859-1 encoded), and any
1379 8-bit character from an encoded part of the original value is mapped
1380 to the range U+F800..U+F8FF at the top of the Corporate Use Subarea
1381 within Unicode's Private Use Area (effectively adding 0xF800 to the
1382 character's numeric value).
1383 */
1384 const UniString m_sValue;
1385
1386 /** This is true if the value is successfuly converted to Unicode, and
1387 false if the value is a special mixture of ISO-LATIN-1 characters and
1388 characters from Unicode's Private Use Area.
1389 */
1390 const bool m_bConverted;
1391
1392 inline INetContentTypeParameter(const ByteString & rTheAttribute,
1393 const ByteString & rTheCharset,
1394 const ByteString & rTheLanguage,
1395 const UniString & rTheValue,
1396 bool bTheConverted);
1397 };
1398
INetContentTypeParameter(const ByteString & rTheAttribute,const ByteString & rTheCharset,const ByteString & rTheLanguage,const UniString & rTheValue,bool bTheConverted)1399 inline INetContentTypeParameter::INetContentTypeParameter(const ByteString &
1400 rTheAttribute,
1401 const ByteString &
1402 rTheCharset,
1403 const ByteString &
1404 rTheLanguage,
1405 const UniString &
1406 rTheValue,
1407 bool bTheConverted):
1408 m_sAttribute(rTheAttribute),
1409 m_sCharset(rTheCharset),
1410 m_sLanguage(rTheLanguage),
1411 m_sValue(rTheValue),
1412 m_bConverted(bTheConverted)
1413 {}
1414
1415 //============================================================================
1416 class TOOLS_DLLPUBLIC INetContentTypeParameterList: private List
1417 {
1418 public:
~INetContentTypeParameterList()1419 ~INetContentTypeParameterList() { Clear(); }
1420
1421 using List::Count;
1422
1423 void Clear();
1424
Insert(INetContentTypeParameter * pParameter,sal_uIntPtr nIndex)1425 void Insert(INetContentTypeParameter * pParameter, sal_uIntPtr nIndex)
1426 { List::Insert(pParameter, nIndex); }
1427
1428 inline const INetContentTypeParameter * GetObject(sal_uIntPtr nIndex) const;
1429
1430 const INetContentTypeParameter * find(const ByteString & rAttribute)
1431 const;
1432 };
1433
1434 inline const INetContentTypeParameter *
GetObject(sal_uIntPtr nIndex) const1435 INetContentTypeParameterList::GetObject(sal_uIntPtr nIndex) const
1436 {
1437 return static_cast< INetContentTypeParameter * >(List::GetObject(nIndex));
1438 }
1439
1440 #endif // TOOLS_INETMIME_HXX
1441
1442