xref: /trunk/main/svl/source/misc/urihelper.cxx (revision 1ecadb572e7010ff3b3382ad9bf179dbc6efadbb)
1 /*************************************************************************
2  *
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * Copyright 2000, 2010 Oracle and/or its affiliates.
6  *
7  * OpenOffice.org - a multi-platform office productivity suite
8  *
9  * This file is part of OpenOffice.org.
10  *
11  * OpenOffice.org is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser General Public License version 3
13  * only, as published by the Free Software Foundation.
14  *
15  * OpenOffice.org is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU Lesser General Public License version 3 for more details
19  * (a copy is included in the LICENSE file that accompanied this code).
20  *
21  * You should have received a copy of the GNU Lesser General Public License
22  * version 3 along with OpenOffice.org.  If not, see
23  * <http://www.openoffice.org/license.html>
24  * for a copy of the LGPLv3 License.
25  *
26  ************************************************************************/
27 
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_svl.hxx"
30 #include <svl/urihelper.hxx>
31 #include <com/sun/star/beans/XPropertySet.hpp>
32 #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
33 #include "com/sun/star/lang/XMultiComponentFactory.hpp"
34 #include "com/sun/star/ucb/Command.hpp"
35 #include <com/sun/star/ucb/FileSystemNotation.hpp>
36 #include "com/sun/star/ucb/IllegalIdentifierException.hpp"
37 #include "com/sun/star/ucb/UnsupportedCommandException.hpp"
38 #include "com/sun/star/ucb/XCommandEnvironment.hpp"
39 #include "com/sun/star/ucb/XCommandProcessor.hpp"
40 #include "com/sun/star/ucb/XContent.hpp"
41 #include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
42 #include "com/sun/star/ucb/XContentProvider.hpp"
43 #include <com/sun/star/ucb/XContentProviderManager.hpp>
44 #include "com/sun/star/uno/Any.hxx"
45 #include "com/sun/star/uno/Exception.hpp"
46 #include "com/sun/star/uno/Reference.hxx"
47 #include "com/sun/star/uno/RuntimeException.hpp"
48 #include "com/sun/star/uno/Sequence.hxx"
49 #include "com/sun/star/uno/XComponentContext.hpp"
50 #include "com/sun/star/uno/XInterface.hpp"
51 #include "com/sun/star/uri/UriReferenceFactory.hpp"
52 #include "com/sun/star/uri/XUriReference.hpp"
53 #include "com/sun/star/uri/XUriReferenceFactory.hpp"
54 #include "cppuhelper/exc_hlp.hxx"
55 #include "comphelper/processfactory.hxx"
56 #include "osl/diagnose.h"
57 #include "rtl/ustrbuf.hxx"
58 #include "rtl/ustring.h"
59 #include "rtl/ustring.hxx"
60 #include "sal/types.h"
61 #include <tools/debug.hxx>
62 #include <tools/inetmime.hxx>
63 #include <ucbhelper/contentbroker.hxx>
64 #include <unotools/charclass.hxx>
65 #include "rtl/instance.hxx"
66 
67 namespace unnamed_svl_urihelper {}
68 using namespace unnamed_svl_urihelper;
69     // unnamed namespaces don't work well yet...
70 
71 namespace css = com::sun::star;
72 using namespace com::sun::star;
73 
74 //============================================================================
75 //
76 //  SmartRel2Abs
77 //
78 //============================================================================
79 
80 namespace unnamed_svl_urihelper {
81 
82 inline UniString toUniString(ByteString const & rString)
83 {
84     return UniString(rString, RTL_TEXTENCODING_ISO_8859_1);
85 }
86 
87 inline UniString toUniString(UniString const & rString)
88 {
89     return rString;
90 }
91 
92 template< typename Str >
93 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,
94                                    Str const & rTheRelURIRef,
95                                    Link const & rMaybeFileHdl,
96                                    bool bCheckFileExists,
97                                    bool bIgnoreFragment,
98                                    INetURLObject::EncodeMechanism
99                                        eEncodeMechanism,
100                                    INetURLObject::DecodeMechanism
101                                        eDecodeMechanism,
102                                    rtl_TextEncoding eCharset,
103                                    bool bRelativeNonURIs,
104                                    INetURLObject::FSysStyle eStyle)
105 {
106     // Backwards compatibility:
107     if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#')
108         return toUniString(rTheRelURIRef);
109 
110     INetURLObject aAbsURIRef;
111     if (rTheBaseURIRef.HasError())
112         aAbsURIRef.
113             SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
114     else
115     {
116         bool bWasAbsolute;
117         aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
118                                                  bWasAbsolute,
119                                                  bIgnoreFragment,
120                                                  eEncodeMechanism,
121                                                  eCharset,
122                                                  bRelativeNonURIs,
123                                                  eStyle);
124         if (bCheckFileExists
125             && !bWasAbsolute
126             && (aAbsURIRef.GetProtocol() == INET_PROT_FILE))
127         {
128             INetURLObject aNonFileURIRef;
129             aNonFileURIRef.SetSmartURL(rTheRelURIRef,
130                                        eEncodeMechanism,
131                                        eCharset,
132                                        eStyle);
133             if (!aNonFileURIRef.HasError()
134                 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE)
135             {
136                 bool bMaybeFile = false;
137                 if (rMaybeFileHdl.IsSet())
138                 {
139                     UniString aFilePath(toUniString(rTheRelURIRef));
140                     bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0;
141                 }
142                 if (!bMaybeFile)
143                     aAbsURIRef = aNonFileURIRef;
144             }
145         }
146     }
147     return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
148 }
149 
150 }
151 
152 UniString
153 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
154                         ByteString const & rTheRelURIRef,
155                         Link const & rMaybeFileHdl,
156                         bool bCheckFileExists,
157                         bool bIgnoreFragment,
158                         INetURLObject::EncodeMechanism eEncodeMechanism,
159                         INetURLObject::DecodeMechanism eDecodeMechanism,
160                         rtl_TextEncoding eCharset,
161                         bool bRelativeNonURIs,
162                         INetURLObject::FSysStyle eStyle)
163 {
164     return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
165                              bCheckFileExists, bIgnoreFragment,
166                              eEncodeMechanism, eDecodeMechanism, eCharset,
167                              bRelativeNonURIs, eStyle);
168 }
169 
170 UniString
171 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
172                         UniString const & rTheRelURIRef,
173                         Link const & rMaybeFileHdl,
174                         bool bCheckFileExists,
175                         bool bIgnoreFragment,
176                         INetURLObject::EncodeMechanism eEncodeMechanism,
177                         INetURLObject::DecodeMechanism eDecodeMechanism,
178                         rtl_TextEncoding eCharset,
179                         bool bRelativeNonURIs,
180                         INetURLObject::FSysStyle eStyle)
181 {
182     return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
183                              bCheckFileExists, bIgnoreFragment,
184                              eEncodeMechanism, eDecodeMechanism, eCharset,
185                              bRelativeNonURIs, eStyle);
186 }
187 
188 //============================================================================
189 //
190 //  SetMaybeFileHdl
191 //
192 //============================================================================
193 
194 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; }
195 
196 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl)
197 {
198     MaybeFileHdl::get() = rTheMaybeFileHdl;
199 }
200 
201 //============================================================================
202 //
203 //  GetMaybeFileHdl
204 //
205 //============================================================================
206 
207 Link URIHelper::GetMaybeFileHdl()
208 {
209     return MaybeFileHdl::get();
210 }
211 
212 namespace {
213 
214 bool isAbsoluteHierarchicalUriReference(
215     css::uno::Reference< css::uri::XUriReference > const & uriReference)
216 {
217     return uriReference.is() && uriReference->isAbsolute()
218         && uriReference->isHierarchical() && !uriReference->hasRelativePath();
219 }
220 
// Outcome of normalizePrefix.
//
// To improve performance, assume that if for any prefix URL of a given
// hierarchical URL either a UCB content cannot be created, or the UCB content
// does not support the getCasePreservingURL command, then this will hold for
// any other prefix URL of the given URL, too.  Those two cases are reported as
// GeneralFailure so that callers can stop trying further prefixes.
enum Result
{
    // The prefix was normalized.
    Success,
    // No content, or getCasePreservingURL unsupported.
    GeneralFailure,
    // Executing the command failed for this particular prefix.
    SpecificFailure
};
226 
227 Result normalizePrefix(
228     css::uno::Reference< css::ucb::XContentProvider > const & broker,
229     rtl::OUString const & uri, rtl::OUString * normalized)
230 {
231     OSL_ASSERT(broker.is() && normalized != 0);
232     css::uno::Reference< css::ucb::XContent > content;
233     try {
234         content = broker->queryContent(
235             css::uno::Reference< css::ucb::XContentIdentifierFactory >(
236                 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier(
237                     uri));
238     } catch (css::ucb::IllegalIdentifierException &) {}
239     if (!content.is()) {
240         return GeneralFailure;
241     }
242     try {
243         #if OSL_DEBUG_LEVEL > 0
244         bool ok =
245         #endif
246             (css::uno::Reference< css::ucb::XCommandProcessor >(
247                    content, css::uno::UNO_QUERY_THROW)->execute(
248                        css::ucb::Command(
249                            rtl::OUString(
250                                RTL_CONSTASCII_USTRINGPARAM(
251                                    "getCasePreservingURL")),
252                            -1, css::uno::Any()),
253                        0,
254                        css::uno::Reference< css::ucb::XCommandEnvironment >())
255                >>= *normalized);
256         OSL_ASSERT(ok);
257     } catch (css::uno::RuntimeException &) {
258         throw;
259     } catch (css::ucb::UnsupportedCommandException &) {
260         return GeneralFailure;
261     } catch (css::uno::Exception &) {
262         return SpecificFailure;
263     }
264     return Success;
265 }
266 
267 rtl::OUString normalize(
268     css::uno::Reference< css::ucb::XContentProvider > const & broker,
269     css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
270     rtl::OUString const & uriReference)
271 {
272     // normalizePrefix can potentially fail (a typically example being a file
273     // URL that denotes a non-existing resource); in such a case, try to
274     // normalize as long a prefix of the given URL as possible (i.e., normalize
275     // all the existing directories within the path):
276     rtl::OUString normalized;
277     sal_Int32 n = uriReference.indexOf('#');
278     normalized = n == -1 ? uriReference : uriReference.copy(0, n);
279     switch (normalizePrefix(broker, normalized, &normalized)) {
280     case Success:
281         return n == -1 ? normalized : normalized + uriReference.copy(n);
282     case GeneralFailure:
283         return uriReference;
284     case SpecificFailure:
285     default:
286         break;
287     }
288     css::uno::Reference< css::uri::XUriReference > ref(
289         uriFactory->parse(uriReference));
290     if (!isAbsoluteHierarchicalUriReference(ref)) {
291         return uriReference;
292     }
293     sal_Int32 count = ref->getPathSegmentCount();
294     if (count < 2) {
295         return uriReference;
296     }
297     rtl::OUStringBuffer head(ref->getScheme());
298     head.append(static_cast< sal_Unicode >(':'));
299     if (ref->hasAuthority()) {
300         head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
301         head.append(ref->getAuthority());
302     }
303     for (sal_Int32 i = count - 1; i > 0; --i) {
304         rtl::OUStringBuffer buf(head);
305         for (sal_Int32 j = 0; j < i; ++j) {
306             buf.append(static_cast< sal_Unicode >('/'));
307             buf.append(ref->getPathSegment(j));
308         }
309         normalized = buf.makeStringAndClear();
310         if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
311         {
312             buf.append(normalized);
313             css::uno::Reference< css::uri::XUriReference > preRef(
314                 uriFactory->parse(normalized));
315             if (!isAbsoluteHierarchicalUriReference(preRef)) {
316                 // This could only happen if something is inconsistent:
317                 break;
318             }
319             sal_Int32 preCount = preRef->getPathSegmentCount();
320             // normalizePrefix may have added or removed a final slash:
321             if (preCount != i) {
322                 if (preCount == i - 1) {
323                     buf.append(static_cast< sal_Unicode >('/'));
324                 } else if (preCount - 1 == i && buf.getLength() > 0
325                            && buf.charAt(buf.getLength() - 1) == '/')
326                 {
327                     buf.setLength(buf.getLength() - 1);
328                 } else {
329                     // This could only happen if something is inconsistent:
330                     break;
331                 }
332             }
333             for (sal_Int32 j = i; j < count; ++j) {
334                 buf.append(static_cast< sal_Unicode >('/'));
335                 buf.append(ref->getPathSegment(j));
336             }
337             if (ref->hasQuery()) {
338                 buf.append(static_cast< sal_Unicode >('?'));
339                 buf.append(ref->getQuery());
340             }
341             if (ref->hasFragment()) {
342                 buf.append(static_cast< sal_Unicode >('#'));
343                 buf.append(ref->getFragment());
344             }
345             return buf.makeStringAndClear();
346         }
347     }
348     return uriReference;
349 }
350 
351 }
352 
353 css::uno::Reference< css::uri::XUriReference >
354 URIHelper::normalizedMakeRelative(
355     css::uno::Reference< css::uno::XComponentContext > const & context,
356     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
357 {
358     OSL_ASSERT(context.is());
359     css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory(
360         context->getServiceManager());
361     if (!componentFactory.is()) {
362         throw css::uno::RuntimeException(
363             rtl::OUString(
364                 RTL_CONSTASCII_USTRINGPARAM(
365                     "component context has no service manager")),
366             css::uno::Reference< css::uno::XInterface >());
367     }
368     css::uno::Sequence< css::uno::Any > args(2);
369     args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local"));
370     args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office"));
371     css::uno::Reference< css::ucb::XContentProvider > broker;
372     try {
373         broker = css::uno::Reference< css::ucb::XContentProvider >(
374             componentFactory->createInstanceWithArgumentsAndContext(
375                 rtl::OUString(
376                     RTL_CONSTASCII_USTRINGPARAM(
377                         "com.sun.star.ucb.UniversalContentBroker")),
378                 args, context),
379             css::uno::UNO_QUERY_THROW);
380     } catch (css::uno::RuntimeException &) {
381         throw;
382     } catch (css::uno::Exception &) {
383         css::uno::Any exception(cppu::getCaughtException());
384         throw css::lang::WrappedTargetRuntimeException(
385             rtl::OUString(
386                 RTL_CONSTASCII_USTRINGPARAM(
387                     "creating com.sun.star.ucb.UniversalContentBroker failed")),
388             css::uno::Reference< css::uno::XInterface >(),
389             exception);
390     }
391     css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
392         css::uri::UriReferenceFactory::create(context));
393     return uriFactory->makeRelative(
394         uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
395         uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
396         true, false);
397 }
398 
399 rtl::OUString URIHelper::simpleNormalizedMakeRelative(
400     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
401 {
402     com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel(
403         URIHelper::normalizedMakeRelative(
404             com::sun::star::uno::Reference<
405             com::sun::star::uno::XComponentContext >(
406                 (com::sun::star::uno::Reference<
407                  com::sun::star::beans::XPropertySet >(
408                     comphelper::getProcessServiceFactory(),
409                     com::sun::star::uno::UNO_QUERY_THROW)->
410                  getPropertyValue(
411                      rtl::OUString(
412                          RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))),
413                 com::sun::star::uno::UNO_QUERY_THROW),
414             baseUriReference, uriReference));
415     return rel.is() ? rel->getUriReference() : uriReference;
416 }
417 
418 //============================================================================
419 //
420 //  FindFirstURLInText
421 //
422 //============================================================================
423 
424 namespace unnamed_svl_urihelper {
425 
426 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos)
427 {
428     return INetMIME::isHighSurrogate(rStr.GetChar(nPos))
429            && rStr.Len() - nPos >= 2
430            && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ?
431                nPos + 2 : nPos + 1;
432 }
433 
434 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr,
435                  xub_StrLen nPos, xub_StrLen nEnd)
436 {
437     if (nPos == nEnd)
438         return true;
439     if (rCharClass.isLetterNumeric(rStr, nPos))
440         return false;
441     switch (rStr.GetChar(nPos))
442     {
443     case '$':
444     case '%':
445     case '&':
446     case '-':
447     case '/':
448     case '@':
449     case '\\':
450         return false;
451     default:
452         return true;
453     }
454 }
455 
456 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr,
457                  xub_StrLen nPos, xub_StrLen nEnd)
458 {
459     if (nPos == nEnd)
460         return true;
461     if (rCharClass.isLetterNumeric(rStr, nPos))
462         return false;
463     switch (rStr.GetChar(nPos))
464     {
465     case '!':
466     case '#':
467     case '$':
468     case '%':
469     case '&':
470     case '\'':
471     case '*':
472     case '+':
473     case '-':
474     case '/':
475     case '=':
476     case '?':
477     case '@':
478     case '^':
479     case '_':
480     case '`':
481     case '{':
482     case '|':
483     case '}':
484     case '~':
485         return false;
486     default:
487         return true;
488     }
489 }
490 
491 bool checkWChar(CharClass const & rCharClass, UniString const & rStr,
492                 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false,
493                 bool bPipe = false)
494 {
495     sal_Unicode c = rStr.GetChar(*pPos);
496     if (INetMIME::isUSASCII(c))
497     {
498         static sal_uInt8 const aMap[128]
499             = { 0, 0, 0, 0, 0, 0, 0, 0,
500                 0, 0, 0, 0, 0, 0, 0, 0,
501                 0, 0, 0, 0, 0, 0, 0, 0,
502                 0, 0, 0, 0, 0, 0, 0, 0,
503                 0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
504                 1, 1, 1, 1, 1, 4, 1, 4,   // ()*+,-./
505                 4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
506                 4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
507                 4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
508                 4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
509                 4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
510                 4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
511                 0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
512                 4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
513                 4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
514                 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
515         switch (aMap[c])
516         {
517             default: // not uric
518                 return false;
519 
520             case 1: // uric
521                 ++(*pPos);
522                 return true;
523 
524             case 2: // "\"
525                 if (bBackslash)
526                 {
527                     *pEnd = ++(*pPos);
528                     return true;
529                 }
530                 else
531                     return false;
532 
533             case 3: // "|"
534                 if (bPipe)
535                 {
536                     *pEnd = ++(*pPos);
537                     return true;
538                 }
539                 else
540                     return false;
541 
542             case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
543                     // isBoundary1)
544                 *pEnd = ++(*pPos);
545                 return true;
546         }
547     }
548     else if (rCharClass.isLetterNumeric(rStr, *pPos))
549     {
550         *pEnd = *pPos = nextChar(rStr, *pPos);
551         return true;
552     }
553     else
554         return false;
555 }
556 
557 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos,
558                       xub_StrLen nEnd)
559 {
560     sal_Unicode const * pBuffer = rStr.GetBuffer();
561     sal_Unicode const * p = pBuffer + *pPos;
562     sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
563     *pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer);
564     return nLabels;
565 }
566 
567 }
568 
569 UniString
570 URIHelper::FindFirstURLInText(UniString const & rText,
571                               xub_StrLen & rBegin,
572                               xub_StrLen & rEnd,
573                               CharClass const & rCharClass,
574                               INetURLObject::EncodeMechanism eMechanism,
575                               rtl_TextEncoding eCharset,
576                               INetURLObject::FSysStyle eStyle)
577 {
578     if (!(rBegin <= rEnd && rEnd <= rText.Len()))
579         return UniString();
580 
581     // Search for the first substring of [rBegin..rEnd[ that matches any of the
582     // following productions (for which the appropriate style bit is set in
583     // eStyle, if applicable).
584     //
585     // 1st Production (known scheme):
586     //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
587     //        \B1
588     //
589     // 2nd Production (file):
590     //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
591     //
592     // 3rd Production (ftp):
593     //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
594     //
595     // 4th Production (http):
596     //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
597     //
598     // 5th Production (mailto):
599     //    \B2 local-part "@" domain \B1
600     //
601     // 6th Production (UNC file):
602     //    \B1 "\\" domain "\" *(wchar / "\") \B1
603     //
604     // 7th Production (DOS file):
605     //    \B1 ALPHA ":\" *(wchar / "\") \B1
606     //
607     // 8th Production (Unix-like DOS file):
608     //    \B1 ALPHA ":/" *(wchar / "\") \B1
609     //
610     // The productions use the following auxiliary rules.
611     //
612     //    local-part = atom *("." atom)
613     //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
614     //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
615     //              / "~")
616     //    domain = label *("." label)
617     //    label = alphanum [*(alphanum / "-") alphanum]
618     //    alphanum = ALPHA / DIGIT
619     //    wchar = <any uric character (ignoring the escaped rule), or "%", or
620     //             a letter or digit (according to rCharClass)>
621     //
622     // "\B1" (boundary 1) stands for the beginning or end of the block of text,
623     // or a character that is neither (a) a letter or digit (according to
624     // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
625     // (FIXME:  What was the rationale for this set of punctuation characters?)
626     //
627     // "\B2" (boundary 2) stands for the beginning or end of the block of text,
628     // or a character that is neither (a) a letter or digit (according to
629     // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
630     // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
631     // 822 <atom> character, or "@" from \B1's set above).
632     //
633     // Productions 1--4, and 6--8 try to find a maximum-length match, but they
634     // stop at the first <wchar> character that is a "\B1" character which is
635     // only followed by "\B1" characters (taking "\" and "|" characters into
636     // account appropriately).  Production 5 simply tries to find a maximum-
637     // length match.
638     //
639     // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--9
640     // use ENCODE_ALL.
641     //
642     // Productions 6--9 are only applicable if the FSYS_DOS bit is set in
643     // eStyle.
644 
645     bool bBoundary1 = true;
646     bool bBoundary2 = true;
647     for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
648     {
649         sal_Unicode c = rText.GetChar(nPos);
650         if (bBoundary1)
651         {
652             if (INetMIME::isAlpha(c))
653             {
654                 xub_StrLen i = nPos;
655                 INetProtocol eScheme
656                     = INetURLObject::CompareProtocolScheme(UniString(rText, i,
657                                                                      rEnd));
658                 if (eScheme == INET_PROT_FILE) // 2nd
659                 {
660                     while (rText.GetChar(i++) != ':') ;
661                     xub_StrLen nPrefixEnd = i;
662                     xub_StrLen nUriEnd = i;
663                     while (i != rEnd
664                            && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
665                                          true)) ;
666                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
667                     {
668                         ++i;
669                         while (i != rEnd
670                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
671                     }
672                     if (nUriEnd != nPrefixEnd
673                         && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
674                     {
675                         INetURLObject aUri(UniString(rText, nPos,
676                                                      nUriEnd - nPos),
677                                            INET_PROT_FILE, eMechanism, eCharset,
678                                            eStyle);
679                         if (!aUri.HasError())
680                         {
681                             rBegin = nPos;
682                             rEnd = nUriEnd;
683                             return
684                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
685                         }
686                     }
687                 }
688                 else if (eScheme != INET_PROT_NOT_VALID) // 1st
689                 {
690                     while (rText.GetChar(i++) != ':') ;
691                     xub_StrLen nPrefixEnd = i;
692                     xub_StrLen nUriEnd = i;
693                     while (i != rEnd
694                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
695                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
696                     {
697                         ++i;
698                         while (i != rEnd
699                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
700                     }
701                     if (nUriEnd != nPrefixEnd
702                         && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
703                             || rText.GetChar(nUriEnd) == '\\'))
704                     {
705                         INetURLObject aUri(UniString(rText, nPos,
706                                                      nUriEnd - nPos),
707                                            INET_PROT_HTTP, eMechanism,
708                                            eCharset);
709                         if (!aUri.HasError())
710                         {
711                             rBegin = nPos;
712                             rEnd = nUriEnd;
713                             return
714                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
715                         }
716                     }
717                 }
718 
719                 // 3rd, 4th:
720                 i = nPos;
721                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
722                 if (nLabels >= 3
723                     && rText.GetChar(nPos + 3) == '.'
724                     && (((rText.GetChar(nPos) == 'w'
725                           || rText.GetChar(nPos) == 'W')
726                          && (rText.GetChar(nPos + 1) == 'w'
727                              || rText.GetChar(nPos + 1) == 'W')
728                          && (rText.GetChar(nPos + 2) == 'w'
729                              || rText.GetChar(nPos + 2) == 'W'))
730                         || ((rText.GetChar(nPos) == 'f'
731                              || rText.GetChar(nPos) == 'F')
732                             && (rText.GetChar(nPos + 1) == 't'
733                                 || rText.GetChar(nPos + 1) == 'T')
734                             && (rText.GetChar(nPos + 2) == 'p'
735                                 || rText.GetChar(nPos + 2) == 'P'))))
736                     // (note that rText.GetChar(nPos + 3) is guaranteed to be
737                     // valid)
738                 {
739                     xub_StrLen nUriEnd = i;
740                     if (i != rEnd && rText.GetChar(i) == '/')
741                     {
742                         nUriEnd = ++i;
743                         while (i != rEnd
744                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
745                     }
746                     if (i != rEnd && rText.GetChar(i) == '#')
747                     {
748                         ++i;
749                         while (i != rEnd
750                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
751                     }
752                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
753                         || rText.GetChar(nUriEnd) == '\\')
754                     {
755                         INetURLObject aUri(UniString(rText, nPos,
756                                                      nUriEnd - nPos),
757                                            INET_PROT_HTTP, eMechanism,
758                                            eCharset);
759                         if (!aUri.HasError())
760                         {
761                             rBegin = nPos;
762                             rEnd = nUriEnd;
763                             return
764                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
765                         }
766                     }
767                 }
768 
769                 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3
770                     && rText.GetChar(nPos + 1) == ':'
771                     && (rText.GetChar(nPos + 2) == '/'
772                         || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th
773                 {
774                     i = nPos + 3;
775                     xub_StrLen nUriEnd = i;
776                     while (i != rEnd
777                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
778                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
779                     {
780                         INetURLObject aUri(UniString(rText, nPos,
781                                                      nUriEnd - nPos),
782                                            INET_PROT_FILE,
783                                            INetURLObject::ENCODE_ALL,
784                                            RTL_TEXTENCODING_UTF8,
785                                            INetURLObject::FSYS_DOS);
786                         if (!aUri.HasError())
787                         {
788                             rBegin = nPos;
789                             rEnd = nUriEnd;
790                             return
791                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
792                         }
793                     }
794                 }
795             }
796             else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2
797                      && rText.GetChar(nPos) == '\\'
798                      && rText.GetChar(nPos + 1) == '\\') // 6th
799             {
800                 xub_StrLen i = nPos + 2;
801                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
802                 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\')
803                 {
804                     xub_StrLen nUriEnd = ++i;
805                     while (i != rEnd
806                            && checkWChar(rCharClass, rText, &i, &nUriEnd,
807                                          true)) ;
808                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
809                     {
810                         INetURLObject aUri(UniString(rText, nPos,
811                                                      nUriEnd - nPos),
812                                            INET_PROT_FILE,
813                                            INetURLObject::ENCODE_ALL,
814                                            RTL_TEXTENCODING_UTF8,
815                                            INetURLObject::FSYS_DOS);
816                         if (!aUri.HasError())
817                         {
818                             rBegin = nPos;
819                             rEnd = nUriEnd;
820                             return
821                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
822                         }
823                     }
824                 }
825             }
826         }
827         if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
828         {
829             bool bDot = false;
830             for (xub_StrLen i = nPos + 1; i != rEnd; ++i)
831             {
832                 sal_Unicode c2 = rText.GetChar(i);
833                 if (INetMIME::isAtomChar(c2))
834                     bDot = false;
835                 else if (bDot)
836                     break;
837                 else if (c2 == '.')
838                     bDot = true;
839                 else
840                 {
841                     if (c2 == '@')
842                     {
843                         ++i;
844                         sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
845                         if (nLabels >= 1
846                             && isBoundary1(rCharClass, rText, i, rEnd))
847                         {
848                             INetURLObject aUri(UniString(rText, nPos, i - nPos),
849                                                INET_PROT_MAILTO,
850                                                INetURLObject::ENCODE_ALL);
851                             if (!aUri.HasError())
852                             {
853                                 rBegin = nPos;
854                                 rEnd = i;
855                                 return aUri.GetMainURL(
856                                            INetURLObject::DECODE_TO_IURI);
857                             }
858                         }
859                     }
860                     break;
861                 }
862             }
863         }
864         bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
865         bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
866     }
867     rBegin = rEnd;
868     return UniString();
869 }
870 
871 //============================================================================
872 //
873 //  removePassword
874 //
875 //============================================================================
876 
877 UniString
878 URIHelper::removePassword(UniString const & rURI,
879                           INetURLObject::EncodeMechanism eEncodeMechanism,
880                           INetURLObject::DecodeMechanism eDecodeMechanism,
881                           rtl_TextEncoding eCharset)
882 {
883     INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
884     return aObj.HasError() ?
885                rURI :
886                String(aObj.GetURLNoPass(eDecodeMechanism, eCharset));
887 }
888 
889 //============================================================================
890 //
891 //  queryFSysStyle
892 //
893 //============================================================================
894 
895 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl,
896                                                    bool bAddConvenienceStyles)
897     throw (uno::RuntimeException)
898 {
899     ::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get();
900     uno::Reference< ucb::XContentProviderManager > xManager;
901     if (pBroker)
902         xManager = pBroker->getContentProviderManagerInterface();
903     uno::Reference< beans::XPropertySet > xProperties;
904     if (xManager.is())
905         xProperties
906             = uno::Reference< beans::XPropertySet >(
907                   xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY);
908     sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION;
909     if (xProperties.is())
910         try
911         {
912             xProperties->getPropertyValue(rtl::OUString(
913                                               RTL_CONSTASCII_USTRINGPARAM(
914                                                   "FileSystemNotation")))
915                 >>= nNotation;
916         }
917         catch (beans::UnknownPropertyException const &) {}
918         catch (lang::WrappedTargetException const &) {}
919 
920     // The following code depends on the fact that the
921     // com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to
922     // MAC, without any holes.  The table below has two entries per notation,
923     // the first is used if bAddConvenienceStyles == false, while the second
924     // is used if bAddConvenienceStyles == true:
925     static INetURLObject::FSysStyle const aMap[][2]
926         = { { INetURLObject::FSysStyle(0),
927               INetURLObject::FSYS_DETECT },
928                 // UNKNOWN
929             { INetURLObject::FSYS_UNX,
930               INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
931                                            | INetURLObject::FSYS_UNX) },
932                 // UNIX
933             { INetURLObject::FSYS_DOS,
934               INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
935                                            | INetURLObject::FSYS_UNX
936                                            | INetURLObject::FSYS_DOS) },
937                 // DOS
938             { INetURLObject::FSYS_MAC,
939               INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
940                                            | INetURLObject::FSYS_UNX
941                                            | INetURLObject::FSYS_MAC) } };
942     return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION
943                 || nNotation > ucb::FileSystemNotation::MAC_NOTATION ?
944                         0 :
945                         nNotation
946                             - ucb::FileSystemNotation::UNKNOWN_NOTATION]
947                    [bAddConvenienceStyles];
948 }
949