xref: /aoo41x/main/svl/source/misc/urihelper.cxx (revision 40df464e)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_svl.hxx"
26 #include <svl/urihelper.hxx>
27 #include <com/sun/star/beans/XPropertySet.hpp>
28 #include "com/sun/star/lang/WrappedTargetRuntimeException.hpp"
29 #include "com/sun/star/lang/XMultiComponentFactory.hpp"
30 #include "com/sun/star/ucb/Command.hpp"
31 #include <com/sun/star/ucb/FileSystemNotation.hpp>
32 #include "com/sun/star/ucb/IllegalIdentifierException.hpp"
33 #include "com/sun/star/ucb/UnsupportedCommandException.hpp"
34 #include "com/sun/star/ucb/XCommandEnvironment.hpp"
35 #include "com/sun/star/ucb/XCommandProcessor.hpp"
36 #include "com/sun/star/ucb/XContent.hpp"
37 #include "com/sun/star/ucb/XContentIdentifierFactory.hpp"
38 #include "com/sun/star/ucb/XContentProvider.hpp"
39 #include <com/sun/star/ucb/XContentProviderManager.hpp>
40 #include "com/sun/star/uno/Any.hxx"
41 #include "com/sun/star/uno/Exception.hpp"
42 #include "com/sun/star/uno/Reference.hxx"
43 #include "com/sun/star/uno/RuntimeException.hpp"
44 #include "com/sun/star/uno/Sequence.hxx"
45 #include "com/sun/star/uno/XComponentContext.hpp"
46 #include "com/sun/star/uno/XInterface.hpp"
47 #include "com/sun/star/uri/UriReferenceFactory.hpp"
48 #include "com/sun/star/uri/XUriReference.hpp"
49 #include "com/sun/star/uri/XUriReferenceFactory.hpp"
50 #include "cppuhelper/exc_hlp.hxx"
51 #include "comphelper/processfactory.hxx"
52 #include "osl/diagnose.h"
53 #include "rtl/ustrbuf.hxx"
54 #include "rtl/ustring.h"
55 #include "rtl/ustring.hxx"
56 #include "sal/types.h"
57 #include <tools/debug.hxx>
58 #include <tools/inetmime.hxx>
59 #include <ucbhelper/contentbroker.hxx>
60 #include <unotools/charclass.hxx>
61 #include "rtl/instance.hxx"
62 
63 namespace unnamed_svl_urihelper {}
64 using namespace unnamed_svl_urihelper;
65 	// unnamed namespaces don't work well yet...
66 
67 namespace css = com::sun::star;
68 using namespace com::sun::star;
69 
70 //============================================================================
71 //
72 //  SmartRel2Abs
73 //
74 //============================================================================
75 
76 namespace unnamed_svl_urihelper {
77 
toUniString(ByteString const & rString)78 inline UniString toUniString(ByteString const & rString)
79 {
80 	return UniString(rString, RTL_TEXTENCODING_ISO_8859_1);
81 }
82 
toUniString(UniString const & rString)83 inline UniString toUniString(UniString const & rString)
84 {
85 	return rString;
86 }
87 
88 template< typename Str >
SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,Str const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)89 inline UniString SmartRel2Abs_Impl(INetURLObject const & rTheBaseURIRef,
90 								   Str const & rTheRelURIRef,
91 								   Link const & rMaybeFileHdl,
92 								   bool bCheckFileExists,
93 								   bool bIgnoreFragment,
94 								   INetURLObject::EncodeMechanism
95 								       eEncodeMechanism,
96 								   INetURLObject::DecodeMechanism
97 								       eDecodeMechanism,
98 								   rtl_TextEncoding eCharset,
99 								   bool bRelativeNonURIs,
100 								   INetURLObject::FSysStyle eStyle)
101 {
102 	// Backwards compatibility:
103 	if (rTheRelURIRef.Len() != 0 && rTheRelURIRef.GetChar(0) == '#')
104 		return toUniString(rTheRelURIRef);
105 
106 	INetURLObject aAbsURIRef;
107     if (rTheBaseURIRef.HasError())
108         aAbsURIRef.
109             SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
110     else
111     {
112         bool bWasAbsolute;
113 		aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
114                                                  bWasAbsolute,
115                                                  bIgnoreFragment,
116                                                  eEncodeMechanism,
117                                                  eCharset,
118                                                  bRelativeNonURIs,
119                                                  eStyle);
120         if (bCheckFileExists
121             && !bWasAbsolute
122             && (aAbsURIRef.GetProtocol() == INET_PROT_FILE))
123         {
124             INetURLObject aNonFileURIRef;
125             aNonFileURIRef.SetSmartURL(rTheRelURIRef,
126                                        eEncodeMechanism,
127                                        eCharset,
128                                        eStyle);
129             if (!aNonFileURIRef.HasError()
130                 && aNonFileURIRef.GetProtocol() != INET_PROT_FILE)
131             {
132                 bool bMaybeFile = false;
133                 if (rMaybeFileHdl.IsSet())
134                 {
135                     UniString aFilePath(toUniString(rTheRelURIRef));
136                     bMaybeFile = rMaybeFileHdl.Call(&aFilePath) != 0;
137                 }
138                 if (!bMaybeFile)
139                     aAbsURIRef = aNonFileURIRef;
140             }
141         }
142     }
143 	return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
144 }
145 
146 }
147 
148 UniString
SmartRel2Abs(INetURLObject const & rTheBaseURIRef,ByteString const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)149 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
150 						ByteString const & rTheRelURIRef,
151 						Link const & rMaybeFileHdl,
152 						bool bCheckFileExists,
153 						bool bIgnoreFragment,
154 						INetURLObject::EncodeMechanism eEncodeMechanism,
155 						INetURLObject::DecodeMechanism eDecodeMechanism,
156 						rtl_TextEncoding eCharset,
157 						bool bRelativeNonURIs,
158 						INetURLObject::FSysStyle eStyle)
159 {
160 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
161 							 bCheckFileExists, bIgnoreFragment,
162 							 eEncodeMechanism, eDecodeMechanism, eCharset,
163 							 bRelativeNonURIs, eStyle);
164 }
165 
166 UniString
SmartRel2Abs(INetURLObject const & rTheBaseURIRef,UniString const & rTheRelURIRef,Link const & rMaybeFileHdl,bool bCheckFileExists,bool bIgnoreFragment,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset,bool bRelativeNonURIs,INetURLObject::FSysStyle eStyle)167 URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
168 						UniString const & rTheRelURIRef,
169 						Link const & rMaybeFileHdl,
170 						bool bCheckFileExists,
171 						bool bIgnoreFragment,
172 						INetURLObject::EncodeMechanism eEncodeMechanism,
173 						INetURLObject::DecodeMechanism eDecodeMechanism,
174 						rtl_TextEncoding eCharset,
175 						bool bRelativeNonURIs,
176 						INetURLObject::FSysStyle eStyle)
177 {
178 	return SmartRel2Abs_Impl(rTheBaseURIRef, rTheRelURIRef, rMaybeFileHdl,
179 							 bCheckFileExists, bIgnoreFragment,
180 							 eEncodeMechanism, eDecodeMechanism, eCharset,
181 							 bRelativeNonURIs, eStyle);
182 }
183 
184 //============================================================================
185 //
186 //  SetMaybeFileHdl
187 //
188 //============================================================================
189 
190 namespace { struct MaybeFileHdl : public rtl::Static< Link, MaybeFileHdl > {}; }
191 
SetMaybeFileHdl(Link const & rTheMaybeFileHdl)192 void URIHelper::SetMaybeFileHdl(Link const & rTheMaybeFileHdl)
193 {
194 	MaybeFileHdl::get() = rTheMaybeFileHdl;
195 }
196 
197 //============================================================================
198 //
199 //  GetMaybeFileHdl
200 //
201 //============================================================================
202 
GetMaybeFileHdl()203 Link URIHelper::GetMaybeFileHdl()
204 {
205 	return MaybeFileHdl::get();
206 }
207 
208 namespace {
209 
isAbsoluteHierarchicalUriReference(css::uno::Reference<css::uri::XUriReference> const & uriReference)210 bool isAbsoluteHierarchicalUriReference(
211     css::uno::Reference< css::uri::XUriReference > const & uriReference)
212 {
213     return uriReference.is() && uriReference->isAbsolute()
214         && uriReference->isHierarchical() && !uriReference->hasRelativePath();
215 }
216 
// Outcome of normalizePrefix.
//
// To improve performance, assume that if for any prefix URL of a given
// hierarchical URL either a UCB content cannot be created, or the UCB content
// does not support the getCasePreservingURL command, then this will hold for
// any other prefix URL of the given URL, too.  Those two cases are reported as
// GeneralFailure; any other css::uno::Exception raised while executing the
// command (other than a RuntimeException, which is rethrown) is reported as
// SpecificFailure.
enum Result { Success, GeneralFailure, SpecificFailure };
222 
normalizePrefix(css::uno::Reference<css::ucb::XContentProvider> const & broker,rtl::OUString const & uri,rtl::OUString * normalized)223 Result normalizePrefix(
224     css::uno::Reference< css::ucb::XContentProvider > const & broker,
225     rtl::OUString const & uri, rtl::OUString * normalized)
226 {
227     OSL_ASSERT(broker.is() && normalized != 0);
228     css::uno::Reference< css::ucb::XContent > content;
229     try {
230         content = broker->queryContent(
231             css::uno::Reference< css::ucb::XContentIdentifierFactory >(
232                 broker, css::uno::UNO_QUERY_THROW)->createContentIdentifier(
233                     uri));
234     } catch (css::ucb::IllegalIdentifierException &) {}
235     if (!content.is()) {
236         return GeneralFailure;
237     }
238     try {
239         #if OSL_DEBUG_LEVEL > 0
240         bool ok =
241         #endif
242             (css::uno::Reference< css::ucb::XCommandProcessor >(
243                    content, css::uno::UNO_QUERY_THROW)->execute(
244                        css::ucb::Command(
245                            rtl::OUString(
246                                RTL_CONSTASCII_USTRINGPARAM(
247                                    "getCasePreservingURL")),
248                            -1, css::uno::Any()),
249                        0,
250                        css::uno::Reference< css::ucb::XCommandEnvironment >())
251                >>= *normalized);
252         OSL_ASSERT(ok);
253     } catch (css::uno::RuntimeException &) {
254         throw;
255     } catch (css::ucb::UnsupportedCommandException &) {
256         return GeneralFailure;
257     } catch (css::uno::Exception &) {
258         return SpecificFailure;
259     }
260     return Success;
261 }
262 
normalize(css::uno::Reference<css::ucb::XContentProvider> const & broker,css::uno::Reference<css::uri::XUriReferenceFactory> const & uriFactory,rtl::OUString const & uriReference)263 rtl::OUString normalize(
264     css::uno::Reference< css::ucb::XContentProvider > const & broker,
265     css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
266     rtl::OUString const & uriReference)
267 {
268     // normalizePrefix can potentially fail (a typically example being a file
269     // URL that denotes a non-existing resource); in such a case, try to
270     // normalize as long a prefix of the given URL as possible (i.e., normalize
271     // all the existing directories within the path):
272     rtl::OUString normalized;
273     sal_Int32 n = uriReference.indexOf('#');
274     normalized = n == -1 ? uriReference : uriReference.copy(0, n);
275     switch (normalizePrefix(broker, normalized, &normalized)) {
276     case Success:
277         return n == -1 ? normalized : normalized + uriReference.copy(n);
278     case GeneralFailure:
279         return uriReference;
280     case SpecificFailure:
281     default:
282         break;
283     }
284     css::uno::Reference< css::uri::XUriReference > ref(
285         uriFactory->parse(uriReference));
286     if (!isAbsoluteHierarchicalUriReference(ref)) {
287         return uriReference;
288     }
289     sal_Int32 count = ref->getPathSegmentCount();
290     if (count < 2) {
291         return uriReference;
292     }
293     rtl::OUStringBuffer head(ref->getScheme());
294     head.append(static_cast< sal_Unicode >(':'));
295     if (ref->hasAuthority()) {
296         head.appendAscii(RTL_CONSTASCII_STRINGPARAM("//"));
297         head.append(ref->getAuthority());
298     }
299     for (sal_Int32 i = count - 1; i > 0; --i) {
300         rtl::OUStringBuffer buf(head);
301         for (sal_Int32 j = 0; j < i; ++j) {
302             buf.append(static_cast< sal_Unicode >('/'));
303             buf.append(ref->getPathSegment(j));
304         }
305         normalized = buf.makeStringAndClear();
306         if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
307         {
308             buf.append(normalized);
309             css::uno::Reference< css::uri::XUriReference > preRef(
310                 uriFactory->parse(normalized));
311             if (!isAbsoluteHierarchicalUriReference(preRef)) {
312                 // This could only happen if something is inconsistent:
313                 break;
314             }
315             sal_Int32 preCount = preRef->getPathSegmentCount();
316             // normalizePrefix may have added or removed a final slash:
317             if (preCount != i) {
318                 if (preCount == i - 1) {
319                     buf.append(static_cast< sal_Unicode >('/'));
320                 } else if (preCount - 1 == i && buf.getLength() > 0
321                            && buf.charAt(buf.getLength() - 1) == '/')
322                 {
323                     buf.setLength(buf.getLength() - 1);
324                 } else {
325                     // This could only happen if something is inconsistent:
326                     break;
327                 }
328             }
329             for (sal_Int32 j = i; j < count; ++j) {
330                 buf.append(static_cast< sal_Unicode >('/'));
331                 buf.append(ref->getPathSegment(j));
332             }
333             if (ref->hasQuery()) {
334                 buf.append(static_cast< sal_Unicode >('?'));
335                 buf.append(ref->getQuery());
336             }
337             if (ref->hasFragment()) {
338                 buf.append(static_cast< sal_Unicode >('#'));
339                 buf.append(ref->getFragment());
340             }
341             return buf.makeStringAndClear();
342         }
343     }
344     return uriReference;
345 }
346 
347 }
348 
349 css::uno::Reference< css::uri::XUriReference >
normalizedMakeRelative(css::uno::Reference<css::uno::XComponentContext> const & context,rtl::OUString const & baseUriReference,rtl::OUString const & uriReference)350 URIHelper::normalizedMakeRelative(
351     css::uno::Reference< css::uno::XComponentContext > const & context,
352     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
353 {
354     OSL_ASSERT(context.is());
355     css::uno::Reference< css::lang::XMultiComponentFactory > componentFactory(
356         context->getServiceManager());
357     if (!componentFactory.is()) {
358         throw css::uno::RuntimeException(
359             rtl::OUString(
360                 RTL_CONSTASCII_USTRINGPARAM(
361                     "component context has no service manager")),
362             css::uno::Reference< css::uno::XInterface >());
363     }
364     css::uno::Sequence< css::uno::Any > args(2);
365     args[0] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Local"));
366     args[1] <<= rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Office"));
367     css::uno::Reference< css::ucb::XContentProvider > broker;
368     try {
369         broker = css::uno::Reference< css::ucb::XContentProvider >(
370             componentFactory->createInstanceWithArgumentsAndContext(
371                 rtl::OUString(
372                     RTL_CONSTASCII_USTRINGPARAM(
373                         "com.sun.star.ucb.UniversalContentBroker")),
374                 args, context),
375             css::uno::UNO_QUERY_THROW);
376     } catch (css::uno::RuntimeException &) {
377         throw;
378     } catch (css::uno::Exception &) {
379         css::uno::Any exception(cppu::getCaughtException());
380         throw css::lang::WrappedTargetRuntimeException(
381             rtl::OUString(
382                 RTL_CONSTASCII_USTRINGPARAM(
383                     "creating com.sun.star.ucb.UniversalContentBroker failed")),
384             css::uno::Reference< css::uno::XInterface >(),
385             exception);
386     }
387     css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
388         css::uri::UriReferenceFactory::create(context));
389     return uriFactory->makeRelative(
390         uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
391         uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
392         true, false);
393 }
394 
simpleNormalizedMakeRelative(rtl::OUString const & baseUriReference,rtl::OUString const & uriReference)395 rtl::OUString URIHelper::simpleNormalizedMakeRelative(
396     rtl::OUString const & baseUriReference, rtl::OUString const & uriReference)
397 {
398     com::sun::star::uno::Reference< com::sun::star::uri::XUriReference > rel(
399         URIHelper::normalizedMakeRelative(
400             com::sun::star::uno::Reference<
401             com::sun::star::uno::XComponentContext >(
402                 (com::sun::star::uno::Reference<
403                  com::sun::star::beans::XPropertySet >(
404                     comphelper::getProcessServiceFactory(),
405                     com::sun::star::uno::UNO_QUERY_THROW)->
406                  getPropertyValue(
407                      rtl::OUString(
408                          RTL_CONSTASCII_USTRINGPARAM("DefaultContext")))),
409                 com::sun::star::uno::UNO_QUERY_THROW),
410             baseUriReference, uriReference));
411     return rel.is() ? rel->getUriReference() : uriReference;
412 }
413 
414 //============================================================================
415 //
416 //  FindFirstURLInText
417 //
418 //============================================================================
419 
420 namespace unnamed_svl_urihelper {
421 
nextChar(UniString const & rStr,xub_StrLen nPos)422 inline xub_StrLen nextChar(UniString const & rStr, xub_StrLen nPos)
423 {
424 	return INetMIME::isHighSurrogate(rStr.GetChar(nPos))
425 		   && rStr.Len() - nPos >= 2
426 		   && INetMIME::isLowSurrogate(rStr.GetChar(nPos + 1)) ?
427 		       nPos + 2 : nPos + 1;
428 }
429 
isBoundary1(CharClass const & rCharClass,UniString const & rStr,xub_StrLen nPos,xub_StrLen nEnd)430 bool isBoundary1(CharClass const & rCharClass, UniString const & rStr,
431                  xub_StrLen nPos, xub_StrLen nEnd)
432 {
433     if (nPos == nEnd)
434         return true;
435     if (rCharClass.isLetterNumeric(rStr, nPos))
436         return false;
437     switch (rStr.GetChar(nPos))
438     {
439     case '$':
440     case '%':
441     case '&':
442     case '-':
443     case '/':
444     case '@':
445     case '\\':
446         return false;
447     default:
448         return true;
449     }
450 }
451 
isBoundary2(CharClass const & rCharClass,UniString const & rStr,xub_StrLen nPos,xub_StrLen nEnd)452 bool isBoundary2(CharClass const & rCharClass, UniString const & rStr,
453                  xub_StrLen nPos, xub_StrLen nEnd)
454 {
455     if (nPos == nEnd)
456         return true;
457     if (rCharClass.isLetterNumeric(rStr, nPos))
458         return false;
459     switch (rStr.GetChar(nPos))
460     {
461     case '!':
462     case '#':
463     case '$':
464     case '%':
465     case '&':
466     case '\'':
467     case '*':
468     case '+':
469     case '-':
470     case '/':
471     case '=':
472     case '?':
473     case '@':
474     case '^':
475     case '_':
476     case '`':
477     case '{':
478     case '|':
479     case '}':
480     case '~':
481         return false;
482     default:
483         return true;
484     }
485 }
486 
checkWChar(CharClass const & rCharClass,UniString const & rStr,xub_StrLen * pPos,xub_StrLen * pEnd,bool bBackslash=false,bool bPipe=false)487 bool checkWChar(CharClass const & rCharClass, UniString const & rStr,
488                 xub_StrLen * pPos, xub_StrLen * pEnd, bool bBackslash = false,
489                 bool bPipe = false)
490 {
491 	sal_Unicode c = rStr.GetChar(*pPos);
492 	if (INetMIME::isUSASCII(c))
493 	{
494 		static sal_uInt8 const aMap[128]
495 			= { 0, 0, 0, 0, 0, 0, 0, 0,
496 				0, 0, 0, 0, 0, 0, 0, 0,
497 				0, 0, 0, 0, 0, 0, 0, 0,
498 				0, 0, 0, 0, 0, 0, 0, 0,
499 				0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
500 				1, 1, 1, 1, 1, 4, 1, 4,   // ()*+,-./
501 				4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
502 				4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
503 				4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
504 				4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
505 				4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
506 				4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
507 				0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
508 				4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
509 				4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
510 				4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
511 		switch (aMap[c])
512 		{
513 			default: // not uric
514 				return false;
515 
516 			case 1: // uric
517 				++(*pPos);
518 				return true;
519 
520 			case 2: // "\"
521 				if (bBackslash)
522 				{
523 					*pEnd = ++(*pPos);
524 					return true;
525 				}
526 				else
527 					return false;
528 
529 			case 3: // "|"
530 				if (bPipe)
531 				{
532 					*pEnd = ++(*pPos);
533 					return true;
534 				}
535 				else
536 					return false;
537 
538 			case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
539                     // isBoundary1)
540 				*pEnd = ++(*pPos);
541 				return true;
542 		}
543 	}
544 	else if (rCharClass.isLetterNumeric(rStr, *pPos))
545 	{
546 		*pEnd = *pPos = nextChar(rStr, *pPos);
547 		return true;
548 	}
549 	else
550 		return false;
551 }
552 
scanDomain(UniString const & rStr,xub_StrLen * pPos,xub_StrLen nEnd)553 sal_uInt32 scanDomain(UniString const & rStr, xub_StrLen * pPos,
554                       xub_StrLen nEnd)
555 {
556 	sal_Unicode const * pBuffer = rStr.GetBuffer();
557 	sal_Unicode const * p = pBuffer + *pPos;
558 	sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
559 	*pPos = sal::static_int_cast< xub_StrLen >(p - pBuffer);
560 	return nLabels;
561 }
562 
563 }
564 
565 UniString
FindFirstURLInText(UniString const & rText,xub_StrLen & rBegin,xub_StrLen & rEnd,CharClass const & rCharClass,INetURLObject::EncodeMechanism eMechanism,rtl_TextEncoding eCharset,INetURLObject::FSysStyle eStyle)566 URIHelper::FindFirstURLInText(UniString const & rText,
567                               xub_StrLen & rBegin,
568                               xub_StrLen & rEnd,
569                               CharClass const & rCharClass,
570                               INetURLObject::EncodeMechanism eMechanism,
571                               rtl_TextEncoding eCharset,
572                               INetURLObject::FSysStyle eStyle)
573 {
574     if (!(rBegin <= rEnd && rEnd <= rText.Len()))
575         return UniString();
576 
577     // Search for the first substring of [rBegin..rEnd[ that matches any of the
578     // following productions (for which the appropriate style bit is set in
579     // eStyle, if applicable).
580     //
581     // 1st Production (known scheme):
582     //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
583     //        \B1
584     //
585     // 2nd Production (file):
586     //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
587     //
588     // 3rd Production (ftp):
589     //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
590     //
591     // 4th Production (http):
592     //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
593     //
594     // 5th Production (mailto):
595     //    \B2 local-part "@" domain \B1
596     //
597     // 6th Production (UNC file):
598     //    \B1 "\\" domain "\" *(wchar / "\") \B1
599     //
600     // 7th Production (DOS file):
601     //    \B1 ALPHA ":\" *(wchar / "\") \B1
602     //
603     // 8th Production (Unix-like DOS file):
604     //    \B1 ALPHA ":/" *(wchar / "\") \B1
605     //
606     // The productions use the following auxiliary rules.
607     //
608     //    local-part = atom *("." atom)
609     //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
610     //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
611     //              / "~")
612     //    domain = label *("." label)
613     //    label = alphanum [*(alphanum / "-") alphanum]
614     //    alphanum = ALPHA / DIGIT
615     //    wchar = <any uric character (ignoring the escaped rule), or "%", or
616     //             a letter or digit (according to rCharClass)>
617     //
618     // "\B1" (boundary 1) stands for the beginning or end of the block of text,
619     // or a character that is neither (a) a letter or digit (according to
620     // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
621     // (FIXME:  What was the rationale for this set of punctuation characters?)
622     //
623     // "\B2" (boundary 2) stands for the beginning or end of the block of text,
624     // or a character that is neither (a) a letter or digit (according to
625     // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
626     // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
627     // 822 <atom> character, or "@" from \B1's set above).
628     //
629     // Productions 1--4, and 6--8 try to find a maximum-length match, but they
630     // stop at the first <wchar> character that is a "\B1" character which is
631     // only followed by "\B1" characters (taking "\" and "|" characters into
632     // account appropriately).  Production 5 simply tries to find a maximum-
633     // length match.
634     //
635     // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--9
636     // use ENCODE_ALL.
637     //
638     // Productions 6--9 are only applicable if the FSYS_DOS bit is set in
639     // eStyle.
640 
641     bool bBoundary1 = true;
642     bool bBoundary2 = true;
643     for (xub_StrLen nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
644     {
645         sal_Unicode c = rText.GetChar(nPos);
646         if (bBoundary1)
647         {
648             if (INetMIME::isAlpha(c))
649             {
650                 xub_StrLen i = nPos;
651                 INetProtocol eScheme
652                     = INetURLObject::CompareProtocolScheme(UniString(rText, i,
653                                                                      rEnd));
654                 if (eScheme == INET_PROT_FILE) // 2nd
655                 {
656                     while (rText.GetChar(i++) != ':') ;
657                     xub_StrLen nPrefixEnd = i;
658                     xub_StrLen nUriEnd = i;
659                     while (i != rEnd
660                            && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
661                                          true)) ;
662                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
663                     {
664                         ++i;
665                         while (i != rEnd
666                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
667                     }
668                     if (nUriEnd != nPrefixEnd
669                         && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
670                     {
671                         INetURLObject aUri(UniString(rText, nPos,
672                                                      nUriEnd - nPos),
673                                            INET_PROT_FILE, eMechanism, eCharset,
674                                            eStyle);
675                         if (!aUri.HasError())
676                         {
677                             rBegin = nPos;
678                             rEnd = nUriEnd;
679                             return
680                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
681                         }
682                     }
683                 }
684                 else if (eScheme != INET_PROT_NOT_VALID) // 1st
685                 {
686                     while (rText.GetChar(i++) != ':') ;
687                     xub_StrLen nPrefixEnd = i;
688                     xub_StrLen nUriEnd = i;
689                     while (i != rEnd
690                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
691                     if (i != nPrefixEnd && rText.GetChar(i) == '#')
692                     {
693                         ++i;
694                         while (i != rEnd
695                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
696                     }
697                     if (nUriEnd != nPrefixEnd
698                         && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
699                             || rText.GetChar(nUriEnd) == '\\'))
700                     {
701                         INetURLObject aUri(UniString(rText, nPos,
702                                                      nUriEnd - nPos),
703                                            INET_PROT_HTTP, eMechanism,
704                                            eCharset);
705                         if (!aUri.HasError())
706                         {
707                             rBegin = nPos;
708                             rEnd = nUriEnd;
709                             return
710                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
711                         }
712                     }
713                 }
714 
715                 // 3rd, 4th:
716                 i = nPos;
717                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
718                 if (nLabels >= 3
719                     && rText.GetChar(nPos + 3) == '.'
720                     && (((rText.GetChar(nPos) == 'w'
721                           || rText.GetChar(nPos) == 'W')
722                          && (rText.GetChar(nPos + 1) == 'w'
723                              || rText.GetChar(nPos + 1) == 'W')
724                          && (rText.GetChar(nPos + 2) == 'w'
725                              || rText.GetChar(nPos + 2) == 'W'))
726                         || ((rText.GetChar(nPos) == 'f'
727                              || rText.GetChar(nPos) == 'F')
728                             && (rText.GetChar(nPos + 1) == 't'
729                                 || rText.GetChar(nPos + 1) == 'T')
730                             && (rText.GetChar(nPos + 2) == 'p'
731                                 || rText.GetChar(nPos + 2) == 'P'))))
732                     // (note that rText.GetChar(nPos + 3) is guaranteed to be
733                     // valid)
734                 {
735                     xub_StrLen nUriEnd = i;
736                     if (i != rEnd && rText.GetChar(i) == '/')
737                     {
738                         nUriEnd = ++i;
739                         while (i != rEnd
740                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
741                     }
742                     if (i != rEnd && rText.GetChar(i) == '#')
743                     {
744                         ++i;
745                         while (i != rEnd
746                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
747                     }
748                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
749                         || rText.GetChar(nUriEnd) == '\\')
750                     {
751                         INetURLObject aUri(UniString(rText, nPos,
752                                                      nUriEnd - nPos),
753                                            INET_PROT_HTTP, eMechanism,
754                                            eCharset);
755                         if (!aUri.HasError())
756                         {
757                             rBegin = nPos;
758                             rEnd = nUriEnd;
759                             return
760                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
761                         }
762                     }
763                 }
764 
765                 if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 3
766                     && rText.GetChar(nPos + 1) == ':'
767                     && (rText.GetChar(nPos + 2) == '/'
768                         || rText.GetChar(nPos + 2) == '\\')) // 7th, 8th
769                 {
770                     i = nPos + 3;
771                     xub_StrLen nUriEnd = i;
772                     while (i != rEnd
773                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
774                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
775                     {
776                         INetURLObject aUri(UniString(rText, nPos,
777                                                      nUriEnd - nPos),
778                                            INET_PROT_FILE,
779                                            INetURLObject::ENCODE_ALL,
780                                            RTL_TEXTENCODING_UTF8,
781                                            INetURLObject::FSYS_DOS);
782                         if (!aUri.HasError())
783                         {
784                             rBegin = nPos;
785                             rEnd = nUriEnd;
786                             return
787                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
788                         }
789                     }
790                 }
791             }
792             else if ((eStyle & INetURLObject::FSYS_DOS) != 0 && rEnd - nPos >= 2
793                      && rText.GetChar(nPos) == '\\'
794                      && rText.GetChar(nPos + 1) == '\\') // 6th
795             {
796                 xub_StrLen i = nPos + 2;
797                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
798                 if (nLabels >= 1 && i != rEnd && rText.GetChar(i) == '\\')
799                 {
800                     xub_StrLen nUriEnd = ++i;
801                     while (i != rEnd
802                            && checkWChar(rCharClass, rText, &i, &nUriEnd,
803                                          true)) ;
804                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
805                     {
806                         INetURLObject aUri(UniString(rText, nPos,
807                                                      nUriEnd - nPos),
808                                            INET_PROT_FILE,
809                                            INetURLObject::ENCODE_ALL,
810                                            RTL_TEXTENCODING_UTF8,
811                                            INetURLObject::FSYS_DOS);
812                         if (!aUri.HasError())
813                         {
814                             rBegin = nPos;
815                             rEnd = nUriEnd;
816                             return
817                                 aUri.GetMainURL(INetURLObject::DECODE_TO_IURI);
818                         }
819                     }
820                 }
821             }
822         }
823         if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
824         {
825             bool bDot = false;
826             for (xub_StrLen i = nPos + 1; i != rEnd; ++i)
827             {
828                 sal_Unicode c2 = rText.GetChar(i);
829                 if (INetMIME::isAtomChar(c2))
830                     bDot = false;
831                 else if (bDot)
832                     break;
833                 else if (c2 == '.')
834                     bDot = true;
835                 else
836                 {
837                     if (c2 == '@')
838                     {
839                         ++i;
840                         sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
841                         if (nLabels >= 1
842                             && isBoundary1(rCharClass, rText, i, rEnd))
843                         {
844                             INetURLObject aUri(UniString(rText, nPos, i - nPos),
845                                                INET_PROT_MAILTO,
846                                                INetURLObject::ENCODE_ALL);
847                             if (!aUri.HasError())
848                             {
849                                 rBegin = nPos;
850                                 rEnd = i;
851                                 return aUri.GetMainURL(
852                                            INetURLObject::DECODE_TO_IURI);
853                             }
854                         }
855                     }
856                     break;
857                 }
858             }
859         }
860         bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
861         bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
862     }
863     rBegin = rEnd;
864     return UniString();
865 }
866 
867 //============================================================================
868 //
869 //  removePassword
870 //
871 //============================================================================
872 
873 UniString
removePassword(UniString const & rURI,INetURLObject::EncodeMechanism eEncodeMechanism,INetURLObject::DecodeMechanism eDecodeMechanism,rtl_TextEncoding eCharset)874 URIHelper::removePassword(UniString const & rURI,
875 						  INetURLObject::EncodeMechanism eEncodeMechanism,
876 						  INetURLObject::DecodeMechanism eDecodeMechanism,
877 						  rtl_TextEncoding eCharset)
878 {
879 	INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
880 	return aObj.HasError() ?
881 		       rURI :
882 		       String(aObj.GetURLNoPass(eDecodeMechanism, eCharset));
883 }
884 
885 //============================================================================
886 //
887 //  queryFSysStyle
888 //
889 //============================================================================
890 
queryFSysStyle(UniString const & rFileUrl,bool bAddConvenienceStyles)891 INetURLObject::FSysStyle URIHelper::queryFSysStyle(UniString const & rFileUrl,
892 												   bool bAddConvenienceStyles)
893 	throw (uno::RuntimeException)
894 {
895 	::ucbhelper::ContentBroker const * pBroker = ::ucbhelper::ContentBroker::get();
896 	uno::Reference< ucb::XContentProviderManager > xManager;
897 	if (pBroker)
898 		xManager = pBroker->getContentProviderManagerInterface();
899 	uno::Reference< beans::XPropertySet > xProperties;
900 	if (xManager.is())
901 		xProperties
902 			= uno::Reference< beans::XPropertySet >(
903 				  xManager->queryContentProvider(rFileUrl), uno::UNO_QUERY);
904 	sal_Int32 nNotation = ucb::FileSystemNotation::UNKNOWN_NOTATION;
905 	if (xProperties.is())
906 		try
907 		{
908 			xProperties->getPropertyValue(rtl::OUString(
909 				                              RTL_CONSTASCII_USTRINGPARAM(
910 												  "FileSystemNotation")))
911 				>>= nNotation;
912 		}
913 		catch (beans::UnknownPropertyException const &) {}
914 		catch (lang::WrappedTargetException const &) {}
915 
916 	// The following code depends on the fact that the
917 	// com::sun::star::ucb::FileSystemNotation constants range from UNKNOWN to
918 	// MAC, without any holes.  The table below has two entries per notation,
919 	// the first is used if bAddConvenienceStyles == false, while the second
920 	// is used if bAddConvenienceStyles == true:
921 	static INetURLObject::FSysStyle const aMap[][2]
922 		= { { INetURLObject::FSysStyle(0),
923 			  INetURLObject::FSYS_DETECT },
924 			    // UNKNOWN
925 			{ INetURLObject::FSYS_UNX,
926 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
927 									       | INetURLObject::FSYS_UNX) },
928 			    // UNIX
929 			{ INetURLObject::FSYS_DOS,
930 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
931 									       | INetURLObject::FSYS_UNX
932 									       | INetURLObject::FSYS_DOS) },
933 			    // DOS
934 			{ INetURLObject::FSYS_MAC,
935 			  INetURLObject::FSysStyle(INetURLObject::FSYS_VOS
936 									       | INetURLObject::FSYS_UNX
937 									       | INetURLObject::FSYS_MAC) } };
938 	return aMap[nNotation < ucb::FileSystemNotation::UNKNOWN_NOTATION
939 			    || nNotation > ucb::FileSystemNotation::MAC_NOTATION ?
940 			            0 :
941 			            nNotation
942 			                - ucb::FileSystemNotation::UNKNOWN_NOTATION]
943 		           [bAddConvenienceStyles];
944 }
945