xref: /aoo42x/main/ucb/source/regexp/regexp.cxx (revision 2f86921c)
/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/
21*2f86921cSAndrew Rist 
22*2f86921cSAndrew Rist 
23cdf0e10cSrcweir 
24cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
25cdf0e10cSrcweir #include "precompiled_ucb.hxx"
26cdf0e10cSrcweir #include <regexp.hxx>
27cdf0e10cSrcweir 
28cdf0e10cSrcweir #include <cstddef>
29cdf0e10cSrcweir 
30cdf0e10cSrcweir #include "osl/diagnose.h"
31cdf0e10cSrcweir #include <com/sun/star/lang/IllegalArgumentException.hpp>
32cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
33cdf0e10cSrcweir #include <rtl/ustring.hxx>
34cdf0e10cSrcweir 
35cdf0e10cSrcweir namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
36cdf0e10cSrcweir 	// unnamed namespaces don't work well yet...
37cdf0e10cSrcweir 
38cdf0e10cSrcweir using namespace com::sun::star;
39cdf0e10cSrcweir using namespace ucb_impl;
40cdf0e10cSrcweir 
//============================================================================
//
//  Regexp
//
//============================================================================
46cdf0e10cSrcweir 
Regexp(Kind eTheKind,rtl::OUString const & rThePrefix,bool bTheEmptyDomain,rtl::OUString const & rTheInfix,bool bTheTranslation,rtl::OUString const & rTheReversePrefix)47cdf0e10cSrcweir inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
48cdf0e10cSrcweir 					  bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
49cdf0e10cSrcweir 					  bool bTheTranslation,
50cdf0e10cSrcweir 					  rtl::OUString const & rTheReversePrefix):
51cdf0e10cSrcweir 	m_eKind(eTheKind),
52cdf0e10cSrcweir 	m_aPrefix(rThePrefix),
53cdf0e10cSrcweir 	m_aInfix(rTheInfix),
54cdf0e10cSrcweir 	m_aReversePrefix(rTheReversePrefix),
55cdf0e10cSrcweir 	m_bEmptyDomain(bTheEmptyDomain),
56cdf0e10cSrcweir 	m_bTranslation(bTheTranslation)
57cdf0e10cSrcweir {
58cdf0e10cSrcweir 	OSL_ASSERT(m_eKind == KIND_DOMAIN
59cdf0e10cSrcweir 			   || !m_bEmptyDomain && m_aInfix.getLength() == 0);
60cdf0e10cSrcweir 	OSL_ASSERT(m_bTranslation || m_aReversePrefix.getLength() == 0);
61cdf0e10cSrcweir }
62cdf0e10cSrcweir 
63cdf0e10cSrcweir //============================================================================
64cdf0e10cSrcweir namespace unnamed_ucb_regexp {
65cdf0e10cSrcweir 
matchStringIgnoreCase(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,rtl::OUString const & rString)66cdf0e10cSrcweir bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
67cdf0e10cSrcweir 						   sal_Unicode const * pEnd,
68cdf0e10cSrcweir 						   rtl::OUString const & rString)
69cdf0e10cSrcweir {
70cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
71cdf0e10cSrcweir 
72cdf0e10cSrcweir 	sal_Unicode const * q = rString.getStr();
73cdf0e10cSrcweir 	sal_Unicode const * qEnd = q + rString.getLength();
74cdf0e10cSrcweir 
75cdf0e10cSrcweir 	if (pEnd - p < qEnd - q)
76cdf0e10cSrcweir 		return false;
77cdf0e10cSrcweir 
78cdf0e10cSrcweir 	while (q != qEnd)
79cdf0e10cSrcweir 	{
80cdf0e10cSrcweir 		sal_Unicode c1 = *p++;
81cdf0e10cSrcweir 		sal_Unicode c2 = *q++;
82cdf0e10cSrcweir 		if (c1 >= 'a' && c1 <= 'z')
83cdf0e10cSrcweir 			c1 -= 'a' - 'A';
84cdf0e10cSrcweir 		if (c2 >= 'a' && c2 <= 'z')
85cdf0e10cSrcweir 			c2 -= 'a' - 'A';
86cdf0e10cSrcweir 		if (c1 != c2)
87cdf0e10cSrcweir 			return false;
88cdf0e10cSrcweir 	}
89cdf0e10cSrcweir 
90cdf0e10cSrcweir 	*pBegin = p;
91cdf0e10cSrcweir 	return true;
92cdf0e10cSrcweir }
93cdf0e10cSrcweir 
94cdf0e10cSrcweir }
95cdf0e10cSrcweir 
matches(rtl::OUString const & rString,rtl::OUString * pTranslation,bool * pTranslated) const96cdf0e10cSrcweir bool Regexp::matches(rtl::OUString const & rString,
97cdf0e10cSrcweir 					 rtl::OUString * pTranslation, bool * pTranslated) const
98cdf0e10cSrcweir {
99cdf0e10cSrcweir 	sal_Unicode const * pBegin = rString.getStr();
100cdf0e10cSrcweir 	sal_Unicode const * pEnd = pBegin + rString.getLength();
101cdf0e10cSrcweir 
102cdf0e10cSrcweir 	bool bMatches = false;
103cdf0e10cSrcweir 
104cdf0e10cSrcweir 	sal_Unicode const * p = pBegin;
105cdf0e10cSrcweir 	if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
106cdf0e10cSrcweir 	{
107cdf0e10cSrcweir 		sal_Unicode const * pBlock1Begin = p;
108cdf0e10cSrcweir 		sal_Unicode const * pBlock1End = pEnd;
109cdf0e10cSrcweir 
110cdf0e10cSrcweir 		sal_Unicode const * pBlock2Begin = 0;
111cdf0e10cSrcweir 		sal_Unicode const * pBlock2End = 0;
112cdf0e10cSrcweir 
113cdf0e10cSrcweir 		switch (m_eKind)
114cdf0e10cSrcweir 		{
115cdf0e10cSrcweir 			case KIND_PREFIX:
116cdf0e10cSrcweir 				bMatches = true;
117cdf0e10cSrcweir 				break;
118cdf0e10cSrcweir 
119cdf0e10cSrcweir 			case KIND_AUTHORITY:
120cdf0e10cSrcweir 				bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
121cdf0e10cSrcweir 				break;
122cdf0e10cSrcweir 
123cdf0e10cSrcweir 			case KIND_DOMAIN:
124cdf0e10cSrcweir 				if (!m_bEmptyDomain)
125cdf0e10cSrcweir 				{
126cdf0e10cSrcweir 					if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
127cdf0e10cSrcweir 						break;
128cdf0e10cSrcweir 					++p;
129cdf0e10cSrcweir 				}
130cdf0e10cSrcweir 				for (;;)
131cdf0e10cSrcweir 				{
132cdf0e10cSrcweir 					sal_Unicode const * q = p;
133cdf0e10cSrcweir 					if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
134cdf0e10cSrcweir 						&& (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
135cdf0e10cSrcweir 					{
136cdf0e10cSrcweir 						bMatches = true;
137cdf0e10cSrcweir 						pBlock1End = p;
138cdf0e10cSrcweir 						pBlock2Begin = q;
139cdf0e10cSrcweir 						pBlock2End = pEnd;
140cdf0e10cSrcweir 						break;
141cdf0e10cSrcweir 					}
142cdf0e10cSrcweir 
143cdf0e10cSrcweir 					if (p == pEnd)
144cdf0e10cSrcweir 						break;
145cdf0e10cSrcweir 
146cdf0e10cSrcweir 					sal_Unicode c = *p++;
147cdf0e10cSrcweir 					if (c == '/' || c == '?' || c == '#')
148cdf0e10cSrcweir 						break;
149cdf0e10cSrcweir 				}
150cdf0e10cSrcweir 				break;
151cdf0e10cSrcweir 		}
152cdf0e10cSrcweir 
153cdf0e10cSrcweir 		if (bMatches)
154cdf0e10cSrcweir 		{
155cdf0e10cSrcweir 			if (m_bTranslation)
156cdf0e10cSrcweir 			{
157cdf0e10cSrcweir 				if (pTranslation)
158cdf0e10cSrcweir 				{
159cdf0e10cSrcweir 					rtl::OUStringBuffer aBuffer(m_aReversePrefix);
160cdf0e10cSrcweir 					aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
161cdf0e10cSrcweir 					aBuffer.append(m_aInfix);
162cdf0e10cSrcweir 					aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
163cdf0e10cSrcweir 					*pTranslation = aBuffer.makeStringAndClear();
164cdf0e10cSrcweir 				}
165cdf0e10cSrcweir 				if (pTranslated)
166cdf0e10cSrcweir 					*pTranslated = true;
167cdf0e10cSrcweir 			}
168cdf0e10cSrcweir 			else
169cdf0e10cSrcweir 			{
170cdf0e10cSrcweir 				if (pTranslation)
171cdf0e10cSrcweir 					*pTranslation = rString;
172cdf0e10cSrcweir 				if (pTranslated)
173cdf0e10cSrcweir 					*pTranslated = false;
174cdf0e10cSrcweir 			}
175cdf0e10cSrcweir 		}
176cdf0e10cSrcweir 	}
177cdf0e10cSrcweir 
178cdf0e10cSrcweir 	return bMatches;
179cdf0e10cSrcweir }
180cdf0e10cSrcweir 
181cdf0e10cSrcweir //============================================================================
182cdf0e10cSrcweir namespace unnamed_ucb_regexp {
183cdf0e10cSrcweir 
isAlpha(sal_Unicode c)184cdf0e10cSrcweir inline bool isAlpha(sal_Unicode c)
185cdf0e10cSrcweir {
186cdf0e10cSrcweir 	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
187cdf0e10cSrcweir }
188cdf0e10cSrcweir 
isDigit(sal_Unicode c)189cdf0e10cSrcweir inline bool isDigit(sal_Unicode c)
190cdf0e10cSrcweir {
191cdf0e10cSrcweir 	return c >= '0' && c <= '9';
192cdf0e10cSrcweir }
193cdf0e10cSrcweir 
isScheme(rtl::OUString const & rString,bool bColon)194cdf0e10cSrcweir bool isScheme(rtl::OUString const & rString, bool bColon)
195cdf0e10cSrcweir {
196cdf0e10cSrcweir 	// Return true if rString matches <scheme> (plus a trailing ":" if bColon
197cdf0e10cSrcweir     // is true) from RFC 2396:
198cdf0e10cSrcweir 	sal_Unicode const * p = rString.getStr();
199cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rString.getLength();
200cdf0e10cSrcweir 	if (p != pEnd && isAlpha(*p))
201cdf0e10cSrcweir 		for (++p;;)
202cdf0e10cSrcweir 		{
203cdf0e10cSrcweir 			if (p == pEnd)
204cdf0e10cSrcweir 				return !bColon;
205cdf0e10cSrcweir 			sal_Unicode c = *p++;
206cdf0e10cSrcweir 			if (!(isAlpha(c) || isDigit(c)
207cdf0e10cSrcweir                   || c == '+' || c == '-' || c == '.'))
208cdf0e10cSrcweir                 return bColon && c == ':' && p == pEnd;
209cdf0e10cSrcweir 		}
210cdf0e10cSrcweir 	return false;
211cdf0e10cSrcweir }
212cdf0e10cSrcweir 
appendStringLiteral(rtl::OUStringBuffer * pBuffer,rtl::OUString const & rString)213cdf0e10cSrcweir void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
214cdf0e10cSrcweir 						 rtl::OUString const & rString)
215cdf0e10cSrcweir {
216cdf0e10cSrcweir 	OSL_ASSERT(pBuffer);
217cdf0e10cSrcweir 
218cdf0e10cSrcweir 	pBuffer->append(sal_Unicode('"'));
219cdf0e10cSrcweir 	sal_Unicode const * p = rString.getStr();
220cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rString.getLength();
221cdf0e10cSrcweir 	while (p != pEnd)
222cdf0e10cSrcweir 	{
223cdf0e10cSrcweir 		sal_Unicode c = *p++;
224cdf0e10cSrcweir 		if (c == '"' || c == '\\')
225cdf0e10cSrcweir 			pBuffer->append(sal_Unicode('\\'));
226cdf0e10cSrcweir 		pBuffer->append(c);
227cdf0e10cSrcweir 	}
228cdf0e10cSrcweir 	pBuffer->append(sal_Unicode('"'));
229cdf0e10cSrcweir }
230cdf0e10cSrcweir 
231cdf0e10cSrcweir }
232cdf0e10cSrcweir 
getRegexp(bool bReverse) const233cdf0e10cSrcweir rtl::OUString Regexp::getRegexp(bool bReverse) const
234cdf0e10cSrcweir {
235cdf0e10cSrcweir 	if (m_bTranslation)
236cdf0e10cSrcweir 	{
237cdf0e10cSrcweir 		rtl::OUStringBuffer aBuffer;
238cdf0e10cSrcweir 		if (bReverse)
239cdf0e10cSrcweir 		{
240cdf0e10cSrcweir 			if (m_aReversePrefix.getLength() != 0)
241cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aReversePrefix);
242cdf0e10cSrcweir 		}
243cdf0e10cSrcweir 		else
244cdf0e10cSrcweir 		{
245cdf0e10cSrcweir 			if (m_aPrefix.getLength() != 0)
246cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aPrefix);
247cdf0e10cSrcweir 		}
248cdf0e10cSrcweir 		switch (m_eKind)
249cdf0e10cSrcweir 		{
250cdf0e10cSrcweir 			case KIND_PREFIX:
251cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
252cdf0e10cSrcweir 				break;
253cdf0e10cSrcweir 
254cdf0e10cSrcweir 			case KIND_AUTHORITY:
255cdf0e10cSrcweir 				aBuffer.
256cdf0e10cSrcweir 					appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
257cdf0e10cSrcweir 				break;
258cdf0e10cSrcweir 
259cdf0e10cSrcweir 			case KIND_DOMAIN:
260cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
261cdf0e10cSrcweir 				aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
262cdf0e10cSrcweir 				if (m_aInfix.getLength() != 0)
263cdf0e10cSrcweir 					appendStringLiteral(&aBuffer, m_aInfix);
264cdf0e10cSrcweir 				aBuffer.
265cdf0e10cSrcweir 					appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
266cdf0e10cSrcweir 				break;
267cdf0e10cSrcweir 		}
268cdf0e10cSrcweir 		aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
269cdf0e10cSrcweir 		if (bReverse)
270cdf0e10cSrcweir 		{
271cdf0e10cSrcweir 			if (m_aPrefix.getLength() != 0)
272cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aPrefix);
273cdf0e10cSrcweir 		}
274cdf0e10cSrcweir 		else
275cdf0e10cSrcweir 		{
276cdf0e10cSrcweir 			if (m_aReversePrefix.getLength() != 0)
277cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aReversePrefix);
278cdf0e10cSrcweir 		}
279cdf0e10cSrcweir 		aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
280cdf0e10cSrcweir 		return aBuffer.makeStringAndClear();
281cdf0e10cSrcweir 	}
282cdf0e10cSrcweir 	else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
283cdf0e10cSrcweir 		return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
284cdf0e10cSrcweir 	else
285cdf0e10cSrcweir 	{
286cdf0e10cSrcweir 		rtl::OUStringBuffer aBuffer;
287cdf0e10cSrcweir 		if (m_aPrefix.getLength() != 0)
288cdf0e10cSrcweir 			appendStringLiteral(&aBuffer, m_aPrefix);
289cdf0e10cSrcweir 		switch (m_eKind)
290cdf0e10cSrcweir 		{
291cdf0e10cSrcweir 			case KIND_PREFIX:
292cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
293cdf0e10cSrcweir 				break;
294cdf0e10cSrcweir 
295cdf0e10cSrcweir 			case KIND_AUTHORITY:
296cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
297cdf0e10cSrcweir 				break;
298cdf0e10cSrcweir 
299cdf0e10cSrcweir 			case KIND_DOMAIN:
300cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
301cdf0e10cSrcweir 				aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
302cdf0e10cSrcweir 				if (m_aInfix.getLength() != 0)
303cdf0e10cSrcweir 					appendStringLiteral(&aBuffer, m_aInfix);
304cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
305cdf0e10cSrcweir 				break;
306cdf0e10cSrcweir 		}
307cdf0e10cSrcweir 		return aBuffer.makeStringAndClear();
308cdf0e10cSrcweir 	}
309cdf0e10cSrcweir }
310cdf0e10cSrcweir 
311cdf0e10cSrcweir //============================================================================
312cdf0e10cSrcweir namespace unnamed_ucb_regexp {
313cdf0e10cSrcweir 
matchString(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,sal_Char const * pString,size_t nStringLength)314cdf0e10cSrcweir bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
315cdf0e10cSrcweir 				 sal_Char const * pString, size_t nStringLength)
316cdf0e10cSrcweir {
317cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
318cdf0e10cSrcweir 
319cdf0e10cSrcweir 	sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
320cdf0e10cSrcweir 	sal_uChar const * qEnd = q + nStringLength;
321cdf0e10cSrcweir 
322cdf0e10cSrcweir 	if (pEnd - p < qEnd - q)
323cdf0e10cSrcweir 		return false;
324cdf0e10cSrcweir 
325cdf0e10cSrcweir 	while (q != qEnd)
326cdf0e10cSrcweir 	{
327cdf0e10cSrcweir 		sal_Unicode c1 = *p++;
328cdf0e10cSrcweir 		sal_Unicode c2 = *q++;
329cdf0e10cSrcweir 		if (c1 != c2)
330cdf0e10cSrcweir 			return false;
331cdf0e10cSrcweir 	}
332cdf0e10cSrcweir 
333cdf0e10cSrcweir 	*pBegin = p;
334cdf0e10cSrcweir 	return true;
335cdf0e10cSrcweir }
336cdf0e10cSrcweir 
scanStringLiteral(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,rtl::OUString * pString)337cdf0e10cSrcweir bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
338cdf0e10cSrcweir 					   rtl::OUString * pString)
339cdf0e10cSrcweir {
340cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
341cdf0e10cSrcweir 
342cdf0e10cSrcweir 	if (p == pEnd || *p++ != '"')
343cdf0e10cSrcweir 		return false;
344cdf0e10cSrcweir 
345cdf0e10cSrcweir 	rtl::OUStringBuffer aBuffer;
346cdf0e10cSrcweir 	for (;;)
347cdf0e10cSrcweir 	{
348cdf0e10cSrcweir 		if (p == pEnd)
349cdf0e10cSrcweir 			return false;
350cdf0e10cSrcweir 		sal_Unicode c = *p++;
351cdf0e10cSrcweir 		if (c == '"')
352cdf0e10cSrcweir 			break;
353cdf0e10cSrcweir 		if (c == '\\')
354cdf0e10cSrcweir 		{
355cdf0e10cSrcweir 			if (p == pEnd)
356cdf0e10cSrcweir 				return false;
357cdf0e10cSrcweir 			c = *p++;
358cdf0e10cSrcweir 			if (c != '"' && c != '\\')
359cdf0e10cSrcweir 				return false;
360cdf0e10cSrcweir 		}
361cdf0e10cSrcweir 		aBuffer.append(c);
362cdf0e10cSrcweir 	}
363cdf0e10cSrcweir 
364cdf0e10cSrcweir 	*pBegin = p;
365cdf0e10cSrcweir 	*pString = aBuffer.makeStringAndClear();
366cdf0e10cSrcweir 	return true;
367cdf0e10cSrcweir }
368cdf0e10cSrcweir 
369cdf0e10cSrcweir }
370cdf0e10cSrcweir 
parse(rtl::OUString const & rRegexp)371cdf0e10cSrcweir Regexp Regexp::parse(rtl::OUString const & rRegexp)
372cdf0e10cSrcweir {
373cdf0e10cSrcweir 	// Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
374cdf0e10cSrcweir 	// where <scheme> is as defined in RFC 2396:
375cdf0e10cSrcweir 	if (isScheme(rRegexp, false))
376cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX,
377cdf0e10cSrcweir                       rRegexp
378cdf0e10cSrcweir                           + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
379cdf0e10cSrcweir                       false,
380cdf0e10cSrcweir                       rtl::OUString(),
381cdf0e10cSrcweir 					  false,
382cdf0e10cSrcweir                       rtl::OUString());
383cdf0e10cSrcweir 
384cdf0e10cSrcweir 	sal_Unicode const * p = rRegexp.getStr();
385cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rRegexp.getLength();
386cdf0e10cSrcweir 
387cdf0e10cSrcweir 	rtl::OUString aPrefix;
388cdf0e10cSrcweir 	scanStringLiteral(&p, pEnd, &aPrefix);
389cdf0e10cSrcweir 
390cdf0e10cSrcweir 	if (p == pEnd)
391cdf0e10cSrcweir 		throw lang::IllegalArgumentException();
392cdf0e10cSrcweir 
393cdf0e10cSrcweir 	if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
394cdf0e10cSrcweir 	{
395cdf0e10cSrcweir 		if (p != pEnd)
396cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
397cdf0e10cSrcweir 
398cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
399cdf0e10cSrcweir 					  false, rtl::OUString());
400cdf0e10cSrcweir 	}
401cdf0e10cSrcweir 	else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
402cdf0e10cSrcweir 	{
403cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
404cdf0e10cSrcweir 		scanStringLiteral(&p, pEnd, &aReversePrefix);
405cdf0e10cSrcweir 
406cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
407cdf0e10cSrcweir 			|| p != pEnd)
408cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
409cdf0e10cSrcweir 
410cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
411cdf0e10cSrcweir 					  true, aReversePrefix);
412cdf0e10cSrcweir 	}
413cdf0e10cSrcweir 	else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
414cdf0e10cSrcweir 	{
415cdf0e10cSrcweir 		if (p != pEnd)
416cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
417cdf0e10cSrcweir 
418cdf0e10cSrcweir 		return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
419cdf0e10cSrcweir 					  false, rtl::OUString());
420cdf0e10cSrcweir 	}
421cdf0e10cSrcweir 	else if (matchString(&p, pEnd,
422cdf0e10cSrcweir 						 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
423cdf0e10cSrcweir 	{
424cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
425cdf0e10cSrcweir 		if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
426cdf0e10cSrcweir 			  && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
427cdf0e10cSrcweir 			  && p == pEnd))
428cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
429cdf0e10cSrcweir 
430cdf0e10cSrcweir 		return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
431cdf0e10cSrcweir 					  true, aReversePrefix);
432cdf0e10cSrcweir 	}
433cdf0e10cSrcweir 	else
434cdf0e10cSrcweir 	{
435cdf0e10cSrcweir 		bool bOpen = false;
436cdf0e10cSrcweir 		if (p != pEnd && *p == '(')
437cdf0e10cSrcweir 		{
438cdf0e10cSrcweir 			++p;
439cdf0e10cSrcweir 			bOpen = true;
440cdf0e10cSrcweir 		}
441cdf0e10cSrcweir 
442cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
443cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
444cdf0e10cSrcweir 
445cdf0e10cSrcweir 		if (p == pEnd || (*p != '*' && *p != '+'))
446cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
447cdf0e10cSrcweir 		bool bEmptyDomain = *p++ == '*';
448cdf0e10cSrcweir 
449cdf0e10cSrcweir 		rtl::OUString aInfix;
450cdf0e10cSrcweir 		scanStringLiteral(&p, pEnd, &aInfix);
451cdf0e10cSrcweir 
452cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
453cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
454cdf0e10cSrcweir 
455cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
456cdf0e10cSrcweir 		if (bOpen
457cdf0e10cSrcweir 			&& !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
458cdf0e10cSrcweir 				 && scanStringLiteral(&p, pEnd, &aReversePrefix)
459cdf0e10cSrcweir 				 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
460cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
461cdf0e10cSrcweir 
462cdf0e10cSrcweir 		if (p != pEnd)
463cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
464cdf0e10cSrcweir 
465cdf0e10cSrcweir 		return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
466cdf0e10cSrcweir 					  bOpen, aReversePrefix);
467cdf0e10cSrcweir 	}
468cdf0e10cSrcweir }
469cdf0e10cSrcweir 
470