1 /**************************************************************
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 *
20 *************************************************************/
21
22
23
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_ucb.hxx"
26 #include <regexp.hxx>
27
28 #include <cstddef>
29
30 #include "osl/diagnose.h"
31 #include <com/sun/star/lang/IllegalArgumentException.hpp>
32 #include <rtl/ustrbuf.hxx>
33 #include <rtl/ustring.hxx>
34
35 namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
36 // unnamed namespaces don't work well yet...
37
38 using namespace com::sun::star;
39 using namespace ucb_impl;
40
41 //============================================================================
42 //
43 // Regexp
44 //
45 //============================================================================
46
Regexp(Kind eTheKind,rtl::OUString const & rThePrefix,bool bTheEmptyDomain,rtl::OUString const & rTheInfix,bool bTheTranslation,rtl::OUString const & rTheReversePrefix)47 inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
48 bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
49 bool bTheTranslation,
50 rtl::OUString const & rTheReversePrefix):
51 m_eKind(eTheKind),
52 m_aPrefix(rThePrefix),
53 m_aInfix(rTheInfix),
54 m_aReversePrefix(rTheReversePrefix),
55 m_bEmptyDomain(bTheEmptyDomain),
56 m_bTranslation(bTheTranslation)
57 {
58 OSL_ASSERT(m_eKind == KIND_DOMAIN
59 || !m_bEmptyDomain && m_aInfix.getLength() == 0);
60 OSL_ASSERT(m_bTranslation || m_aReversePrefix.getLength() == 0);
61 }
62
63 //============================================================================
64 namespace unnamed_ucb_regexp {
65
matchStringIgnoreCase(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,rtl::OUString const & rString)66 bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
67 sal_Unicode const * pEnd,
68 rtl::OUString const & rString)
69 {
70 sal_Unicode const * p = *pBegin;
71
72 sal_Unicode const * q = rString.getStr();
73 sal_Unicode const * qEnd = q + rString.getLength();
74
75 if (pEnd - p < qEnd - q)
76 return false;
77
78 while (q != qEnd)
79 {
80 sal_Unicode c1 = *p++;
81 sal_Unicode c2 = *q++;
82 if (c1 >= 'a' && c1 <= 'z')
83 c1 -= 'a' - 'A';
84 if (c2 >= 'a' && c2 <= 'z')
85 c2 -= 'a' - 'A';
86 if (c1 != c2)
87 return false;
88 }
89
90 *pBegin = p;
91 return true;
92 }
93
94 }
95
matches(rtl::OUString const & rString,rtl::OUString * pTranslation,bool * pTranslated) const96 bool Regexp::matches(rtl::OUString const & rString,
97 rtl::OUString * pTranslation, bool * pTranslated) const
98 {
99 sal_Unicode const * pBegin = rString.getStr();
100 sal_Unicode const * pEnd = pBegin + rString.getLength();
101
102 bool bMatches = false;
103
104 sal_Unicode const * p = pBegin;
105 if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
106 {
107 sal_Unicode const * pBlock1Begin = p;
108 sal_Unicode const * pBlock1End = pEnd;
109
110 sal_Unicode const * pBlock2Begin = 0;
111 sal_Unicode const * pBlock2End = 0;
112
113 switch (m_eKind)
114 {
115 case KIND_PREFIX:
116 bMatches = true;
117 break;
118
119 case KIND_AUTHORITY:
120 bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
121 break;
122
123 case KIND_DOMAIN:
124 if (!m_bEmptyDomain)
125 {
126 if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
127 break;
128 ++p;
129 }
130 for (;;)
131 {
132 sal_Unicode const * q = p;
133 if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
134 && (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
135 {
136 bMatches = true;
137 pBlock1End = p;
138 pBlock2Begin = q;
139 pBlock2End = pEnd;
140 break;
141 }
142
143 if (p == pEnd)
144 break;
145
146 sal_Unicode c = *p++;
147 if (c == '/' || c == '?' || c == '#')
148 break;
149 }
150 break;
151 }
152
153 if (bMatches)
154 {
155 if (m_bTranslation)
156 {
157 if (pTranslation)
158 {
159 rtl::OUStringBuffer aBuffer(m_aReversePrefix);
160 aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
161 aBuffer.append(m_aInfix);
162 aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
163 *pTranslation = aBuffer.makeStringAndClear();
164 }
165 if (pTranslated)
166 *pTranslated = true;
167 }
168 else
169 {
170 if (pTranslation)
171 *pTranslation = rString;
172 if (pTranslated)
173 *pTranslated = false;
174 }
175 }
176 }
177
178 return bMatches;
179 }
180
181 //============================================================================
182 namespace unnamed_ucb_regexp {
183
isAlpha(sal_Unicode c)184 inline bool isAlpha(sal_Unicode c)
185 {
186 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
187 }
188
isDigit(sal_Unicode c)189 inline bool isDigit(sal_Unicode c)
190 {
191 return c >= '0' && c <= '9';
192 }
193
isScheme(rtl::OUString const & rString,bool bColon)194 bool isScheme(rtl::OUString const & rString, bool bColon)
195 {
196 // Return true if rString matches <scheme> (plus a trailing ":" if bColon
197 // is true) from RFC 2396:
198 sal_Unicode const * p = rString.getStr();
199 sal_Unicode const * pEnd = p + rString.getLength();
200 if (p != pEnd && isAlpha(*p))
201 for (++p;;)
202 {
203 if (p == pEnd)
204 return !bColon;
205 sal_Unicode c = *p++;
206 if (!(isAlpha(c) || isDigit(c)
207 || c == '+' || c == '-' || c == '.'))
208 return bColon && c == ':' && p == pEnd;
209 }
210 return false;
211 }
212
appendStringLiteral(rtl::OUStringBuffer * pBuffer,rtl::OUString const & rString)213 void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
214 rtl::OUString const & rString)
215 {
216 OSL_ASSERT(pBuffer);
217
218 pBuffer->append(sal_Unicode('"'));
219 sal_Unicode const * p = rString.getStr();
220 sal_Unicode const * pEnd = p + rString.getLength();
221 while (p != pEnd)
222 {
223 sal_Unicode c = *p++;
224 if (c == '"' || c == '\\')
225 pBuffer->append(sal_Unicode('\\'));
226 pBuffer->append(c);
227 }
228 pBuffer->append(sal_Unicode('"'));
229 }
230
231 }
232
getRegexp(bool bReverse) const233 rtl::OUString Regexp::getRegexp(bool bReverse) const
234 {
235 if (m_bTranslation)
236 {
237 rtl::OUStringBuffer aBuffer;
238 if (bReverse)
239 {
240 if (m_aReversePrefix.getLength() != 0)
241 appendStringLiteral(&aBuffer, m_aReversePrefix);
242 }
243 else
244 {
245 if (m_aPrefix.getLength() != 0)
246 appendStringLiteral(&aBuffer, m_aPrefix);
247 }
248 switch (m_eKind)
249 {
250 case KIND_PREFIX:
251 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
252 break;
253
254 case KIND_AUTHORITY:
255 aBuffer.
256 appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
257 break;
258
259 case KIND_DOMAIN:
260 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
261 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
262 if (m_aInfix.getLength() != 0)
263 appendStringLiteral(&aBuffer, m_aInfix);
264 aBuffer.
265 appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
266 break;
267 }
268 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
269 if (bReverse)
270 {
271 if (m_aPrefix.getLength() != 0)
272 appendStringLiteral(&aBuffer, m_aPrefix);
273 }
274 else
275 {
276 if (m_aReversePrefix.getLength() != 0)
277 appendStringLiteral(&aBuffer, m_aReversePrefix);
278 }
279 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
280 return aBuffer.makeStringAndClear();
281 }
282 else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
283 return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
284 else
285 {
286 rtl::OUStringBuffer aBuffer;
287 if (m_aPrefix.getLength() != 0)
288 appendStringLiteral(&aBuffer, m_aPrefix);
289 switch (m_eKind)
290 {
291 case KIND_PREFIX:
292 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
293 break;
294
295 case KIND_AUTHORITY:
296 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
297 break;
298
299 case KIND_DOMAIN:
300 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
301 aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
302 if (m_aInfix.getLength() != 0)
303 appendStringLiteral(&aBuffer, m_aInfix);
304 aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
305 break;
306 }
307 return aBuffer.makeStringAndClear();
308 }
309 }
310
311 //============================================================================
312 namespace unnamed_ucb_regexp {
313
matchString(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,sal_Char const * pString,size_t nStringLength)314 bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
315 sal_Char const * pString, size_t nStringLength)
316 {
317 sal_Unicode const * p = *pBegin;
318
319 sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
320 sal_uChar const * qEnd = q + nStringLength;
321
322 if (pEnd - p < qEnd - q)
323 return false;
324
325 while (q != qEnd)
326 {
327 sal_Unicode c1 = *p++;
328 sal_Unicode c2 = *q++;
329 if (c1 != c2)
330 return false;
331 }
332
333 *pBegin = p;
334 return true;
335 }
336
scanStringLiteral(sal_Unicode const ** pBegin,sal_Unicode const * pEnd,rtl::OUString * pString)337 bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
338 rtl::OUString * pString)
339 {
340 sal_Unicode const * p = *pBegin;
341
342 if (p == pEnd || *p++ != '"')
343 return false;
344
345 rtl::OUStringBuffer aBuffer;
346 for (;;)
347 {
348 if (p == pEnd)
349 return false;
350 sal_Unicode c = *p++;
351 if (c == '"')
352 break;
353 if (c == '\\')
354 {
355 if (p == pEnd)
356 return false;
357 c = *p++;
358 if (c != '"' && c != '\\')
359 return false;
360 }
361 aBuffer.append(c);
362 }
363
364 *pBegin = p;
365 *pString = aBuffer.makeStringAndClear();
366 return true;
367 }
368
369 }
370
parse(rtl::OUString const & rRegexp)371 Regexp Regexp::parse(rtl::OUString const & rRegexp)
372 {
373 // Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
374 // where <scheme> is as defined in RFC 2396:
375 if (isScheme(rRegexp, false))
376 return Regexp(Regexp::KIND_PREFIX,
377 rRegexp
378 + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
379 false,
380 rtl::OUString(),
381 false,
382 rtl::OUString());
383
384 sal_Unicode const * p = rRegexp.getStr();
385 sal_Unicode const * pEnd = p + rRegexp.getLength();
386
387 rtl::OUString aPrefix;
388 scanStringLiteral(&p, pEnd, &aPrefix);
389
390 if (p == pEnd)
391 throw lang::IllegalArgumentException();
392
393 if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
394 {
395 if (p != pEnd)
396 throw lang::IllegalArgumentException();
397
398 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
399 false, rtl::OUString());
400 }
401 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
402 {
403 rtl::OUString aReversePrefix;
404 scanStringLiteral(&p, pEnd, &aReversePrefix);
405
406 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
407 || p != pEnd)
408 throw lang::IllegalArgumentException();
409
410 return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
411 true, aReversePrefix);
412 }
413 else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
414 {
415 if (p != pEnd)
416 throw lang::IllegalArgumentException();
417
418 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
419 false, rtl::OUString());
420 }
421 else if (matchString(&p, pEnd,
422 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
423 {
424 rtl::OUString aReversePrefix;
425 if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
426 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
427 && p == pEnd))
428 throw lang::IllegalArgumentException();
429
430 return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
431 true, aReversePrefix);
432 }
433 else
434 {
435 bool bOpen = false;
436 if (p != pEnd && *p == '(')
437 {
438 ++p;
439 bOpen = true;
440 }
441
442 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
443 throw lang::IllegalArgumentException();
444
445 if (p == pEnd || (*p != '*' && *p != '+'))
446 throw lang::IllegalArgumentException();
447 bool bEmptyDomain = *p++ == '*';
448
449 rtl::OUString aInfix;
450 scanStringLiteral(&p, pEnd, &aInfix);
451
452 if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
453 throw lang::IllegalArgumentException();
454
455 rtl::OUString aReversePrefix;
456 if (bOpen
457 && !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
458 && scanStringLiteral(&p, pEnd, &aReversePrefix)
459 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
460 throw lang::IllegalArgumentException();
461
462 if (p != pEnd)
463 throw lang::IllegalArgumentException();
464
465 return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
466 bOpen, aReversePrefix);
467 }
468 }
469
470